Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345638
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
66 KB
Subscribers
None
View Options
diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
index cc2a92d..4e77eb6 100644
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -1,275 +1,257 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
from datetime import datetime, timezone
import os
from typing import Any, Dict, Optional, Set
from swh.core.statsd import statsd
from swh.model.hashutil import hash_to_hex
from swh.model.model import Sha1Git
from .archive import ArchiveInterface
from .interface import ProvenanceInterface
from .model import DirectoryEntry, RevisionEntry
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
UTCMIN = datetime.min.replace(tzinfo=timezone.utc)
class HistoryNode:
    """Node of a revision history graph.

    Wraps a revision entry together with two boolean flags supplied by the
    caller.
    """

    def __init__(
        self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False
    ) -> None:
        """Store the revision entry and its flags.

        Args:
            entry: revision this node stands for.
            is_head: whether the revision is a head revision (see below).
            in_history: whether the revision is already part of a processed
                history (see below).
        """
        self.entry = entry
        # A revision is `is_head` if it is directly pointed to by an origin (i.e. a
        # head revision for some snapshot)
        self.is_head = is_head
        # A revision is `in_history` if it appears in the history graph of an already
        # processed revision in the provenance database
        self.in_history = in_history
        # XXX: the current simplified version of the origin-revision layer algorithm
        # does not use these two flags at all. They are kept for now but might be
        # removed in the future (hence, RevisionEntry might be used instead of
        # HistoryNode).

    def __str__(self) -> str:
        """Return a compact, human-readable description of the node."""
        return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>"

    def as_dict(self) -> Dict[str, Any]:
        """Return the node as a plain dict, with the revision id hex-encoded."""
        return {
            "rev": hash_to_hex(self.entry.id),
            "is_head": self.is_head,
            "in_history": self.in_history,
        }
class HistoryGraph:
    """Parent relationships of the revisions reachable from a head revision.

    The graph is stored as a mapping from each visited node to the set of
    nodes built for its parents.
    """

    @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
    def __init__(
        self,
        provenance: ProvenanceInterface,
        archive: ArchiveInterface,
        revision: RevisionEntry,
    ) -> None:
        """Walk the history of `revision` and record every parent edge.

        Args:
            provenance: queried for the `is_head` / `in_history` flags of each
                revision encountered.
            archive: used to retrieve the parents of each revision.
            revision: head revision the traversal starts from.
        """
        self._head = HistoryNode(
            revision,
            is_head=provenance.revision_visited(revision),
            in_history=provenance.revision_in_history(revision),
        )
        self._graph: Dict[HistoryNode, Set[HistoryNode]] = {}

        # Iterative depth-first traversal, using an explicit stack.
        stack = [self._head]
        while stack:
            current = stack.pop()

            # NOTE(review): HistoryNode defines no __eq__/__hash__ in the visible
            # source, so this membership test is by object identity; a node freshly
            # created below is therefore never found here. Confirm whether shared
            # ancestors are meant to be traversed once per incoming edge.
            if current not in self._graph:
                self._graph[current] = set()
                current.entry.retrieve_parents(archive)
                for parent in current.entry.parents:
                    node = HistoryNode(
                        parent,
                        is_head=provenance.revision_visited(parent),
                        in_history=provenance.revision_in_history(parent),
                    )
                    self._graph[current].add(node)
                    stack.append(node)

    @property
    def head(self) -> HistoryNode:
        """Node built for the revision the graph was started from."""
        return self._head

    @property
    def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]:
        """Mapping from each visited node to the set of its parent nodes."""
        return self._graph

    def __str__(self) -> str:
        """Return a human-readable description of the whole graph."""
        # The closing ">" matches the opening "<" (it was missing originally).
        return f"<HistoryGraph: head={self._head}, graph={self._graph}>"

    def as_dict(self) -> Dict[str, Any]:
        """Return the graph as plain dicts and lists.

        Keys of "graph" are hex-encoded revision ids; each value is the list of
        parent dicts sorted by their "rev" field.
        """
        return {
            "head": self.head.as_dict(),
            "graph": {
                hash_to_hex(node.entry.id): sorted(
                    [parent.as_dict() for parent in parents],
                    key=lambda d: d["rev"],
                )
                for node, parents in self._graph.items()
            },
        }
class IsochroneNode:
# Node of the isochrone graph: wraps one directory entry together with its depth,
# its path, the date coming from the DB, the computed maxdate and its child nodes.
# In this diff, lines prefixed with `-` are removed and lines prefixed with `+` are
# added; the patch drops the `known` attribute from this class.
def __init__(
self,
entry: DirectoryEntry,
dbdate: Optional[datetime] = None,
depth: int = 0,
prefix: bytes = b"",
) -> None:
self.entry = entry
self.depth = depth
# dbdate is the maxdate for this node that comes from the DB
self._dbdate: Optional[datetime] = dbdate
# maxdate is set by the maxdate computation algorithm
self.maxdate: Optional[datetime] = None
- # known is True if this node is already known in the db; either because
- # the current directory actually exists in the database, or because all
- # the content of the current directory is known (subdirectories and files)
- self.known = self.dbdate is not None
# invalid is only set to True by invalidate()
self.invalid = False
# path is the entry name joined to the parent's path; with an empty prefix the
# bare entry name is used
self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name
self.children: Set[IsochroneNode] = set()
@property
def dbdate(self) -> Optional[datetime]:
# use a property to make this attribute (mostly) read-only
return self._dbdate
def invalidate(self) -> None:
# Forget both dates of this node, flag it as invalid and count the operation
# in the statsd operations metric.
statsd.increment(
metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"}
)
self._dbdate = None
self.maxdate = None
- self.known = False
self.invalid = True
def add_directory(
self, child: DirectoryEntry, date: Optional[datetime] = None
) -> IsochroneNode:
# Create a child node one level deeper, rooted at this node's path, register it
# in self.children and return it.
# we should not be processing this node (i.e. add subdirectories or files) if it's
# actually known by the provenance DB
assert self.dbdate is None
node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path)
self.children.add(node)
return node
def __str__(self) -> str:
# Recursive textual dump of the node and all of its children.
return (
- f"<{self.entry}: depth={self.depth}, "
- f"dbdate={self.dbdate}, maxdate={self.maxdate}, "
- f"known={self.known}, invalid={self.invalid}, path={self.path!r}, "
+ f"<{self.entry}: depth={self.depth}, dbdate={self.dbdate}, "
+ f"maxdate={self.maxdate}, invalid={self.invalid}, path={self.path!r}, "
f"children=[{', '.join(str(child) for child in self.children)}]>"
)
def __eq__(self, other: Any) -> bool:
# Equality compares every instance attribute (including mutable ones such as
# maxdate and children), whereas __hash__ below only uses a subset of them.
return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__
def __hash__(self) -> int:
# only immutable attributes are considered to compute hash
return hash((self.entry, self.depth, self.path))
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_isochrone_graph"})
def build_isochrone_graph(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
revision: RevisionEntry,
directory: DirectoryEntry,
minsize: int = 0,
) -> IsochroneNode:
# Build the tree of IsochroneNode objects rooted at `directory` for `revision`,
# compute the maxdate of every node and return the root node.
# `directory` must be the root directory of `revision`, and the revision must have
# a date (both are asserted below). `minsize` is forwarded to retrieve_children().
assert revision.date is not None
assert revision.root == directory.id
# this function processes a revision in 2 steps:
#
# 1. build the tree structure of IsochroneNode objects (one INode per
# directory under the root directory of the revision but not following
# known subdirectories), and gather the dates from the DB for already
# known objects; for files, just keep all the dates in a global 'fdates'
# dict; note that in this step, we will only recurse the directories
# that are not already known.
#
# 2. compute the maxdate for each node of the tree that was not found in the DB.
# Build the nodes structure
root_date = provenance.directory_get_date_in_isochrone_frontier(directory)
root = IsochroneNode(directory, dbdate=root_date)
stack = [root]
fdates: Dict[Sha1Git, datetime] = {} # map {file_id: date}
while stack:
current = stack.pop()
# Only expand directories that have no usable date in the DB: either no date at
# all, or a date that is not strictly older than the revision being processed.
if current.dbdate is None or current.dbdate >= revision.date:
# If current directory has an associated date in the isochrone frontier that
# is greater or equal to the current revision's one, it should be ignored as
# the revision is being processed out of order.
if current.dbdate is not None and current.dbdate >= revision.date:
current.invalidate()
# Pre-query all known dates for directories in the current directory
# for the provenance object to have them cached and (potentially) improve
# performance.
current.entry.retrieve_children(archive, minsize=minsize)
ddates = provenance.directory_get_dates_in_isochrone_frontier(
current.entry.dirs
)
for dir in current.entry.dirs:
# Recursively analyse subdirectory nodes
node = current.add_directory(dir, date=ddates.get(dir.id, None))
stack.append(node)
fdates.update(provenance.content_get_early_dates(current.entry.files))
# Precalculate max known date for each node in the graph (only directory nodes are
# pushed to the stack).
stack = [root]
while stack:
current = stack.pop()
# Current directory node is known if it already has an assigned date (ie. it was
# already seen as an isochrone frontier).
- if current.known:
+ if current.dbdate is not None:
assert current.maxdate is None
current.maxdate = current.dbdate
else:
if any(x.maxdate is None for x in current.children):
# at least one child of current has no maxdate yet
# Current node needs to be analysed again after its children.
stack.append(current)
for child in current.children:
if child.maxdate is None:
# if child.maxdate is None, it must be processed
stack.append(child)
else:
# all the files and directories under current have a maxdate,
# we can infer the maxdate for current directory
assert current.maxdate is None
# if all content is already known, update current directory info.
# The added version below clamps the computed maximum to revision.date with
# min(...); it replaces the explicit out-of-order branch removed further down,
# which reset maxdate to revision.date when the maximum exceeded it.
# Files missing from fdates default to revision.date; UTCMIN keeps max()
# defined for a directory with neither files nor children.
- current.maxdate = max(
- [UTCMIN]
- + [
- child.maxdate
- for child in current.children
- if child.maxdate is not None # unnecessary, but needed for mypy
- ]
- + [
- fdates.get(file.id, revision.date)
- for file in current.entry.files
- ]
+ current.maxdate = min(
+ max(
+ [UTCMIN]
+ + [
+ child.maxdate
+ for child in current.children
+ if child.maxdate is not None # for mypy
+ ]
+ + [
+ fdates.get(file.id, revision.date)
+ for file in current.entry.files
+ ]
+ ),
+ revision.date,
)
- if current.maxdate <= revision.date:
- current.known = (
- # true if all subdirectories are known
- all(child.known for child in current.children)
- # true if all files are in fdates, i.e. if all files were known
- # *before building this isochrone graph node*
- # Note: the 'all()' is lazy: will stop iterating as soon as
- # possible
- and all((file.id in fdates) for file in current.entry.files)
- )
- else:
- # at least one content is being processed out-of-order, then current
- # node should be treated as unknown
- current.maxdate = revision.date
- current.known = False
return root
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
index 2b2f523..b3dd843 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
@@ -1,401 +1,370 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "G/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E/D"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
maxdate: 1000000090.0
- known: True
path: "G/I"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
index b05f986..b370e6f 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
@@ -1,401 +1,370 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "G/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E/D"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
maxdate: 1000000090.0
- known: True
path: "G/I"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
index 45e3a2e..3639820 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
@@ -1,371 +1,344 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
index cee448f..8ed1c0f 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
@@ -1,365 +1,339 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
diff --git a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
index 147e560..a4aad93 100644
--- a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
+++ b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
@@ -1,185 +1,174 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
graph:
entry:
id: "2afae58027276dad2bdced5a505e8d781a7add5b"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R03
- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R04
- rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
graph:
entry:
id: "2afae58027276dad2bdced5a505e8d781a7add5b"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R05
- rev: "1dfac0491892096948d6a02bf12a2fed4bf75743"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000005.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000005.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000005.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000005.0
invalid: True
path: "A/B/C"
# Isochrone graph for R06
- rev: "53519b5a5e8cf12a4f81f82e489f95c1d04d5314"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000050.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000050.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000005.0
maxdate: 1000000005.0
- known: True
path: "A/B/C"
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
index 364a564..11d5881 100644
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -1,114 +1,113 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from copy import deepcopy
from datetime import datetime, timezone
from typing import Any, Dict
import pytest
import yaml
from swh.model.hashutil import hash_to_bytes
from swh.provenance.archive import ArchiveInterface
from swh.provenance.graph import IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
load_repo_data,
ts2dt,
)
def isochrone_graph_from_dict(d: Dict[str, Any], depth: int = 0) -> IsochroneNode:
    """Recursively build an IsochroneNode tree from its dictionary form.

    Args:
        d: one node of an expected graph, as loaded from the YAML data files.
            It must provide the keys ``entry`` (holding a hexadecimal ``id``
            and a ``name``), ``maxdate`` and ``path``; the keys ``dbdate``,
            ``invalid`` and ``children`` are optional. The dictionary is left
            untouched.
        depth: depth assigned to the node built from ``d``; its children are
            built with ``depth + 1``.

    Returns:
        The node described by ``d``, with ``children`` populated recursively.
    """
    # Only the small `entry` mapping is rewritten below, so copying it alone is
    # enough to protect the caller's data; deep-copying the whole subtree again
    # at every recursion level is unnecessary work.
    entry_fields = deepcopy(d["entry"])
    entry_fields["id"] = hash_to_bytes(entry_fields["id"])
    entry_fields["name"] = bytes(entry_fields["name"], encoding="utf-8")

    # Dates are stored as POSIX timestamps and are turned into UTC datetimes.
    raw_dbdate = d.get("dbdate")
    dbdate = (
        datetime.fromtimestamp(raw_dbdate, timezone.utc)
        if raw_dbdate is not None
        else None
    )

    node = IsochroneNode(
        entry=DirectoryEntry(**entry_fields),
        dbdate=dbdate,
        depth=depth,
    )
    node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc)
    node.invalid = d.get("invalid", False)
    node.path = bytes(d["path"], encoding="utf-8")
    node.children = {
        isochrone_graph_from_dict(child, depth=depth + 1)
        for child in d.get("children", [])
    }
    return node
@pytest.mark.parametrize(
    "repo, lower, mindepth",
    (
        ("cmdbts2", True, 1),
        ("cmdbts2", False, 1),
        ("cmdbts2", True, 2),
        ("cmdbts2", False, 2),
        ("out-of-order", True, 1),
    ),
)
@pytest.mark.parametrize("batch", (True, False))
def test_isochrone_graph(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    repo: str,
    lower: bool,
    mindepth: int,
    batch: bool,
) -> None:
    """Compare computed isochrone graphs against the stored expected ones.

    For every revision listed in the ``graphs_<repo>_<lower|upper>_<mindepth>.yaml``
    data file, the isochrone graph is built and checked for equality with the
    expected graph; the revision is then added to the provenance storage before
    moving on to the next one.
    """
    # See data/README.md for details on how these datasets are generated.
    dataset = load_repo_data(repo)
    fill_storage(archive.storage, dataset)
    revisions_by_id = {rev["id"]: rev for rev in dataset["revision"]}

    bound = "lower" if lower else "upper"
    with open(get_datafile(f"graphs_{repo}_{bound}_{mindepth}.yaml")) as stream:
        expected_graphs = yaml.full_load(stream)

    for expected in expected_graphs:
        print("# Processing revision", expected["rev"])
        rev_data = revisions_by_id[hash_to_bytes(expected["rev"])]
        entry = RevisionEntry(
            id=rev_data["id"],
            date=ts2dt(rev_data["date"]),
            root=rev_data["directory"],
        )
        expected_graph = isochrone_graph_from_dict(expected["graph"])
        print("Expected graph:", expected_graph)

        # Build the graph for this revision and compare it with the expected one.
        assert entry.root is not None
        root_directory = DirectoryEntry(entry.root)
        computed_graph = build_isochrone_graph(
            provenance, archive, entry, root_directory
        )
        print("Computed graph:", computed_graph)
        assert computed_graph == expected_graph

        # Register the revision so that the provenance information is up to date
        # when the following revisions are processed.
        revision_add(
            provenance,
            archive,
            [entry],
            lower=lower,
            mindepth=mindepth,
            commit=not batch,
        )
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:27 PM (6 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3251831
Attached To
rDPROV Provenance database
Event Timeline
Log In to Comment