diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py index cc2a92d..4e77eb6 100644 --- a/swh/provenance/graph.py +++ b/swh/provenance/graph.py @@ -1,275 +1,257 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from datetime import datetime, timezone import os from typing import Any, Dict, Optional, Set from swh.core.statsd import statsd from swh.model.hashutil import hash_to_hex from swh.model.model import Sha1Git from .archive import ArchiveInterface from .interface import ProvenanceInterface from .model import DirectoryEntry, RevisionEntry GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds" GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total" UTCMIN = datetime.min.replace(tzinfo=timezone.utc) class HistoryNode: def __init__( self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False ) -> None: self.entry = entry # A revision is `is_head` if it is directly pointed by an origin (ie. a head # revision for some snapshot) self.is_head = is_head # A revision is `in_history` if it appears in the history graph of an already # processed revision in the provenance database self.in_history = in_history # XXX: the current simplified version of the origin-revision layer algorithm # does not use this previous two flags at all. They are kept for now but might # be removed in the future (hence, RevisionEntry might be used instead of # HistoryNode). def __str__(self) -> str: return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>" def as_dict(self) -> Dict[str, Any]: return { "rev": hash_to_hex(self.entry.id), "is_head": self.is_head, "in_history": self.in_history, } class HistoryGraph: @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"}) def __init__( self, provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry, ) -> None: self._head = HistoryNode( revision, is_head=provenance.revision_visited(revision), in_history=provenance.revision_in_history(revision), ) self._graph: Dict[HistoryNode, Set[HistoryNode]] = {} stack = [self._head] while stack: current = stack.pop() if current not in self._graph: self._graph[current] = set() current.entry.retrieve_parents(archive) for parent in current.entry.parents: node = HistoryNode( parent, is_head=provenance.revision_visited(parent), in_history=provenance.revision_in_history(parent), ) self._graph[current].add(node) stack.append(node) @property def head(self) -> HistoryNode: return self._head @property def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]: return self._graph def __str__(self) -> str: return f"<HistoryGraph: head={self._head}, graph={self._graph}" def as_dict(self) -> Dict[str, Any]: return { "head": self.head.as_dict(), "graph": { hash_to_hex(node.entry.id): sorted( [parent.as_dict() for parent in parents], key=lambda d: d["rev"], ) for node, parents in self._graph.items() }, } class IsochroneNode: def __init__( self, entry: DirectoryEntry, dbdate: Optional[datetime] = None, depth: int = 0, prefix: bytes = b"", ) -> None: self.entry = entry self.depth = depth # dbdate is the maxdate for this node that comes from the DB self._dbdate: Optional[datetime] = dbdate # maxdate is set by the maxdate computation algorithm self.maxdate: Optional[datetime] = None - # known is True if this node is already known in the db; either because - # the current directory actually exists in the database, or because all - # the content of the current directory is known (subdirectories and files) - self.known = self.dbdate is not None self.invalid = False self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name self.children: Set[IsochroneNode] = set() @property def dbdate(self) -> Optional[datetime]: # use a property to make this attribute (mostly) read-only return self._dbdate def invalidate(self) -> None: statsd.increment( metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"} ) self._dbdate = None self.maxdate = None - self.known = False self.invalid = True def add_directory( self, child: DirectoryEntry, date: Optional[datetime] = None ) -> IsochroneNode: # we should not be processing this node (ie add subdirectories or files) if it's # actually known by the provenance DB assert self.dbdate is None node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) self.children.add(node) return node def __str__(self) -> str: return ( - f"<{self.entry}: depth={self.depth}, " - f"dbdate={self.dbdate}, maxdate={self.maxdate}, " - f"known={self.known}, invalid={self.invalid}, path={self.path!r}, " + f"<{self.entry}: depth={self.depth}, dbdate={self.dbdate}, " + f"maxdate={self.maxdate}, invalid={self.invalid}, path={self.path!r}, " f"children=[{', '.join(str(child) for child in self.children)}]>" ) def __eq__(self, other: Any) -> bool: return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ def __hash__(self) -> int: # only immutable attributes are considered to compute hash return hash((self.entry, self.depth, self.path)) @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_isochrone_graph"}) def build_isochrone_graph( provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry, directory: DirectoryEntry, minsize: int = 0, ) -> IsochroneNode: assert revision.date is not None assert revision.root == directory.id # this function process a revision in 2 steps: # # 1. build the tree structure of IsochroneNode objects (one INode per # directory under the root directory of the revision but not following # known subdirectories), and gather the dates from the DB for already # known objects; for files, just keep all the dates in a global 'fdates' # dict; note that in this step, we will only recurse the directories # that are not already known. # # 2. compute the maxdate for each node of the tree that was not found in the DB. # Build the nodes structure root_date = provenance.directory_get_date_in_isochrone_frontier(directory) root = IsochroneNode(directory, dbdate=root_date) stack = [root] fdates: Dict[Sha1Git, datetime] = {} # map {file_id: date} while stack: current = stack.pop() if current.dbdate is None or current.dbdate >= revision.date: # If current directory has an associated date in the isochrone frontier that # is greater or equal to the current revision's one, it should be ignored as # the revision is being processed out of order. if current.dbdate is not None and current.dbdate >= revision.date: current.invalidate() # Pre-query all known dates for directories in the current directory # for the provenance object to have them cached and (potentially) improve # performance. current.entry.retrieve_children(archive, minsize=minsize) ddates = provenance.directory_get_dates_in_isochrone_frontier( current.entry.dirs ) for dir in current.entry.dirs: # Recursively analyse subdirectory nodes node = current.add_directory(dir, date=ddates.get(dir.id, None)) stack.append(node) fdates.update(provenance.content_get_early_dates(current.entry.files)) # Precalculate max known date for each node in the graph (only directory nodes are # pushed to the stack). stack = [root] while stack: current = stack.pop() # Current directory node is known if it already has an assigned date (ie. it was # already seen as an isochrone frontier). - if current.known: + if current.dbdate is not None: assert current.maxdate is None current.maxdate = current.dbdate else: if any(x.maxdate is None for x in current.children): # at least one child of current has no maxdate yet # Current node needs to be analysed again after its children. stack.append(current) for child in current.children: if child.maxdate is None: # if child.maxdate is None, it must be processed stack.append(child) else: # all the files and directories under current have a maxdate, # we can infer the maxdate for current directory assert current.maxdate is None # if all content is already known, update current directory info. - current.maxdate = max( - [UTCMIN] - + [ - child.maxdate - for child in current.children - if child.maxdate is not None # unnecessary, but needed for mypy - ] - + [ - fdates.get(file.id, revision.date) - for file in current.entry.files - ] + current.maxdate = min( + max( + [UTCMIN] + + [ + child.maxdate + for child in current.children + if child.maxdate is not None # for mypy + ] + + [ + fdates.get(file.id, revision.date) + for file in current.entry.files + ] + ), + revision.date, ) - if current.maxdate <= revision.date: - current.known = ( - # true if all subdirectories are known - all(child.known for child in current.children) - # true if all files are in fdates, i.e. if all files were known - # *before building this isochrone graph node* - # Note: the 'all()' is lazy: will stop iterating as soon as - # possible - and all((file.id in fdates) for file in current.entry.files) - ) - else: - # at least one content is being processed out-of-order, then current - # node should be treated as unknown - current.maxdate = revision.date - current.known = False return root diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml index 2b2f523..b3dd843 100644 --- a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml @@ -1,401 +1,370 @@ # Isochrone graph for R00 - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" graph: entry: id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" name: "" maxdate: 1000000000.0 path: "" children: - entry: id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" name: "A" maxdate: 1000000000.0 path: "A" children: - entry: id: "36876d475197b5ad86ad592e8e28818171455f16" name: "B" maxdate: 1000000000.0 path: "A/B" children: - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" maxdate: 1000000000.0 path: "A/B/C" # Isochrone graph for R01 - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 path: "A/B/C" # Isochrone graph for R02 - rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R03 - rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" graph: entry: id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R04 - rev: "17ed10db0612c9b46ba340943cb6b48b25431419" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 - known: True path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 - known: True path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 - known: True path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R05 - rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" graph: entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "" maxdate: 1000000050.0 path: "" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 path: "D" # Isochrone graph for R06 - rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" graph: entry: id: "c86d2f588234098642ef6f33ca662a6a9de865bc" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" name: "D" maxdate: 1000000050.0 - known: True path: "D" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "D/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 - known: True path: "D/E/D" # Isochrone graph for R07 - rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" graph: entry: id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" dbdate: 1000000050.0 maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R08 - rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" graph: entry: id: "79e219827e12f40e7146cc6834ee04b617a8073a" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "9a7b5762e20b11735b93a635cda451c75bd31270" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" name: "D" maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R09 - rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" graph: entry: id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" name: "" maxdate: 1000000090.0 path: "" children: - entry: id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" name: "F" maxdate: 1000000090.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 path: "F/E/D" # Isochrone graph for R10 - rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" graph: entry: id: "8c61bb233c89936b310d8b269a35c24bff432227" name: "" maxdate: 1000000100.0 path: "" children: - entry: id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" name: "F" maxdate: 1000000100.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "F/E/D" # Isochrone graph for R11 - rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" graph: entry: id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" name: "" maxdate: 1000000110.0 path: "" children: - entry: id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" name: "G" maxdate: 1000000110.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "G/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/E/D" # Isochrone graph for R12 - rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" graph: entry: id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" name: "" maxdate: 1000000120.0 path: "" children: - entry: id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" name: "G" maxdate: 1000000120.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" maxdate: 1000000090.0 - known: True path: "G/H" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H/D" # Isochrone graph for R13 - rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" graph: entry: id: "148f08e057416af1e471abb3dcd594d27233085d" name: "" maxdate: 1000000130.0 path: "" children: - entry: id: "8084b999790aab88e5119915ea1083e747a3f42f" name: "G" maxdate: 1000000130.0 path: "G" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "I" maxdate: 1000000090.0 - known: True path: "G/I" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/I/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" maxdate: 1000000090.0 - known: True path: "G/H" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml index b05f986..b370e6f 100644 --- a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml @@ -1,401 +1,370 @@ # Isochrone graph for R00 - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" graph: entry: id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" name: "" maxdate: 1000000000.0 path: "" children: - entry: id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" name: "A" maxdate: 1000000000.0 path: "A" children: - entry: id: "36876d475197b5ad86ad592e8e28818171455f16" name: "B" maxdate: 1000000000.0 path: "A/B" children: - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" maxdate: 1000000000.0 path: "A/B/C" # Isochrone graph for R01 - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 path: "A/B/C" # Isochrone graph for R02 - rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R03 - rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" graph: entry: id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R04 - rev: "17ed10db0612c9b46ba340943cb6b48b25431419" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 - known: True path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 - known: True path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 - known: True path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R05 - rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" graph: entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "" maxdate: 1000000050.0 path: "" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 path: "D" # Isochrone graph for R06 - rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" graph: entry: id: "c86d2f588234098642ef6f33ca662a6a9de865bc" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" name: "D" maxdate: 1000000050.0 - known: True path: "D" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "D/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 - known: True path: "D/E/D" # Isochrone graph for R07 - rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" graph: entry: id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" dbdate: 1000000050.0 maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R08 - rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" graph: entry: id: "79e219827e12f40e7146cc6834ee04b617a8073a" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "9a7b5762e20b11735b93a635cda451c75bd31270" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" name: "D" maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R09 - rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" graph: entry: id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" name: "" maxdate: 1000000090.0 path: "" children: - entry: id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" name: "F" maxdate: 1000000090.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 path: "F/E/D" # Isochrone graph for R10 - rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" graph: entry: id: "8c61bb233c89936b310d8b269a35c24bff432227" name: "" maxdate: 1000000100.0 path: "" children: - entry: id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" name: "F" maxdate: 1000000100.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "F/E/D" # Isochrone graph for R11 - rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" graph: entry: id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" name: "" maxdate: 1000000110.0 path: "" children: - entry: id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" name: "G" maxdate: 1000000110.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "G/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/E/D" # Isochrone graph for R12 - rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" graph: entry: id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" name: "" maxdate: 1000000120.0 path: "" children: - entry: id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" name: "G" maxdate: 1000000120.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" maxdate: 1000000090.0 - known: True path: "G/H" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H/D" # Isochrone graph for R13 - rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" graph: entry: id: "148f08e057416af1e471abb3dcd594d27233085d" name: "" maxdate: 1000000130.0 path: "" children: - entry: id: "8084b999790aab88e5119915ea1083e747a3f42f" name: "G" maxdate: 1000000130.0 path: "G" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "I" maxdate: 1000000090.0 - known: True path: "G/I" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/I/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" maxdate: 1000000090.0 - known: True path: "G/H" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml index 45e3a2e..3639820 100644 --- a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml @@ -1,371 +1,344 @@ # Isochrone graph for R00 - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" graph: entry: id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" name: "" maxdate: 1000000000.0 path: "" children: - entry: id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" name: "A" maxdate: 1000000000.0 path: "A" children: - entry: id: "36876d475197b5ad86ad592e8e28818171455f16" name: "B" maxdate: 1000000000.0 path: "A/B" children: - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" maxdate: 1000000000.0 path: "A/B/C" # Isochrone graph for R01 - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 path: "A/B/C" # Isochrone graph for R02 - rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R03 - rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" graph: entry: id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R04 - rev: "17ed10db0612c9b46ba340943cb6b48b25431419" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 - known: True path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 - known: True path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 - known: True path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R05 - rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" graph: entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "" maxdate: 1000000050.0 path: "" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 path: "D" # Isochrone graph for R06 - rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" graph: entry: id: "c86d2f588234098642ef6f33ca662a6a9de865bc" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" name: "D" maxdate: 1000000050.0 - known: True path: "D" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "D/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 - known: True path: "D/E/D" # Isochrone graph for R07 - rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" graph: entry: id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R08 - rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" graph: entry: id: "79e219827e12f40e7146cc6834ee04b617a8073a" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "9a7b5762e20b11735b93a635cda451c75bd31270" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" name: "D" maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R09 - rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" graph: entry: id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" name: "" maxdate: 1000000090.0 path: "" children: - entry: id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" name: "F" maxdate: 1000000090.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 path: "F/E/D" # Isochrone graph for R10 - rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" graph: entry: id: "8c61bb233c89936b310d8b269a35c24bff432227" name: "" maxdate: 1000000100.0 path: "" children: - entry: id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" name: "F" maxdate: 1000000100.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "F/E/D" # Isochrone graph for R11 - rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" graph: entry: id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" name: "" maxdate: 1000000110.0 path: "" children: - entry: id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" name: "G" maxdate: 1000000110.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/E" # Isochrone graph for R12 - rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" graph: entry: id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" name: "" maxdate: 1000000120.0 path: "" children: - entry: id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" name: "G" maxdate: 1000000120.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H" # Isochrone graph for R13 - rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" graph: entry: id: "148f08e057416af1e471abb3dcd594d27233085d" name: "" maxdate: 1000000130.0 path: "" children: - entry: id: "8084b999790aab88e5119915ea1083e747a3f42f" name: "G" maxdate: 1000000130.0 path: "G" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "G/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "I" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/I" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml index cee448f..8ed1c0f 100644 --- a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml @@ -1,365 +1,339 @@ # Isochrone graph for R00 - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" graph: entry: id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" name: "" maxdate: 1000000000.0 path: "" children: - entry: id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" name: "A" maxdate: 1000000000.0 path: "A" children: - entry: id: "36876d475197b5ad86ad592e8e28818171455f16" name: "B" maxdate: 1000000000.0 path: "A/B" children: - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" maxdate: 1000000000.0 path: "A/B/C" # Isochrone graph for R01 - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 path: "A/B/C" # Isochrone graph for R02 - rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R03 - rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" graph: entry: id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R04 - rev: "17ed10db0612c9b46ba340943cb6b48b25431419" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000020.0 - known: True path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000020.0 - known: True path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000020.0 - known: True path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R05 - rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" graph: entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "" maxdate: 1000000050.0 path: "" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 path: "D" # Isochrone graph for R06 - rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" graph: entry: id: "c86d2f588234098642ef6f33ca662a6a9de865bc" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" name: "D" maxdate: 1000000050.0 - known: True path: "D" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" maxdate: 1000000050.0 - known: True path: "D/E" children: - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" maxdate: 1000000050.0 - known: True path: "D/E/D" # Isochrone graph for R07 - rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" graph: entry: id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" name: "E" dbdate: 1000000050.0 maxdate: 1000000050.0 - known: True path: "F/E" # Isochrone graph for R08 - rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" graph: entry: id: "79e219827e12f40e7146cc6834ee04b617a8073a" name: "" maxdate: 1000000050.0 - known: True path: "" children: - entry: id: "9a7b5762e20b11735b93a635cda451c75bd31270" name: "F" maxdate: 1000000050.0 - known: True path: "F" children: - entry: id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" name: "E" maxdate: 1000000050.0 - known: True path: "F/E" children: - entry: id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" name: "D" maxdate: 1000000050.0 - known: True path: "F/E/D" # Isochrone graph for R09 - rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" graph: entry: id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" name: "" maxdate: 1000000090.0 path: "" children: - entry: id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" name: "F" maxdate: 1000000090.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 path: "F/E/D" # Isochrone graph for R10 - rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" graph: entry: id: "8c61bb233c89936b310d8b269a35c24bff432227" name: "" maxdate: 1000000100.0 path: "" children: - entry: id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" name: "F" maxdate: 1000000100.0 path: "F" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" maxdate: 1000000090.0 - known: True path: "F/E" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "F/E/D" # Isochrone graph for R11 - rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" graph: entry: id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" name: "" maxdate: 1000000110.0 path: "" children: - entry: id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" name: "G" maxdate: 1000000110.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "E" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/E" # Isochrone graph for R12 - rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" graph: entry: id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" name: "" maxdate: 1000000120.0 path: "" children: - entry: id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" name: "G" maxdate: 1000000120.0 path: "G" children: - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H" # Isochrone graph for R13 - rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" graph: entry: id: "148f08e057416af1e471abb3dcd594d27233085d" name: "" maxdate: 1000000130.0 path: "" children: - entry: id: "8084b999790aab88e5119915ea1083e747a3f42f" name: "G" maxdate: 1000000130.0 path: "G" children: - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" maxdate: 1000000090.0 - known: True path: "G/D" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "I" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/I" - entry: id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" name: "H" dbdate: 1000000090.0 maxdate: 1000000090.0 - known: True path: "G/H" diff --git a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml index 147e560..a4aad93 100644 --- a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml +++ b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml @@ -1,185 +1,174 @@ # Isochrone graph for R00 - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" graph: entry: id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" name: "" maxdate: 1000000000.0 path: "" children: - entry: id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" name: "A" maxdate: 1000000000.0 path: "A" children: - entry: id: "36876d475197b5ad86ad592e8e28818171455f16" name: "B" maxdate: 1000000000.0 path: "A/B" children: - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" maxdate: 1000000000.0 path: "A/B/C" # Isochrone graph for R01 - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 path: "A/B/C" # Isochrone graph for R02 - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" graph: entry: id: "2afae58027276dad2bdced5a505e8d781a7add5b" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R03 - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000010.0 - known: True path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/B/C" # Isochrone graph for R04 - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" graph: entry: id: "2afae58027276dad2bdced5a505e8d781a7add5b" name: "" maxdate: 1000000010.0 - known: True path: "" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "A" maxdate: 1000000010.0 - known: True path: "A" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 - known: True path: "A/C" # Isochrone graph for R05 - rev: "1dfac0491892096948d6a02bf12a2fed4bf75743" graph: entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" maxdate: 1000000005.0 path: "" children: - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" maxdate: 1000000005.0 path: "A" children: - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" maxdate: 1000000005.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" maxdate: 1000000005.0 invalid: True path: "A/B/C" # Isochrone graph for R06 - rev: "53519b5a5e8cf12a4f81f82e489f95c1d04d5314" graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" maxdate: 1000000050.0 path: "" children: - entry: id: "d591b308488541aabffd854eae85a9bf83a9d9f5" name: "A" maxdate: 1000000050.0 path: "A" children: - entry: id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" name: "B" maxdate: 1000000050.0 path: "A/B" children: - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" dbdate: 1000000005.0 maxdate: 1000000005.0 - known: True path: "A/B/C" diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py index 364a564..11d5881 100644 --- a/swh/provenance/tests/test_isochrone_graph.py +++ b/swh/provenance/tests/test_isochrone_graph.py @@ -1,114 +1,113 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from copy import deepcopy from datetime import datetime, timezone from typing import Any, Dict import pytest import yaml from swh.model.hashutil import hash_to_bytes from swh.provenance.archive import ArchiveInterface from swh.provenance.graph import IsochroneNode, build_isochrone_graph from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import DirectoryEntry, RevisionEntry from swh.provenance.revision import revision_add from swh.provenance.tests.conftest import ( fill_storage, get_datafile, load_repo_data, ts2dt, ) def isochrone_graph_from_dict(d: Dict[str, Any], depth: int = 0) -> IsochroneNode: """Takes a dictionary representing a tree of IsochroneNode objects, and recursively builds the corresponding graph.""" d = deepcopy(d) d["entry"]["id"] = hash_to_bytes(d["entry"]["id"]) d["entry"]["name"] = bytes(d["entry"]["name"], encoding="utf-8") dbdate = d.get("dbdate", None) if dbdate is not None: dbdate = datetime.fromtimestamp(d["dbdate"], timezone.utc) children = d.get("children", []) node = IsochroneNode( entry=DirectoryEntry(**d["entry"]), dbdate=dbdate, depth=depth, ) node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc) - node.known = d.get("known", False) node.invalid = d.get("invalid", False) node.path = bytes(d["path"], encoding="utf-8") node.children = set( isochrone_graph_from_dict(child, depth=depth + 1) for child in children ) return node @pytest.mark.parametrize( "repo, lower, mindepth", ( ("cmdbts2", True, 1), ("cmdbts2", False, 1), ("cmdbts2", True, 2), ("cmdbts2", False, 2), ("out-of-order", True, 1), ), ) @pytest.mark.parametrize("batch", (True, False)) def test_isochrone_graph( provenance: ProvenanceInterface, archive: ArchiveInterface, repo: str, lower: bool, mindepth: int, batch: bool, ) -> None: # read data/README.md for more details on how these datasets are generated data = load_repo_data(repo) fill_storage(archive.storage, data) revisions = {rev["id"]: rev for rev in data["revision"]} filename = f"graphs_{repo}_{'lower' if lower else 'upper'}_{mindepth}.yaml" with open(get_datafile(filename)) as file: for expected in yaml.full_load(file): print("# Processing revision", expected["rev"]) revision = revisions[hash_to_bytes(expected["rev"])] entry = RevisionEntry( id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], ) expected_graph = isochrone_graph_from_dict(expected["graph"]) print("Expected graph:", expected_graph) # Create graph for current revision and check it has the expected structure. assert entry.root is not None computed_graph = build_isochrone_graph( provenance, archive, entry, DirectoryEntry(entry.root), ) print("Computed graph:", computed_graph) assert computed_graph == expected_graph # Add current revision so that provenance info is kept up to date for the # following ones. revision_add( provenance, archive, [entry], lower=lower, mindepth=mindepth, commit=not batch, )