diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py index 4094b5f..17662a0 100644 --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -1,576 +1,583 @@ +from collections import Counter from datetime import datetime, timezone import logging import os import time from typing import Dict, Generator, Iterable, List, Optional, Tuple from typing_extensions import Protocol, runtime_checkable from swh.model.hashutil import hash_to_hex from .archive import ArchiveInterface from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry UTCMIN = datetime.min.replace(tzinfo=timezone.utc) @runtime_checkable class ProvenanceInterface(Protocol): raise_on_commit: bool = False def commit(self): """Commit currently ongoing transactions in the backend DB""" ... def content_add_to_directory( self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes ) -> None: ... def content_add_to_revision( self, revision: RevisionEntry, blob: FileEntry, prefix: bytes ) -> None: ... def content_find_first( self, blobid: bytes ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]: ... def content_find_all( self, blobid: bytes, limit: Optional[int] = None ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]: ... def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]: ... def content_get_early_dates( self, blobs: Iterable[FileEntry] ) -> Dict[bytes, datetime]: ... def content_set_early_date(self, blob: FileEntry, date: datetime) -> None: ... def directory_add_to_revision( self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes ) -> None: ... def directory_get_date_in_isochrone_frontier( self, directory: DirectoryEntry ) -> Optional[datetime]: ... def directory_get_dates_in_isochrone_frontier( self, dirs: Iterable[DirectoryEntry] ) -> Dict[bytes, datetime]: ... def directory_invalidate_in_isochrone_frontier( self, directory: DirectoryEntry ) -> None: ... def directory_set_date_in_isochrone_frontier( self, directory: DirectoryEntry, date: datetime ) -> None: ... def origin_get_id(self, origin: OriginEntry) -> int: ... def revision_add(self, revision: RevisionEntry) -> None: ... def revision_add_before_revision( self, relative: RevisionEntry, revision: RevisionEntry ) -> None: ... def revision_add_to_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: ... def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]: ... def revision_get_preferred_origin(self, revision: RevisionEntry) -> int: ... def revision_in_history(self, revision: RevisionEntry) -> bool: ... def revision_set_preferred_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: ... def revision_visited(self, revision: RevisionEntry) -> bool: ... def flatten_directory( archive: ArchiveInterface, provenance: ProvenanceInterface, directory: DirectoryEntry, ) -> None: """Recursively retrieve all the files of 'directory' and insert them in the 'provenance' database in the 'content_to_directory' table. """ stack = [(directory, b"")] while stack: current, prefix = stack.pop() current.retrieve_children(archive) for f_child in current.files: # Add content to the directory with the computed prefix. provenance.content_add_to_directory(directory, f_child, prefix) for d_child in current.dirs: # Recursively walk the child directory. stack.append((d_child, os.path.join(prefix, d_child.name))) def origin_add( provenance: ProvenanceInterface, archive: ArchiveInterface, origins: List[OriginEntry], ) -> None: start = time.time() for origin in origins: origin.retrieve_revisions(archive) for revision in origin.revisions: origin_add_revision(provenance, archive, origin, revision) done = time.time() provenance.commit() stop = time.time() logging.debug( "Origins " ";".join( [origin.url + ":" + hash_to_hex(origin.snapshot) for origin in origins] ) + f" were processed in {stop - start} secs (commit took {stop - done} secs)!" ) def origin_add_revision( provenance: ProvenanceInterface, archive: ArchiveInterface, origin: OriginEntry, revision: RevisionEntry, ) -> None: stack: List[Tuple[Optional[RevisionEntry], RevisionEntry]] = [(None, revision)] while stack: relative, current = stack.pop() # Check if current revision has no preferred origin and update if necessary. preferred = provenance.revision_get_preferred_origin(current) if preferred is None: provenance.revision_set_preferred_origin(origin, current) ######################################################################## if relative is None: # This revision is pointed directly by the origin. visited = provenance.revision_visited(current) provenance.revision_add_to_origin(origin, current) if not visited: stack.append((current, current)) else: # This revision is a parent of another one in the history of the # relative revision. for parent in current.parents(archive): visited = provenance.revision_visited(parent) if not visited: # The parent revision has never been seen before pointing # directly to an origin. known = provenance.revision_in_history(parent) if known: # The parent revision is already known in some other # revision's history. We should point it directly to # the origin and (eventually) walk its history. stack.append((None, parent)) else: # The parent revision was never seen before. We should # walk its history and associate it with the same # relative revision. provenance.revision_add_before_revision(relative, parent) stack.append((relative, parent)) else: # The parent revision already points to an origin, so its # history was properly processed before. We just need to # make sure it points to the current origin as well. provenance.revision_add_to_origin(origin, parent) def revision_add( provenance: ProvenanceInterface, archive: ArchiveInterface, revisions: List[RevisionEntry], trackall: bool = True, lower: bool = True, mindepth: int = 1, ) -> None: start = time.time() for revision in revisions: assert revision.date is not None assert revision.root is not None # Processed content starting from the revision's root directory. date = provenance.revision_get_early_date(revision) if date is None or revision.date < date: logging.debug( f"Processing revisions {hash_to_hex(revision.id)}" f" (known date {date} / revision date {revision.date})..." ) graph = build_isochrone_graph( archive, provenance, revision, DirectoryEntry(revision.root), ) # TODO: add file size filtering revision_process_content( archive, provenance, revision, graph, trackall=trackall, lower=lower, mindepth=mindepth, ) done = time.time() # TODO: improve this! Maybe using a max attempt counter? # Ideally Provenance class should guarantee that a commit never fails. while not provenance.commit(): logging.warning( "Could not commit revisions " + ";".join([hash_to_hex(revision.id) for revision in revisions]) + ". Retrying..." ) stop = time.time() logging.debug( f"Revisions {';'.join([hash_to_hex(revision.id) for revision in revisions])} " f" were processed in {stop - start} secs (commit took {stop - done} secs)!" ) # logging.critical( # ";".join([hash_to_hex(revision.id) for revision in revisions]) # + f",{stop - start},{stop - done}" # ) class IsochroneNode: def __init__( self, entry: DirectoryEntry, dbdate: Optional[datetime] = None, depth: int = 0, prefix: bytes = b"", ): self.entry = entry self.depth = depth # dbdate is the maxdate for this node that comes from the DB self._dbdate: Optional[datetime] = dbdate # maxdate is set by the maxdate computation algorithm self.maxdate: Optional[datetime] = None # known is True if this node is already known in the db; either because # the current directory actually exists in the database, or because all # the content of the current directory is known (subdirectories and files) self.known: bool = self.dbdate is not None self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name self.children: List[IsochroneNode] = [] @property def dbdate(self): # use a property to make this attribute (mostly) read-only return self._dbdate def invalidate(self): self._dbdate = None self.maxdate = None self.known = False def add_directory( self, child: DirectoryEntry, date: Optional[datetime] = None ) -> "IsochroneNode": # we should not be processing this node (ie add subdirectories or # files) if it's actually known by the provenance DB assert self.dbdate is None node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) self.children.append(node) return node def __str__(self): return ( f"<{self.entry}: " f"known={self.known}, maxdate={self.maxdate}, " f"dbdate={self.dbdate}, path={self.path}, " f"children=[{', '.join(str(child) for child in self.children)}]>" ) def __eq__(self, other): - sameDbDate = ( - self._dbdate is None and other._dbdate is None - ) or self._dbdate == other._dbdate - sameMaxdate = ( - self.maxdate is None and other.maxdate is None - ) or self.maxdate == other.maxdate return ( isinstance(other, IsochroneNode) - and (self.entry, self.depth, self.known, self.path) - == (other.entry, other.depth, other.known, other.path) - and sameDbDate - and sameMaxdate - and set(self.children) == set(other.children) + and ( + self.entry, + self.depth, + self._dbdate, + self.maxdate, + self.known, + self.path, + ) + == ( + other.entry, + other.depth, + other._dbdate, + other.maxdate, + other.known, + other.path, + ) + and Counter(self.children) == Counter(other.children) ) def __hash__(self): return hash( (self.entry, self.depth, self._dbdate, self.maxdate, self.known, self.path) ) def build_isochrone_graph( archive: ArchiveInterface, provenance: ProvenanceInterface, revision: RevisionEntry, directory: DirectoryEntry, ) -> IsochroneNode: assert revision.date is not None assert revision.root == directory.id # this function process a revision in 2 steps: # # 1. build the tree structure of IsochroneNode objects (one INode per # directory under the root directory of the revision but not following # known subdirectories), and gather the dates from the DB for already # known objects; for files, just keep all the dates in a global 'fdates' # dict; note that in this step, we will only recurse the directories # that are not already known. # # 2. compute the maxdate for each node of the tree that was not found in the DB. # Build the nodes structure root_date = provenance.directory_get_date_in_isochrone_frontier(directory) root = IsochroneNode(directory, dbdate=root_date) stack = [root] logging.debug( f"Recursively creating graph for revision {hash_to_hex(revision.id)}..." ) fdates: Dict[bytes, datetime] = {} # map {file_id: date} while stack: current = stack.pop() if current.dbdate is None or current.dbdate > revision.date: # If current directory has an associated date in the isochrone frontier that # is greater or equal to the current revision's one, it should be ignored as # the revision is being processed out of order. if current.dbdate is not None and current.dbdate > revision.date: logging.debug( f"Invalidating frontier on {hash_to_hex(current.entry.id)}" f" (date {current.dbdate})" f" when processing revision {hash_to_hex(revision.id)}" f" (date {revision.date})" ) provenance.directory_invalidate_in_isochrone_frontier(current.entry) current.invalidate() # Pre-query all known dates for directories in the current directory # for the provenance object to have them cached and (potentially) improve # performance. current.entry.retrieve_children(archive) ddates = provenance.directory_get_dates_in_isochrone_frontier( current.entry.dirs ) for dir in current.entry.dirs: # Recursively analyse subdirectory nodes node = current.add_directory(dir, date=ddates.get(dir.id, None)) stack.append(node) fdates.update(provenance.content_get_early_dates(current.entry.files)) logging.debug( f"Isochrone graph for revision {hash_to_hex(revision.id)} successfully created!" ) # Precalculate max known date for each node in the graph (only directory nodes are # pushed to the stack). logging.debug(f"Computing maxdates for revision {hash_to_hex(revision.id)}...") stack = [root] while stack: current = stack.pop() # Current directory node is known if it already has an assigned date (ie. it was # already seen as an isochrone frontier). if current.known: assert current.maxdate is None current.maxdate = current.dbdate else: if any(x.maxdate is None for x in current.children): # at least one child of current has no maxdate yet # Current node needs to be analysed again after its children. stack.append(current) for child in current.children: if child.maxdate is None: # if child.maxdate is None, it must be processed stack.append(child) else: # all the files and directories under current have a maxdate, # we can infer the maxdate for current directory assert current.maxdate is None # If all content is already known, update current directory info. current.maxdate = max( [UTCMIN] + [ child.maxdate for child in current.children if child.maxdate is not None # unnecessary, but needed for mypy ] + [ fdates.get(file.id, revision.date) for file in current.entry.files ] ) current.known = ( # true if all subdirectories are known all(child.known for child in current.children) # true if all files are in fdates, i.e. if all files were known # *before building this isochrone graph node* # Note: the 'all()' is lazy: will stop iterating as soon as possible and all((file.id in fdates) for file in current.entry.files) ) logging.debug( f"Maxdates for revision {hash_to_hex(revision.id)} successfully computed!" ) return root def revision_process_content( archive: ArchiveInterface, provenance: ProvenanceInterface, revision: RevisionEntry, graph: IsochroneNode, trackall: bool = True, lower: bool = True, mindepth: int = 1, ): assert revision.date is not None provenance.revision_add(revision) stack = [graph] while stack: current = stack.pop() if current.dbdate is not None: assert current.dbdate <= revision.date if trackall: # Current directory is an outer isochrone frontier for a previously # processed revision. It should be reused as is. provenance.directory_add_to_revision( revision, current.entry, current.path ) else: # Current directory is not an outer isochrone frontier for any previous # revision. It might be eligible for this one. if is_new_frontier( current, revision=revision, trackall=trackall, lower=lower, mindepth=mindepth, ): assert current.maxdate is not None # Outer frontier should be moved to current position in the isochrone # graph. This is the first time this directory is found in the isochrone # frontier. provenance.directory_set_date_in_isochrone_frontier( current.entry, current.maxdate ) if trackall: provenance.directory_add_to_revision( revision, current.entry, current.path ) flatten_directory(archive, provenance, current.entry) else: # No point moving the frontier here. Either there are no files or they # are being seen for the first time here. Add all blobs to current # revision updating date if necessary, and recursively analyse # subdirectories as candidates to the outer frontier. for blob in current.entry.files: date = provenance.content_get_early_date(blob) if date is None or revision.date < date: provenance.content_set_early_date(blob, revision.date) provenance.content_add_to_revision(revision, blob, current.path) for child in current.children: stack.append(child) def is_new_frontier( node: IsochroneNode, revision: RevisionEntry, trackall: bool = True, lower: bool = True, mindepth: int = 1, ) -> bool: assert node.maxdate is not None # for mypy assert revision.date is not None # idem if trackall: # The only real condition for a directory to be a frontier is that its # content is already known and its maxdate is less (or equal) than # current revision's date. Checking mindepth is meant to skip root # directories (or any arbitrary depth) to improve the result. The # option lower tries to maximize the reusage rate of previously defined # frontiers by keeping them low in the directory tree. return ( node.known and node.maxdate <= revision.date # all content is earlier than revision and node.depth >= mindepth # current node is deeper than the min allowed depth and (has_blobs(node) if lower else True) # there is at least one blob in it ) else: # If we are only tracking first occurrences, we want to ensure that all first # occurrences end up in the content_early_in_rev relation. Thus, we force for # every blob outside a frontier to have an extrictly earlier date. return ( node.maxdate < revision.date # all content is earlier than revision and node.depth >= mindepth # deeper than the min allowed depth and (has_blobs(node) if lower else True) # there is at least one blob ) def has_blobs(node: IsochroneNode) -> bool: # We may want to look for files in different ways to decide whether to define a # frontier or not: # 1. Only files in current node: return any(node.entry.files) # 2. Files anywhere in the isochrone graph # stack = [node] # while stack: # current = stack.pop() # if any( # map(lambda child: isinstance(child.entry, FileEntry), current.children)): # return True # else: # # All children are directory entries. # stack.extend(current.children) # return False # 3. Files in the intermediate directories between current node and any previously # defined frontier: # TODO: complete this case! # return any( # map(lambda child: isinstance(child.entry, FileEntry), node.children) # ) or all( # map( # lambda child: ( # not (isinstance(child.entry, DirectoryEntry) and child.date is None) # ) # or has_blobs(child), # node.children, # ) # ) diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml index f6d498a..2b2f523 100644 --- a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml @@ -1,476 +1,401 @@ # Isochrone graph for R00 -c0d8929936631ecbcf9147be6b8aa13b13b014e4: +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: entry: - id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" - name: "" - dbdate: null + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" maxdate: 1000000000.0 - known: False path: "" children: - entry: - id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" - name: "A" - dbdate: null + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" maxdate: 1000000000.0 - known: False path: "A" children: - entry: - id: "36876d475197b5ad86ad592e8e28818171455f16" - name: "B" - dbdate: null + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" maxdate: 1000000000.0 - known: False path: "A/B" children: - - entry: + - entry: id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" name: "C" - dbdate: null - maxdate: 1000000000.0 - known: False - path: "A/B/C" - children: [] + maxdate: 1000000000.0 + path: "A/B/C" # Isochrone graph for R01 -1444db96cbd8cd791abe83527becee73d3c64e86: - entry: +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" name: "" - dbdate: null - maxdate: 1000000010.0 - known: False - path: "" - children: - - entry: + maxdate: 1000000010.0 + path: "" + children: + - entry: id: "baca735bf8b8720131b4bfdb47c51631a9260348" name: "A" - dbdate: null - maxdate: 1000000010.0 - known: False - path: "A" - children: - - entry: + maxdate: 1000000010.0 + path: "A" + children: + - entry: id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" name: "B" - dbdate: null - maxdate: 1000000010.0 - known: False - path: "A/B" - children: - - entry: - id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" - name: "C" - dbdate: null maxdate: 1000000010.0 - known: False - path: "A/B/C" - children: [] + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" # Isochrone graph for R02 -0d45f1ee524db8f6f0b5a267afac4e733b4b2cee: +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: entry: id: "195601c98c28f04e0d19c218434738006990db72" name: "" - dbdate: null maxdate: 1000000020.0 - known: False path: "" children: - entry: - id: "d591b308488541aabffd854eae85a9bf83a9d9f5" - name: "A" - dbdate: null + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" maxdate: 1000000020.0 - known: False path: "A" children: - entry: - id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" - name: "B" - dbdate: null + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" maxdate: 1000000020.0 - known: False path: "A/B" children: - - entry: + - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" - dbdate: null - maxdate: 1000000010.0 - known: True - path: "A/B/C" - children: [] + maxdate: 1000000010.0 + known: True + path: "A/B/C" # Isochrone graph for R03 -540bd6155a3c50cc47b2e6f43aeaace67a696d1d: +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: entry: - id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" - name: "" - dbdate: null + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" maxdate: 1000000010.0 known: True path: "" children: - entry: - id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" - name: "A" - dbdate: null + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" maxdate: 1000000010.0 known: True path: "A" children: - entry: - id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" - name: "C" + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" dbdate: 1000000010.0 maxdate: 1000000010.0 known: True path: "A/C" - children: [] # Isochrone graph for R04 -17ed10db0612c9b46ba340943cb6b48b25431419: +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: entry: - id: "195601c98c28f04e0d19c218434738006990db72" - name: "" - dbdate: null + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" maxdate: 1000000020.0 known: True path: "" children: - entry: - id: "d591b308488541aabffd854eae85a9bf83a9d9f5" - name: "A" - dbdate: null + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" maxdate: 1000000020.0 known: True path: "A" children: - entry: - id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" - name: "B" - dbdate: null + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" maxdate: 1000000020.0 known: True path: "A/B" children: - - entry: + - entry: id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" name: "C" - dbdate: 1000000010.0 - maxdate: 1000000010.0 - known: True - path: "A/B/C" - children: [] + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" # Isochrone graph for R05 -c8bef45193355db33d64f375b4a4e4f23ac2a4f6: +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: entry: - id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" - name: "" - dbdate: null + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" maxdate: 1000000050.0 - known: False path: "" children: - entry: - id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" - name: "D" - dbdate: null + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" maxdate: 1000000050.0 - known: False path: "D" - children: [] # Isochrone graph for R06 -f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c: +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: entry: - id: "c86d2f588234098642ef6f33ca662a6a9de865bc" - name: "" - dbdate: null + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" maxdate: 1000000050.0 known: True path: "" children: - entry: - id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" - name: "D" - dbdate: null + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" maxdate: 1000000050.0 known: True path: "D" children: - entry: - id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" - name: "E" - dbdate: null + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" maxdate: 1000000050.0 known: True path: "D/E" children: - entry: - id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" - name: "D" - dbdate: null + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" maxdate: 1000000050.0 known: True path: "D/E/D" - children: [] # Isochrone graph for R07 -91ed6a03c80b61e0d63d328f7a4325230e7a0237: +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: entry: - id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" - name: "" - dbdate: null + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" maxdate: 1000000050.0 known: True path: "" children: - entry: - id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" - name: "F" - dbdate: null + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" maxdate: 1000000050.0 known: True path: "F" children: - entry: - id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" - name: "E" - dbdate: null + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" maxdate: 1000000050.0 known: True path: "F/E" children: - - entry: + - entry: id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" name: "D" - dbdate: 1000000050.0 - maxdate: 1000000050.0 - known: True - path: "F/E/D" - children: [] + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E/D" # Isochrone graph for R08 -a97e5c8a626510eefaa637091924cf800b1e8b06: +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: entry: - id: "79e219827e12f40e7146cc6834ee04b617a8073a" - name: "" - dbdate: null + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" maxdate: 1000000050.0 known: True path: "" children: - entry: - id: "9a7b5762e20b11735b93a635cda451c75bd31270" - name: "F" - dbdate: null + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" maxdate: 1000000050.0 known: True path: "F" children: - entry: - id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" - name: "E" - dbdate: null + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" maxdate: 1000000050.0 known: True path: "F/E" children: - - entry: + - entry: id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" name: "D" - dbdate: null - maxdate: 1000000050.0 - known: True - path: "F/E/D" - children: [] + maxdate: 1000000050.0 + known: True + path: "F/E/D" # Isochrone graph for R09 -3c5ad6be812b182ee2a01e84884b8ab7d384a4a0: +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: entry: - id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" - name: "" - dbdate: null + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" maxdate: 1000000090.0 - known: False path: "" children: - entry: - id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" - name: "F" - dbdate: null + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" maxdate: 1000000090.0 - known: False path: "F" children: - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "E" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" maxdate: 1000000090.0 - known: False path: "F/E" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: null - maxdate: 1000000090.0 - known: False - path: "F/E/D" - children: [] + maxdate: 1000000090.0 + path: "F/E/D" # Isochrone graph for R10 -b7c52e28d441ca0cb736fdbe49e39eae3847ad0f: +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: entry: - id: "8c61bb233c89936b310d8b269a35c24bff432227" - name: "" - dbdate: null + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" maxdate: 1000000100.0 - known: False path: "" children: - entry: - id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" - name: "F" - dbdate: null + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" maxdate: 1000000100.0 - known: False path: "F" children: - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "E" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" maxdate: 1000000090.0 known: True path: "F/E" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: null - maxdate: 1000000090.0 - known: True - path: "F/E/D" - children: [] + maxdate: 1000000090.0 + known: True + path: "F/E/D" # Isochrone graph for R11 -f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2: +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: entry: - id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" - name: "" - dbdate: null + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" maxdate: 1000000110.0 - known: False path: "" children: - entry: - id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" - name: "G" - dbdate: null + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" maxdate: 1000000110.0 - known: False path: "G" children: - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "E" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" maxdate: 1000000090.0 known: True path: "G/E" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: 1000000090.0 - maxdate: 1000000090.0 - known: True - path: "G/E/D" - children: [] + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E/D" # Isochrone graph for R12 -99bd98e1803343ecfabe4b05d0218475c2b1bf74: +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: entry: - id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" - name: "" - dbdate: null + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" maxdate: 1000000120.0 - known: False path: "" children: - entry: - id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" - name: "G" - dbdate: null + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" maxdate: 1000000120.0 - known: False path: "G" children: - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "H" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" maxdate: 1000000090.0 known: True path: "G/H" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: 1000000090.0 - maxdate: 1000000090.0 - known: True - path: "G/H/D" - children: [] + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" # Isochrone graph for R13 -10287882c7ed1b7c96f43da269e6a868b98291ff: +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: entry: - id: "148f08e057416af1e471abb3dcd594d27233085d" - name: "" - dbdate: null + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" maxdate: 1000000130.0 - known: False path: "" children: - entry: - id: "8084b999790aab88e5119915ea1083e747a3f42f" - name: "G" - dbdate: null + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" maxdate: 1000000130.0 - known: False path: "G" children: - entry: - id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" - name: "D" + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" dbdate: 1000000090.0 maxdate: 1000000090.0 known: True path: "G/D" - children: [] - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "I" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" maxdate: 1000000090.0 known: True path: "G/I" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: 1000000090.0 - maxdate: 1000000090.0 - known: True - path: "G/I/D" - children: [] + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I/D" - entry: - id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" - name: "H" - dbdate: null + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" maxdate: 1000000090.0 known: True path: "G/H" children: - - entry: + - entry: id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" name: "D" - dbdate: 1000000090.0 - maxdate: 1000000090.0 - known: True - path: "G/H/D" - children: [] + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml new file mode 100644 index 0000000..b05f986 --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml @@ -0,0 +1,401 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "G/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E/D" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + maxdate: 1000000090.0 + known: True + path: "G/I" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml new file mode 100644 index 0000000..45e3a2e --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml @@ -0,0 +1,371 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml new file mode 100644 index 0000000..cee448f --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml @@ -0,0 +1,365 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" diff --git a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml new file mode 100644 index 0000000..182a3e0 --- /dev/null +++ b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml @@ -0,0 +1,188 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "1c533587277731236616cac0d44f3b46c1da0f8a" + graph: + entry: + id: "2afae58027276dad2bdced5a505e8d781a7add5b" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R03 +- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R04 +- rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" + graph: + entry: + id: "2afae58027276dad2bdced5a505e8d781a7add5b" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R05 +- rev: "1dfac0491892096948d6a02bf12a2fed4bf75743" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + known: True # TODO: analyse this, as it might be a source of issues! + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R06 +- rev: "53519b5a5e8cf12a4f81f82e489f95c1d04d5314" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000050.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000050.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py index 8af6a3a..53e3400 100644 --- a/swh/provenance/tests/test_isochrone_graph.py +++ b/swh/provenance/tests/test_isochrone_graph.py @@ -1,88 +1,88 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from copy import deepcopy from datetime import datetime, timezone + import pytest import yaml from swh.model.hashutil import hash_to_bytes from swh.provenance.model import DirectoryEntry, RevisionEntry from swh.provenance.provenance import IsochroneNode, build_isochrone_graph, revision_add from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data from swh.provenance.tests.test_provenance_db import ts2dt def isochrone_graph_from_dict(d, depth=0) -> IsochroneNode: """Takes a dictionary representing a tree of IsochroneNode objects, and recursively builds the corresponding graph.""" - d = d.copy() + d = deepcopy(d) d["entry"]["id"] = hash_to_bytes(d["entry"]["id"]) d["entry"]["name"] = bytes(d["entry"]["name"], encoding="utf-8") - if d["dbdate"] is not None: - d["dbdate"] = datetime.fromtimestamp(d["dbdate"], timezone.utc) + dbdate = d.get("dbdate", None) + if dbdate is not None: + dbdate = datetime.fromtimestamp(d["dbdate"], timezone.utc) - if d["maxdate"] is not None: - d["maxdate"] = datetime.fromtimestamp(d["maxdate"], timezone.utc) + children = d.get("children", []) node = IsochroneNode( entry=DirectoryEntry(**d["entry"]), - dbdate=d["dbdate"], + dbdate=dbdate, depth=depth, ) - node.maxdate = d["maxdate"] - node.known = d["known"] + node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc) + node.known = d.get("known", False) node.path = bytes(d["path"], encoding="utf-8") node.children = [ - isochrone_graph_from_dict(child, depth=depth + 1) for child in d["children"] + isochrone_graph_from_dict(child, depth=depth + 1) for child in children ] return node @pytest.mark.parametrize( "repo, lower, mindepth", ( ("cmdbts2", True, 1), - # ("cmdbts2", False, 1), - # ("cmdbts2", True, 2), - # ("cmdbts2", False, 2), - # ("out-of-order", True, 1), + ("cmdbts2", False, 1), + ("cmdbts2", True, 2), + ("cmdbts2", False, 2), + ("out-of-order", True, 1), ), ) def test_isochrone_graph(provenance, swh_storage, archive, repo, lower, mindepth): # read data/README.md for more details on how these datasets are generated data = load_repo_data(repo) fill_storage(swh_storage, data) revisions = {rev["id"]: rev for rev in data["revision"]} filename = f"graphs_{repo}_{'lower' if lower else 'upper'}_{mindepth}.yaml" with open(get_datafile(filename)) as file: - expected = yaml.full_load(file) - - for rev, graph_as_dict in expected.items(): - revision = revisions[hash_to_bytes(rev)] + for expected in yaml.full_load(file): + revision = revisions[hash_to_bytes(expected["rev"])] entry = RevisionEntry( id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], ) - expected_graph = isochrone_graph_from_dict(graph_as_dict) + expected_graph = isochrone_graph_from_dict(expected["graph"]) print("Expected", expected_graph) # Create graph for current revision and check it has the expected structure. computed_graph = build_isochrone_graph( archive, provenance, entry, DirectoryEntry(entry.root), ) print("Computed", computed_graph) assert computed_graph == expected_graph # Add current revision so that provenance info is kept up to date for the # following ones. revision_add(provenance, archive, [entry], lower=lower, mindepth=mindepth)