Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
index cc2a92d..4e77eb6 100644
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -1,275 +1,257 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from __future__ import annotations
from datetime import datetime, timezone
import os
from typing import Any, Dict, Optional, Set
from swh.core.statsd import statsd
from swh.model.hashutil import hash_to_hex
from swh.model.model import Sha1Git
from .archive import ArchiveInterface
from .interface import ProvenanceInterface
from .model import DirectoryEntry, RevisionEntry
# Metric name handed to ``statsd.timed`` by the graph-building routines of this
# module.
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
# Metric name handed to ``statsd.increment`` when an isochrone node is
# invalidated.
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
# Smallest representable datetime, made timezone-aware (UTC). It is used as the
# floor element when taking the max over child/file dates, so the list passed
# to max() is never empty.
UTCMIN = datetime.min.replace(tzinfo=timezone.utc)
class HistoryNode:
    """Node of a revision history graph, wrapping a single revision entry.

    Attributes:
        entry: the wrapped revision entry.
        is_head: whether the revision is directly pointed by an origin (ie. it
            is a head revision for some snapshot).
        in_history: whether the revision appears in the history graph of an
            already processed revision in the provenance database.
    """

    def __init__(
        self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False
    ) -> None:
        # XXX: the current simplified version of the origin-revision layer
        # algorithm does not use the two flags below at all. They are kept for
        # now but might be removed in the future (hence, RevisionEntry might be
        # used instead of HistoryNode).
        self.entry, self.is_head, self.in_history = entry, is_head, in_history

    def __str__(self) -> str:
        entry, is_head, in_history = self.entry, self.is_head, self.in_history
        return f"<{entry}: is_head={is_head}, in_history={in_history}>"

    def as_dict(self) -> Dict[str, Any]:
        """Return the node as a plain dict: hex revision id plus both flags."""
        summary: Dict[str, Any] = {"rev": hash_to_hex(self.entry.id)}
        summary["is_head"] = self.is_head
        summary["in_history"] = self.in_history
        return summary
class HistoryGraph:
    """Parent graph of the history reachable from a given revision.

    The graph is stored as a mapping from each :class:`HistoryNode` to the set
    of nodes wrapping its parent revisions. ``head`` is the node built for the
    revision the graph was constructed from.
    """

    @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
    def __init__(
        self,
        provenance: ProvenanceInterface,
        archive: ArchiveInterface,
        revision: RevisionEntry,
    ) -> None:
        """Walk the ancestry of ``revision`` and record every parent edge.

        Args:
            provenance: queried through ``revision_visited`` and
                ``revision_in_history`` to set the flags of each node.
            archive: handed to ``retrieve_parents`` to load the parents of each
                traversed revision.
            revision: revision whose history is explored; it becomes ``head``.
        """
        self._head = HistoryNode(
            revision,
            is_head=provenance.revision_visited(revision),
            in_history=provenance.revision_in_history(revision),
        )
        self._graph: Dict[HistoryNode, Set[HistoryNode]] = {}
        # HistoryNode hashes by identity, so a membership test on freshly built
        # nodes can never detect an already-seen revision. Keep exactly one
        # node per revision id instead: ancestors shared by several paths
        # (merges) are then traversed, and queried in the provenance DB, once.
        nodes: Dict[Sha1Git, HistoryNode] = {revision.id: self._head}
        stack = [self._head]
        while stack:
            current = stack.pop()
            parents: Set[HistoryNode] = set()
            self._graph[current] = parents
            current.entry.retrieve_parents(archive)
            for parent in current.entry.parents:
                node = nodes.get(parent.id)
                if node is None:
                    node = HistoryNode(
                        parent,
                        is_head=provenance.revision_visited(parent),
                        in_history=provenance.revision_in_history(parent),
                    )
                    nodes[parent.id] = node
                    # Only new nodes are scheduled: a reused node is either
                    # already expanded or still waiting on the stack.
                    stack.append(node)
                parents.add(node)

    @property
    def head(self) -> HistoryNode:
        """Node wrapping the revision the graph was built from."""
        return self._head

    @property
    def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]:
        """Mapping from each node to the set of its parent nodes."""
        return self._graph

    def __str__(self) -> str:
        # The closing ">" matches the representation used by the node classes.
        return f"<HistoryGraph: head={self._head}, graph={self._graph}>"

    def as_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the graph.

        The result holds the head node as a dict and, for every node (keyed by
        its hex revision id), the list of its parents as dicts sorted by
        ``"rev"``.
        """
        return {
            "head": self.head.as_dict(),
            "graph": {
                hash_to_hex(node.entry.id): sorted(
                    [parent.as_dict() for parent in parents],
                    key=lambda d: d["rev"],
                )
                for node, parents in self._graph.items()
            },
        }
# Node of the isochrone graph: wraps one directory entry together with the
# dates (dbdate / maxdate) manipulated by build_isochrone_graph.
class IsochroneNode:
def __init__(
self,
entry: DirectoryEntry,
dbdate: Optional[datetime] = None,
depth: int = 0,
prefix: bytes = b"",
) -> None:
self.entry = entry
self.depth = depth
# dbdate is the maxdate for this node that comes from the DB
self._dbdate: Optional[datetime] = dbdate
# maxdate is set by the maxdate computation algorithm
self.maxdate: Optional[datetime] = None
- # known is True if this node is already known in the db; either because
- # the current directory actually exists in the database, or because all
- # the content of the current directory is known (subdirectories and files)
- self.known = self.dbdate is not None
# invalid is only switched to True by invalidate()
self.invalid = False
# path is the entry name joined to the given prefix, or the bare entry name
# when the prefix is empty
self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name
# children are only added through add_directory()
self.children: Set[IsochroneNode] = set()
@property
def dbdate(self) -> Optional[datetime]:
# use a property to make this attribute (mostly) read-only
return self._dbdate
# Drop both dates of this node and flag it as invalid; the operation is also
# counted through statsd.
def invalidate(self) -> None:
statsd.increment(
metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"}
)
self._dbdate = None
self.maxdate = None
- self.known = False
self.invalid = True
# Create a child node for the given directory (one level deeper, with this
# node's path as prefix), register it in children and return it.
def add_directory(
self, child: DirectoryEntry, date: Optional[datetime] = None
) -> IsochroneNode:
# we should not be processing this node (ie add subdirectories or files) if it's
# actually known by the provenance DB
assert self.dbdate is None
node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path)
self.children.add(node)
return node
def __str__(self) -> str:
return (
- f"<{self.entry}: depth={self.depth}, "
- f"dbdate={self.dbdate}, maxdate={self.maxdate}, "
- f"known={self.known}, invalid={self.invalid}, path={self.path!r}, "
+ f"<{self.entry}: depth={self.depth}, dbdate={self.dbdate}, "
+ f"maxdate={self.maxdate}, invalid={self.invalid}, path={self.path!r}, "
f"children=[{', '.join(str(child) for child in self.children)}]>"
)
# Equality compares every attribute (including dates and children), whereas
# the hash below only relies on entry, depth and path.
def __eq__(self, other: Any) -> bool:
return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__
def __hash__(self) -> int:
# only immutable attributes are considered to compute hash
return hash((self.entry, self.depth, self.path))
# Build the isochrone graph rooted at `directory` for `revision` and return its
# root node. `minsize` is forwarded unchanged to `retrieve_children`. The
# revision must have a date, and `directory` must be its root directory (both
# are asserted below).
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_isochrone_graph"})
def build_isochrone_graph(
provenance: ProvenanceInterface,
archive: ArchiveInterface,
revision: RevisionEntry,
directory: DirectoryEntry,
minsize: int = 0,
) -> IsochroneNode:
assert revision.date is not None
assert revision.root == directory.id
# this function processes a revision in 2 steps:
#
# 1. build the tree structure of IsochroneNode objects (one INode per
# directory under the root directory of the revision but not following
# known subdirectories), and gather the dates from the DB for already
# known objects; for files, just keep all the dates in a global 'fdates'
# dict; note that in this step, we will only recurse the directories
# that are not already known.
#
# 2. compute the maxdate for each node of the tree that was not found in the DB.
# Build the nodes structure
root_date = provenance.directory_get_date_in_isochrone_frontier(directory)
root = IsochroneNode(directory, dbdate=root_date)
stack = [root]
fdates: Dict[Sha1Git, datetime] = {} # map {file_id: date}
while stack:
current = stack.pop()
# Only expand nodes without a usable DB date: either no date at all, or a
# date that is not strictly older than the revision being processed.
if current.dbdate is None or current.dbdate >= revision.date:
# If current directory has an associated date in the isochrone frontier that
# is greater or equal to the current revision's one, it should be ignored as
# the revision is being processed out of order.
if current.dbdate is not None and current.dbdate >= revision.date:
current.invalidate()
# Pre-query all known dates for directories in the current directory
# for the provenance object to have them cached and (potentially) improve
# performance.
current.entry.retrieve_children(archive, minsize=minsize)
ddates = provenance.directory_get_dates_in_isochrone_frontier(
current.entry.dirs
)
for dir in current.entry.dirs:
# Recursively analyse subdirectory nodes
node = current.add_directory(dir, date=ddates.get(dir.id, None))
stack.append(node)
# Accumulate the known dates of the files of this directory.
fdates.update(provenance.content_get_early_dates(current.entry.files))
# Precalculate max known date for each node in the graph (only directory nodes are
# pushed to the stack).
stack = [root]
while stack:
current = stack.pop()
# Current directory node is known if it already has an assigned date (ie. it was
# already seen as an isochrone frontier).
- if current.known:
+ if current.dbdate is not None:
assert current.maxdate is None
current.maxdate = current.dbdate
else:
if any(x.maxdate is None for x in current.children):
# at least one child of current has no maxdate yet
# Current node needs to be analysed again after its children.
stack.append(current)
for child in current.children:
if child.maxdate is None:
# if child.maxdate is None, it must be processed
stack.append(child)
else:
# all the files and directories under current have a maxdate,
# we can infer the maxdate for current directory
assert current.maxdate is None
# if all content is already known, update current directory info.
# Files missing from fdates count as dated at revision.date; in the added
# lines the resulting maximum is capped at revision.date through min().
- current.maxdate = max(
- [UTCMIN]
- + [
- child.maxdate
- for child in current.children
- if child.maxdate is not None # unnecessary, but needed for mypy
- ]
- + [
- fdates.get(file.id, revision.date)
- for file in current.entry.files
- ]
+ current.maxdate = min(
+ max(
+ [UTCMIN]
+ + [
+ child.maxdate
+ for child in current.children
+ if child.maxdate is not None # for mypy
+ ]
+ + [
+ fdates.get(file.id, revision.date)
+ for file in current.entry.files
+ ]
+ ),
+ revision.date,
)
- if current.maxdate <= revision.date:
- current.known = (
- # true if all subdirectories are known
- all(child.known for child in current.children)
- # true if all files are in fdates, i.e. if all files were known
- # *before building this isochrone graph node*
- # Note: the 'all()' is lazy: will stop iterating as soon as
- # possible
- and all((file.id in fdates) for file in current.entry.files)
- )
- else:
- # at least one content is being processed out-of-order, then current
- # node should be treated as unknown
- current.maxdate = revision.date
- current.known = False
return root
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
index 2b2f523..b3dd843 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml
@@ -1,401 +1,370 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "G/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E/D"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
maxdate: 1000000090.0
- known: True
path: "G/I"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
index b05f986..b370e6f 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml
@@ -1,401 +1,370 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "G/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E/D"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
maxdate: 1000000090.0
- known: True
path: "G/I"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
maxdate: 1000000090.0
- known: True
path: "G/H"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H/D"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
index 45e3a2e..3639820 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml
@@ -1,371 +1,344 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
index cee448f..8ed1c0f 100644
--- a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
+++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml
@@ -1,365 +1,339 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R03
- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d"
graph:
entry:
id: "cea28838ec1fb757e44b724fe1365d64c6a94e24"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "48007c961cc734d1f63886d0413a6dc605e3e2ea"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R04
- rev: "17ed10db0612c9b46ba340943cb6b48b25431419"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000020.0
- known: True
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000020.0
- known: True
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000020.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R05
- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6"
graph:
entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
path: "D"
# Isochrone graph for R06
- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c"
graph:
entry:
id: "c86d2f588234098642ef6f33ca662a6a9de865bc"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
maxdate: 1000000050.0
- known: True
path: "D/E"
children:
- entry:
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e"
name: "D"
maxdate: 1000000050.0
- known: True
path: "D/E/D"
# Isochrone graph for R07
- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237"
graph:
entry:
id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a"
name: "E"
dbdate: 1000000050.0
maxdate: 1000000050.0
- known: True
path: "F/E"
# Isochrone graph for R08
- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06"
graph:
entry:
id: "79e219827e12f40e7146cc6834ee04b617a8073a"
name: ""
maxdate: 1000000050.0
- known: True
path: ""
children:
- entry:
id: "9a7b5762e20b11735b93a635cda451c75bd31270"
name: "F"
maxdate: 1000000050.0
- known: True
path: "F"
children:
- entry:
id: "81b84d8fd8ceebd47f51896d19ce1aa286629225"
name: "E"
maxdate: 1000000050.0
- known: True
path: "F/E"
children:
- entry:
id: "cb211f2d9dfee6c3968837a07960afd6ab09506c"
name: "D"
maxdate: 1000000050.0
- known: True
path: "F/E/D"
# Isochrone graph for R09
- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0"
graph:
entry:
id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0"
name: ""
maxdate: 1000000090.0
path: ""
children:
- entry:
id: "16cb311fc491b0b6dfade153191ee1c09d2152cf"
name: "F"
maxdate: 1000000090.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
path: "F/E/D"
# Isochrone graph for R10
- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f"
graph:
entry:
id: "8c61bb233c89936b310d8b269a35c24bff432227"
name: ""
maxdate: 1000000100.0
path: ""
children:
- entry:
id: "db2b00211f77c6c7f1f742020e483b506b82b5d6"
name: "F"
maxdate: 1000000100.0
path: "F"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
maxdate: 1000000090.0
- known: True
path: "F/E"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "F/E/D"
# Isochrone graph for R11
- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2"
graph:
entry:
id: "b29a1c3fee0057016af424c41d58a8811b8c3a41"
name: ""
maxdate: 1000000110.0
path: ""
children:
- entry:
id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1"
name: "G"
maxdate: 1000000110.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "E"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/E"
# Isochrone graph for R12
- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74"
graph:
entry:
id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5"
name: ""
maxdate: 1000000120.0
path: ""
children:
- entry:
id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f"
name: "G"
maxdate: 1000000120.0
path: "G"
children:
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
# Isochrone graph for R13
- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff"
graph:
entry:
id: "148f08e057416af1e471abb3dcd594d27233085d"
name: ""
maxdate: 1000000130.0
path: ""
children:
- entry:
id: "8084b999790aab88e5119915ea1083e747a3f42f"
name: "G"
maxdate: 1000000130.0
path: "G"
children:
- entry:
id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78"
name: "D"
maxdate: 1000000090.0
- known: True
path: "G/D"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "I"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/I"
- entry:
id: "8b4df27934ce48db6f4bdf326b3bce89d4571252"
name: "H"
dbdate: 1000000090.0
maxdate: 1000000090.0
- known: True
path: "G/H"
diff --git a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
index 147e560..a4aad93 100644
--- a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
+++ b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml
@@ -1,185 +1,174 @@
# Isochrone graph for R00
- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
graph:
entry:
id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1"
name: ""
maxdate: 1000000000.0
path: ""
children:
- entry:
id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b"
name: "A"
maxdate: 1000000000.0
path: "A"
children:
- entry:
id: "36876d475197b5ad86ad592e8e28818171455f16"
name: "B"
maxdate: 1000000000.0
path: "A/B"
children:
- entry:
id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3"
name: "C"
maxdate: 1000000000.0
path: "A/B/C"
# Isochrone graph for R01
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
path: "A/B/C"
# Isochrone graph for R02
- rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
graph:
entry:
id: "2afae58027276dad2bdced5a505e8d781a7add5b"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R03
- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000010.0
- known: True
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/B/C"
# Isochrone graph for R04
- rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
graph:
entry:
id: "2afae58027276dad2bdced5a505e8d781a7add5b"
name: ""
maxdate: 1000000010.0
- known: True
path: ""
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "A"
maxdate: 1000000010.0
- known: True
path: "A"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000010.0
maxdate: 1000000010.0
- known: True
path: "A/C"
# Isochrone graph for R05
- rev: "1dfac0491892096948d6a02bf12a2fed4bf75743"
graph:
entry:
id: "b3cf11b22c9f93c3c494cf90ab072f394155072d"
name: ""
maxdate: 1000000005.0
path: ""
children:
- entry:
id: "baca735bf8b8720131b4bfdb47c51631a9260348"
name: "A"
maxdate: 1000000005.0
path: "A"
children:
- entry:
id: "4b28979d88ed209a09c272bcc80f69d9b18339c2"
name: "B"
maxdate: 1000000005.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
maxdate: 1000000005.0
invalid: True
path: "A/B/C"
# Isochrone graph for R06
- rev: "53519b5a5e8cf12a4f81f82e489f95c1d04d5314"
graph:
entry:
id: "195601c98c28f04e0d19c218434738006990db72"
name: ""
maxdate: 1000000050.0
path: ""
children:
- entry:
id: "d591b308488541aabffd854eae85a9bf83a9d9f5"
name: "A"
maxdate: 1000000050.0
path: "A"
children:
- entry:
id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92"
name: "B"
maxdate: 1000000050.0
path: "A/B"
children:
- entry:
id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf"
name: "C"
dbdate: 1000000005.0
maxdate: 1000000005.0
- known: True
path: "A/B/C"
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
index 364a564..11d5881 100644
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -1,114 +1,113 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from copy import deepcopy
from datetime import datetime, timezone
from typing import Any, Dict
import pytest
import yaml
from swh.model.hashutil import hash_to_bytes
from swh.provenance.archive import ArchiveInterface
from swh.provenance.graph import IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
load_repo_data,
ts2dt,
)
def isochrone_graph_from_dict(d: Dict[str, Any], depth: int = 0) -> IsochroneNode:
    """Recursively build an IsochroneNode tree from its dictionary form.

    Args:
        d: description of a node. It must provide ``entry`` (a mapping whose
            ``id`` is converted with ``hash_to_bytes`` and whose ``name`` is
            UTF-8 encoded), ``maxdate`` (a timestamp) and ``path``. The keys
            ``dbdate`` (a timestamp), ``invalid`` and ``children`` are
            optional.
        depth: depth given to the node built from ``d``; each child is built
            with ``depth + 1``.

    Returns:
        The node described by ``d``, with its ``children`` set populated
        recursively.
    """
    # Work on a private copy: the entry mapping is rewritten in place below and
    # the caller's data must stay untouched.
    d = deepcopy(d)
    d["entry"]["id"] = hash_to_bytes(d["entry"]["id"])
    d["entry"]["name"] = bytes(d["entry"]["name"], encoding="utf-8")

    # Timestamps are interpreted as UTC so the resulting datetimes are
    # timezone-aware.
    dbdate = d.get("dbdate")
    if dbdate is not None:
        dbdate = datetime.fromtimestamp(dbdate, timezone.utc)

    node = IsochroneNode(
        entry=DirectoryEntry(**d["entry"]),
        dbdate=dbdate,
        depth=depth,
    )
    node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc)
    node.invalid = d.get("invalid", False)
    node.path = bytes(d["path"], encoding="utf-8")
    node.children = {
        isochrone_graph_from_dict(child, depth=depth + 1)
        for child in d.get("children", [])
    }
    return node
@pytest.mark.parametrize(
    "repo, lower, mindepth",
    (
        ("cmdbts2", True, 1),
        ("cmdbts2", False, 1),
        ("cmdbts2", True, 2),
        ("cmdbts2", False, 2),
        ("out-of-order", True, 1),
    ),
)
@pytest.mark.parametrize("batch", (True, False))
def test_isochrone_graph(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    repo: str,
    lower: bool,
    mindepth: int,
    batch: bool,
) -> None:
    """Compare computed isochrone graphs against the expected YAML fixtures.

    For every revision listed in the fixture file selected by ``repo``,
    ``lower`` and ``mindepth``, the isochrone graph is built and checked for
    equality with the expected one. The revision is then added to the
    provenance storage before the next one is handled.
    """
    # read data/README.md for more details on how these datasets are generated
    dataset = load_repo_data(repo)
    fill_storage(archive.storage, dataset)
    revisions_by_id = {rev["id"]: rev for rev in dataset["revision"]}

    filename = f"graphs_{repo}_{'lower' if lower else 'upper'}_{mindepth}.yaml"
    with open(get_datafile(filename)) as stream:
        for case in yaml.full_load(stream):
            print("# Processing revision", case["rev"])
            rev_data = revisions_by_id[hash_to_bytes(case["rev"])]
            entry = RevisionEntry(
                id=rev_data["id"],
                date=ts2dt(rev_data["date"]),
                root=rev_data["directory"],
            )
            expected_graph = isochrone_graph_from_dict(case["graph"])
            print("Expected graph:", expected_graph)

            # Create graph for current revision and check it has the expected
            # structure.
            assert entry.root is not None
            root_directory = DirectoryEntry(entry.root)
            computed_graph = build_isochrone_graph(
                provenance,
                archive,
                entry,
                root_directory,
            )
            print("Computed graph:", computed_graph)
            assert computed_graph == expected_graph

            # Add current revision so that provenance info is kept up to date
            # for the following ones.
            revision_add(
                provenance,
                archive,
                [entry],
                lower=lower,
                mindepth=mindepth,
                commit=not batch,
            )

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:27 PM (6 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3251831

Event Timeline