diff --git a/swh/provenance/model.py b/swh/provenance/model.py --- a/swh/provenance/model.py +++ b/swh/provenance/model.py @@ -141,6 +141,15 @@ def __str__(self): return f"" + def __eq__(self, other): + return isinstance(other, DirectoryEntry) and (self.id, self.name) == ( + other.id, + other.name, + ) + + def __hash__(self): + return hash((self.id, self.name)) + class FileEntry: def __init__(self, id: bytes, name: bytes): @@ -149,3 +158,12 @@ def __str__(self): return f"" + + def __eq__(self, other): + return isinstance(other, FileEntry) and (self.id, self.name) == ( + other.id, + other.name, + ) + + def __hash__(self): + return hash((self.id, self.name)) diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -1,3 +1,4 @@ +from collections import Counter from datetime import datetime, timezone import logging import os @@ -288,12 +289,6 @@ self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name self.children: List[IsochroneNode] = [] - def __str__(self): - return ( - f"<{self.entry.__class__.__name__}[{self.entry.name}]: " - f"known={self.known}, maxdate={self.maxdate}, dbdate={self.dbdate}>" - ) - @property def dbdate(self): # use a property to make this attribute (mostly) read-only @@ -314,6 +309,41 @@ self.children.append(node) return node + def __str__(self): + return ( + f"<{self.entry}: " + f"known={self.known}, maxdate={self.maxdate}, " + f"dbdate={self.dbdate}, path={self.path}, " + f"children=[{', '.join(str(child) for child in self.children)}]>" + ) + + def __eq__(self, other): + return ( + isinstance(other, IsochroneNode) + and ( + self.entry, + self.depth, + self._dbdate, + self.maxdate, + self.known, + self.path, + ) + == ( + other.entry, + other.depth, + other._dbdate, + other.maxdate, + other.known, + other.path, + ) + and Counter(self.children) == Counter(other.children) + ) + + def __hash__(self): + return hash( + (self.entry, 
self.depth, self._dbdate, self.maxdate, self.known, self.path) + ) + def build_isochrone_graph( archive: ArchiveInterface, diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_1.yaml @@ -0,0 +1,401 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + 
maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + 
id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + 
graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "G/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E/D" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + 
maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + maxdate: 1000000090.0 + known: True + path: "G/I" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_lower_2.yaml @@ -0,0 +1,401 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + 
children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 
1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: 
"F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "G/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E/D" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: 
"5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + maxdate: 1000000090.0 + known: True + path: "G/I" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + maxdate: 1000000090.0 + known: True + path: "G/H" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H/D" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_1.yaml @@ -0,0 +1,371 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + 
id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: 
"" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: 
True + path: "F/E/D" +# Isochrone graph for R08 +- rev: "a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: 
"74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" diff --git a/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/graphs_cmdbts2_upper_2.yaml @@ -0,0 +1,365 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + 
path: "A" + children: + - entry: + id: "36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "0d45f1ee524db8f6f0b5a267afac4e733b4b2cee" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R03 +- rev: "540bd6155a3c50cc47b2e6f43aeaace67a696d1d" + graph: + entry: + id: "cea28838ec1fb757e44b724fe1365d64c6a94e24" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "48007c961cc734d1f63886d0413a6dc605e3e2ea" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R04 +- rev: "17ed10db0612c9b46ba340943cb6b48b25431419" + graph: + entry: + id: 
"195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000020.0 + known: True + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000020.0 + known: True + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000020.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R05 +- rev: "c8bef45193355db33d64f375b4a4e4f23ac2a4f6" + graph: + entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + path: "D" +# Isochrone graph for R06 +- rev: "f5c16cb16dc29d9e5b25bd3d4d1e252ac7d5493c" + graph: + entry: + id: "c86d2f588234098642ef6f33ca662a6a9de865bc" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "8a3993f4efa9385ce993775cab5ec4dc2c78d7f6" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + maxdate: 1000000050.0 + known: True + path: "D/E" + children: + - entry: + id: "12f1bc8ca9678ecc055bc65efd7fb4dd1f13457e" + name: "D" + maxdate: 1000000050.0 + known: True + path: "D/E/D" +# Isochrone graph for R07 +- rev: "91ed6a03c80b61e0d63d328f7a4325230e7a0237" + graph: + entry: + id: "641baf6738fa5ebb3c5eb39af45f62ff52f8cc62" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "b0ae56ed5ca7daa34fd7a91a28db443ab3c389a0" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "fa63f03d67d1a15563afe9f8ba97832dfb20f42a" + name: "E" + dbdate: 1000000050.0 + maxdate: 1000000050.0 + known: True + path: "F/E" +# Isochrone graph for R08 +- rev: 
"a97e5c8a626510eefaa637091924cf800b1e8b06" + graph: + entry: + id: "79e219827e12f40e7146cc6834ee04b617a8073a" + name: "" + maxdate: 1000000050.0 + known: True + path: "" + children: + - entry: + id: "9a7b5762e20b11735b93a635cda451c75bd31270" + name: "F" + maxdate: 1000000050.0 + known: True + path: "F" + children: + - entry: + id: "81b84d8fd8ceebd47f51896d19ce1aa286629225" + name: "E" + maxdate: 1000000050.0 + known: True + path: "F/E" + children: + - entry: + id: "cb211f2d9dfee6c3968837a07960afd6ab09506c" + name: "D" + maxdate: 1000000050.0 + known: True + path: "F/E/D" +# Isochrone graph for R09 +- rev: "3c5ad6be812b182ee2a01e84884b8ab7d384a4a0" + graph: + entry: + id: "53a71b331248f2144f4f012fd7e05f86b8ee62a0" + name: "" + maxdate: 1000000090.0 + path: "" + children: + - entry: + id: "16cb311fc491b0b6dfade153191ee1c09d2152cf" + name: "F" + maxdate: 1000000090.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + path: "F/E/D" +# Isochrone graph for R10 +- rev: "b7c52e28d441ca0cb736fdbe49e39eae3847ad0f" + graph: + entry: + id: "8c61bb233c89936b310d8b269a35c24bff432227" + name: "" + maxdate: 1000000100.0 + path: "" + children: + - entry: + id: "db2b00211f77c6c7f1f742020e483b506b82b5d6" + name: "F" + maxdate: 1000000100.0 + path: "F" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + maxdate: 1000000090.0 + known: True + path: "F/E" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "F/E/D" +# Isochrone graph for R11 +- rev: "f4b2d6d273a6f0d9f2b1299c668b7b7ea095a6a2" + graph: + entry: + id: "b29a1c3fee0057016af424c41d58a8811b8c3a41" + name: "" + maxdate: 1000000110.0 + path: "" + children: + - entry: + id: "74fb9789d162f02deabbdfbc3c8daa97f31559a1" + name: "G" + 
maxdate: 1000000110.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "E" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/E" +# Isochrone graph for R12 +- rev: "99bd98e1803343ecfabe4b05d0218475c2b1bf74" + graph: + entry: + id: "6b2d11dd7bc6c7d7dcf59afed80f57413d929cf5" + name: "" + maxdate: 1000000120.0 + path: "" + children: + - entry: + id: "5aa1d185e7e32bb53a16ba0db1b06d3a6243b36f" + name: "G" + maxdate: 1000000120.0 + path: "G" + children: + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" +# Isochrone graph for R13 +- rev: "10287882c7ed1b7c96f43da269e6a868b98291ff" + graph: + entry: + id: "148f08e057416af1e471abb3dcd594d27233085d" + name: "" + maxdate: 1000000130.0 + path: "" + children: + - entry: + id: "8084b999790aab88e5119915ea1083e747a3f42f" + name: "G" + maxdate: 1000000130.0 + path: "G" + children: + - entry: + id: "2cb3ae467165716d1d0e7fa85190d753c3b76d78" + name: "D" + maxdate: 1000000090.0 + known: True + path: "G/D" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "I" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/I" + - entry: + id: "8b4df27934ce48db6f4bdf326b3bce89d4571252" + name: "H" + dbdate: 1000000090.0 + maxdate: 1000000090.0 + known: True + path: "G/H" diff --git a/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/graphs_out-of-order_lower_1.yaml @@ -0,0 +1,188 @@ +# Isochrone graph for R00 +- rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" + graph: + entry: + id: "a4cb5e6b2831f7e8eef0e6e08e43d642c97303a1" + name: "" + maxdate: 1000000000.0 + path: "" + children: + - entry: + id: "1c8d9fd9afa7e5a2cf52a3db6f05dc5c3a1ca86b" + name: "A" + maxdate: 1000000000.0 + path: "A" + children: + - entry: + id: 
"36876d475197b5ad86ad592e8e28818171455f16" + name: "B" + maxdate: 1000000000.0 + path: "A/B" + children: + - entry: + id: "98f7a4a23d8df1fb1a5055facae2aff9b2d0a8b3" + name: "C" + maxdate: 1000000000.0 + path: "A/B/C" +# Isochrone graph for R01 +- rev: "1444db96cbd8cd791abe83527becee73d3c64e86" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + path: "A/B/C" +# Isochrone graph for R02 +- rev: "1c533587277731236616cac0d44f3b46c1da0f8a" + graph: + entry: + id: "2afae58027276dad2bdced5a505e8d781a7add5b" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R03 +- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R04 +- rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" + graph: + entry: + id: 
"2afae58027276dad2bdced5a505e8d781a7add5b" + name: "" + maxdate: 1000000010.0 + known: True + path: "" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/C" +# Isochrone graph for R05 +- rev: "1dfac0491892096948d6a02bf12a2fed4bf75743" + graph: + entry: + id: "b3cf11b22c9f93c3c494cf90ab072f394155072d" + name: "" + maxdate: 1000000010.0 + known: True # TODO: analyse this, as it might be a source of issues! + path: "" + children: + - entry: + id: "baca735bf8b8720131b4bfdb47c51631a9260348" + name: "A" + maxdate: 1000000010.0 + known: True + path: "A" + children: + - entry: + id: "4b28979d88ed209a09c272bcc80f69d9b18339c2" + name: "B" + maxdate: 1000000010.0 + known: True + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + maxdate: 1000000010.0 + known: True + path: "A/B/C" +# Isochrone graph for R06 +- rev: "53519b5a5e8cf12a4f81f82e489f95c1d04d5314" + graph: + entry: + id: "195601c98c28f04e0d19c218434738006990db72" + name: "" + maxdate: 1000000050.0 + path: "" + children: + - entry: + id: "d591b308488541aabffd854eae85a9bf83a9d9f5" + name: "A" + maxdate: 1000000050.0 + path: "A" + children: + - entry: + id: "0e540a8ebea2f5de3e62b92e2139902cf6f46e92" + name: "B" + maxdate: 1000000050.0 + path: "A/B" + children: + - entry: + id: "c9cabe7f49012e3fdef6ac6b929efb5654f583cf" + name: "C" + dbdate: 1000000010.0 + maxdate: 1000000010.0 + known: True + path: "A/B/C" diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/test_isochrone_graph.py @@ -0,0 +1,88 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# 
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from datetime import datetime, timezone

import pytest
import yaml

from swh.model.hashutil import hash_to_bytes
from swh.provenance.model import DirectoryEntry, RevisionEntry
from swh.provenance.provenance import IsochroneNode, build_isochrone_graph, revision_add
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
from swh.provenance.tests.test_provenance_db import ts2dt


def isochrone_graph_from_dict(d, depth=0) -> IsochroneNode:
    """Takes a dictionary representing a tree of IsochroneNode objects, and
    recursively builds the corresponding graph.

    Expected keys of ``d`` (the layout used by the ``graph`` items of the YAML
    fixtures read by ``test_isochrone_graph``):

    - ``entry``: mapping with an ``id`` (converted with ``hash_to_bytes``) and a
      ``name`` (text, encoded to UTF-8 bytes);
    - ``maxdate``: POSIX timestamp, converted to a UTC-aware datetime;
    - ``path``: text, encoded to UTF-8 bytes;
    - ``dbdate`` (optional): POSIX timestamp, ``None`` when absent;
    - ``known`` (optional): defaults to ``False``;
    - ``children`` (optional): list of dictionaries with this same layout.

    ``d`` is only read, never modified. ``depth`` is the depth given to the
    returned node; every level of children gets ``depth + 1``.
    """
    raw_entry = d["entry"]
    # Build a converted copy of the entry instead of deep-copying the whole
    # subtree at each recursion level just to be able to patch it in place.
    entry_kwargs = dict(
        raw_entry,
        id=hash_to_bytes(raw_entry["id"]),
        name=bytes(raw_entry["name"], encoding="utf-8"),
    )

    dbdate = d.get("dbdate")
    if dbdate is not None:
        dbdate = datetime.fromtimestamp(dbdate, timezone.utc)

    node = IsochroneNode(
        entry=DirectoryEntry(**entry_kwargs),
        dbdate=dbdate,
        depth=depth,
    )
    # These attributes are not constructor arguments here; they are overwritten
    # on the freshly built node so that it mirrors the fixture exactly.
    node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc)
    node.known = d.get("known", False)
    node.path = bytes(d["path"], encoding="utf-8")
    node.children = [
        isochrone_graph_from_dict(child, depth=depth + 1)
        for child in d.get("children", [])
    ]
    return node


@pytest.mark.parametrize(
    "repo, lower, mindepth",
    (
        ("cmdbts2", True, 1),
        ("cmdbts2", False, 1),
        ("cmdbts2", True, 2),
        ("cmdbts2", False, 2),
        ("out-of-order", True, 1),
    ),
)
def test_isochrone_graph(provenance, swh_storage, archive, repo, lower, mindepth):
    """Check the isochrone graph computed for each revision of a dataset.

    For every ``rev``/``graph`` pair of the matching ``graphs_*.yaml`` data
    file, the graph built by ``build_isochrone_graph`` must equal the expected
    one; the revision is then added with ``revision_add`` using the
    parametrized ``lower`` and ``mindepth`` values before moving on to the
    next pair.
    """
    # read data/README.md for more details on how these datasets are generated
    data = load_repo_data(repo)
    fill_storage(swh_storage, data)

    revisions = {rev["id"]: rev for rev in data["revision"]}
    filename = f"graphs_{repo}_{'lower' if lower else 'upper'}_{mindepth}.yaml"

    # Explicit encoding keeps decoding independent of the platform locale; the
    # safe loader is enough for fixtures made of plain maps, lists and scalars.
    with open(get_datafile(filename), encoding="utf-8") as file:
        expected_graphs = yaml.safe_load(file)

    for expected in expected_graphs:
        revision = revisions[hash_to_bytes(expected["rev"])]
        entry = RevisionEntry(
            id=revision["id"],
            date=ts2dt(revision["date"]),
            root=revision["directory"],
        )
        expected_graph = isochrone_graph_from_dict(expected["graph"])
        # Printed output is only useful when the assertion below fails.
        print("Expected", expected_graph)

        # Create graph for current revision and check it has the expected structure.
        computed_graph = build_isochrone_graph(
            archive,
            provenance,
            entry,
            DirectoryEntry(entry.root),
        )
        print("Computed", computed_graph)
        assert computed_graph == expected_graph

        # Add current revision so that provenance info is kept up to date for the
        # following ones.
        revision_add(provenance, archive, [entry], lower=lower, mindepth=mindepth)