def __str__(self):
    """Return the string form of this revision entry."""
    # NOTE(review): the f-string is empty in the text under review; its
    # contents look like they were lost before review (angle-bracket markup
    # stripped?) -- confirm the intended text against the repository.
    return f""

def __eq__(self, other):
    """Compare two revision entries by their ``id``.

    Returns ``NotImplemented`` for operands that are not ``RevisionEntry``
    instances, so that Python can try the reflected comparison of the other
    operand. ``entry == unrelated_object`` still evaluates to ``False`` when
    the other side does not handle the comparison either.
    """
    if not isinstance(other, RevisionEntry):
        return NotImplemented
    return self.id == other.id

def __hash__(self):
    """Hash on ``id`` only, so equal entries always share the same hash."""
    return hash(self.id)
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import pytest
import yaml

from swh.model.hashutil import hash_to_bytes
from swh.provenance.graph import HistoryNode, build_history_graph
from swh.provenance.model import OriginEntry, RevisionEntry
from swh.provenance.origin import origin_add_revision
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data


def history_graph_from_dict(d) -> HistoryNode:
    """Recursively build a HistoryNode graph from its dictionary description.

    ``d`` must provide a ``"rev"`` key (converted with ``hash_to_bytes`` and
    wrapped in a ``RevisionEntry``). The optional ``"visited"`` and
    ``"in_history"`` keys default to ``False``, and the optional ``"parents"``
    key holds a list of dictionaries of the same shape.
    """
    root = HistoryNode(
        entry=RevisionEntry(hash_to_bytes(d["rev"])),
        visited=d.get("visited", False),
        in_history=d.get("in_history", False),
    )
    # The node itself is created before its parents, which are collected
    # into a set (so HistoryNode instances have to be hashable).
    root.parents = {history_graph_from_dict(spec) for spec in d.get("parents", [])}
    return root


@pytest.mark.parametrize(
    "repo, visit",
    (("with-merges", "visits-01"),),
)
@pytest.mark.parametrize("batch", (True, False))
def test_history_graph(provenance, swh_storage, archive, repo, visit, batch):
    """Check build_history_graph against the graphs listed in the YAML datafile.

    For each entry of ``history_graphs_{repo}_{visit}.yaml`` the origin is
    registered, then every expected graph is compared with the graph computed
    for the same head revision, and the computed graph is added to the
    provenance through ``origin_add_revision``.
    """
    # read data/README.md for more details on how these datasets are generated
    filename = f"history_graphs_{repo}_{visit}.yaml"
    fill_storage(swh_storage, load_repo_data(repo))

    with open(get_datafile(filename)) as stream:
        for scenario in yaml.full_load(stream):
            origin = OriginEntry(
                scenario["origin"], hash_to_bytes(scenario["snapshot"])
            )
            provenance.origin_add(origin)

            for graph_spec in scenario["graphs"]:
                expected_graph = history_graph_from_dict(graph_spec)
                print("Expected graph:", expected_graph)

                head = RevisionEntry(hash_to_bytes(graph_spec["rev"]))
                computed_graph = build_history_graph(archive, provenance, head)
                print("Computed graph:", computed_graph)
                assert computed_graph == expected_graph

                origin_add_revision(provenance, origin, computed_graph)

            # NOTE(review): the indentation of this statement is reconstructed
            # (the reviewed text had lost its whitespace); it is placed after
            # the graphs of one datafile entry -- confirm against the
            # repository.
            if not batch:
                provenance.commit()