diff --git a/swh/dataset/graph.py b/swh/dataset/graph.py --- a/swh/dataset/graph.py +++ b/swh/dataset/graph.py @@ -46,13 +46,13 @@ messages = {k: fix_objects(k, v) for k, v in messages.items()} - for visit in messages.get("origin_visit", []): - origin_id = origin_identifier({"url": visit["origin"]}) - visit_id = visit["visit"] + for visit_status in messages.get("origin_visit_status", []): + origin_id = origin_identifier({"url": visit_status["origin"]}) + visit_id = visit_status["visit"] if not node_set.add("{}:{}".format(origin_id, visit_id).encode()): continue write_node(("origin", origin_id)) - write_edge(("origin", origin_id), ("snapshot", visit["snapshot"])) + write_edge(("origin", origin_id), ("snapshot", visit_status["snapshot"])) for snapshot in messages.get("snapshot", []): if not node_set.add(snapshot["id"]): @@ -150,7 +150,7 @@ def export_edges(config, export_path, export_id, processes): """Run the edge exporter for each edge type.""" object_types = [ - "origin_visit", + "origin_visit_status", "snapshot", "release", "revision", diff --git a/swh/dataset/test/test_graph.py b/swh/dataset/test/test_graph.py --- a/swh/dataset/test/test_graph.py +++ b/swh/dataset/test/test_graph.py @@ -58,14 +58,13 @@ TEST_ORIGIN = {"url": "https://somewhere.org/den/fox"} TEST_ORIGIN_2 = {"url": "https://somewhere.org/den/fox/2"} -TEST_ORIGIN_VISIT = { +TEST_ORIGIN_VISIT_STATUS = { "origin": TEST_ORIGIN["url"], "visit": 1, "date": "2013-05-07 04:20:39.369271+00:00", "snapshot": None, # TODO "status": "ongoing", # TODO "metadata": {"foo": "bar"}, - "type": "git", } @@ -112,18 +111,18 @@ return hashlib.sha1(s.encode()).hexdigest() -def test_export_origin_visits(exporter): +def test_export_origin_visit_status(exporter): node_writer, edge_writer = exporter( { - "origin_visit": [ + "origin_visit_status": [ { - **TEST_ORIGIN_VISIT, - "origin": {"url": "ori1"}, + **TEST_ORIGIN_VISIT_STATUS, + "origin": "ori1", "snapshot": binhash("snp1"), }, { - **TEST_ORIGIN_VISIT, - "origin": {"url": "ori2"}, + **TEST_ORIGIN_VISIT_STATUS, + "origin": "ori2", "snapshot": binhash("snp2"), }, ] @@ -396,14 +395,14 @@ def test_export_duplicate_visit(exporter): node_writer, edge_writer = exporter( { - "origin_visit": [ - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 1}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 1}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 1}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 1}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 2}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 2}, - {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 2}, + "origin_visit_status": [ + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori1", "visit": 1}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori2", "visit": 1}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori1", "visit": 1}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori2", "visit": 1}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori1", "visit": 2}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori2", "visit": 2}, + {**TEST_ORIGIN_VISIT_STATUS, "origin": "ori2", "visit": 2}, ], }, ) @@ -429,7 +428,7 @@ def test_sort_pipeline(tmp_path): short_type_mapping = { - "origin_visit": "ori", + "origin_visit_status": "ori", "snapshot": "snp", "release": "rel", "revision": "rev",