diff --git a/swh/graph/luigi.py b/swh/graph/luigi.py --- a/swh/graph/luigi.py +++ b/swh/graph/luigi.py @@ -641,6 +641,12 @@ header = next(csv_reader) assert header == ["origin_SWHID", "person_id"], header for (origin_swhid, person_id) in csv_reader: + if person_id == "null": + # FIXME: workaround for a bug in contribution graphs generated + # before 2022-12-01. Those were only used in tests and never + # published, so the conditional can be removed when this is + # productionized + continue (name, escaped_name) = person_id_to_names[int(person_id)] base64_name = base64.b64encode(name).decode("ascii") csv_writer.writerow((origin_swhid, base64_name, escaped_name)) diff --git a/swh/graph/tests/dataset/compressed/example-labelled.labelobl b/swh/graph/tests/dataset/compressed/example-labelled.labelobl new file mode 100644 index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ª¿.õ¤kºíè9Ñt \ No newline at end of file diff --git a/swh/graph/tests/dataset/compressed/example.indegree b/swh/graph/tests/dataset/compressed/example.indegree --- a/swh/graph/tests/dataset/compressed/example.indegree +++ b/swh/graph/tests/dataset/compressed/example.indegree @@ -1,4 +1,5 @@ -2 +3 16 -2 +4 +0 1 diff --git a/swh/graph/tests/dataset/compressed/example.labels.count.txt b/swh/graph/tests/dataset/compressed/example.labels.count.txt --- a/swh/graph/tests/dataset/compressed/example.labels.count.txt +++ b/swh/graph/tests/dataset/compressed/example.labels.count.txt @@ -1 +1 @@ -8 +9 diff --git a/swh/graph/tests/dataset/compressed/example.labels.csv.zst b/swh/graph/tests/dataset/compressed/example.labels.csv.zst index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ 09; 19 -> 18; + 21 -> 18; } subgraph cluster_snp { label="Snapshots"; node [shape=doubleoctagon]; 20 [label="snp:0x20"]; + 22 [label="snp:0x22"]; 20 -> 09; 20 -> 10; + + 22 -> 09; + 22 -> 10; + 22 -> 
21; } subgraph cluster_ori { label="Origins"; node [shape=egg]; - 21 [label="ori:0x21"]; + ori1 [label="ori:8340"]; + ori2 [label="ori:8f50"]; - 21 -> 20; + ori1 -> 20; + ori2 -> 22; } } diff --git a/swh/graph/tests/dataset/orc/content/content-all.orc b/swh/graph/tests/dataset/orc/content/content-all.orc index 0000000000000000000000000000000000000000..0000000000000000000000000000000000000000 GIT binary patch literal 0 Hc$@ swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe +swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe +swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5uZXQ+,Jane Doe +swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe """ # noqa diff --git a/swh/graph/tests/test_toposort.py b/swh/graph/tests/test_toposort.py --- a/swh/graph/tests/test_toposort.py +++ b/swh/graph/tests/test_toposort.py @@ -11,16 +11,22 @@ DATA_DIR = Path(__file__).parents[0] / "dataset" +# FIXME: the order of sample ancestors should not be hardcoded +# FIXME: swh:1:snp:0000000000000000000000000000000000000022,3,1,swh has three possible +# sample ancestors; they should not be hardcoded here EXPECTED = """\ SWHID,ancestors,successors,sample_ancestor1,sample_ancestor2 swh:1:rev:0000000000000000000000000000000000000003,0,1,, -swh:1:rev:0000000000000000000000000000000000000009,1,3,swh:1:rev:0000000000000000000000000000000000000003, -swh:1:rel:0000000000000000000000000000000000000010,1,1,swh:1:rev:0000000000000000000000000000000000000009, +swh:1:rev:0000000000000000000000000000000000000009,1,4,swh:1:rev:0000000000000000000000000000000000000003, +swh:1:rel:0000000000000000000000000000000000000010,1,2,swh:1:rev:0000000000000000000000000000000000000009, swh:1:snp:0000000000000000000000000000000000000020,2,1,swh:1:rev:0000000000000000000000000000000000000009,swh:1:rel:0000000000000000000000000000000000000010 
swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,1,0,swh:1:snp:0000000000000000000000000000000000000020, swh:1:rev:0000000000000000000000000000000000000013,1,1,swh:1:rev:0000000000000000000000000000000000000009, -swh:1:rev:0000000000000000000000000000000000000018,1,1,swh:1:rev:0000000000000000000000000000000000000013, +swh:1:rev:0000000000000000000000000000000000000018,1,2,swh:1:rev:0000000000000000000000000000000000000013, swh:1:rel:0000000000000000000000000000000000000019,1,0,swh:1:rev:0000000000000000000000000000000000000018, +swh:1:rel:0000000000000000000000000000000000000021,1,1,swh:1:rev:0000000000000000000000000000000000000018, +swh:1:snp:0000000000000000000000000000000000000022,3,1,swh:1:rev:0000000000000000000000000000000000000009,swh:1:rel:0000000000000000000000000000000000000010 +swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,1,0,swh:1:snp:0000000000000000000000000000000000000022, """ @@ -50,10 +56,12 @@ assert rows.pop() == "", "Missing trailing newline" - # The only two possible last lines + # The only three possible last lines assert rows[-1] in [ "swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,1,0" ",swh:1:snp:0000000000000000000000000000000000000020,", + "swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,1,0" + ",swh:1:snp:0000000000000000000000000000000000000022,", "swh:1:rel:0000000000000000000000000000000000000019,1,0" ",swh:1:rev:0000000000000000000000000000000000000018,", ]