diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ListOriginContributors.java b/java/src/main/java/org/softwareheritage/graph/utils/ListOriginContributors.java --- a/java/src/main/java/org/softwareheritage/graph/utils/ListOriginContributors.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ListOriginContributors.java @@ -60,7 +60,7 @@ */ HashMap pendingSuccessors = new HashMap<>(); - System.out.println("origin_SWHID,person_id"); + System.out.println("origin_id,person_id"); while (stdin.hasNextLine()) { String cells[] = stdin.nextLine().strip().split(",", -1); SWHID nodeSWHID = new SWHID(cells[0]); @@ -135,7 +135,7 @@ if (nodeSWHID.getType() == SwhType.ORI) { nodeContributors.forEach((contributorId) -> { - System.out.format("%s,%d\n", nodeSWHID.toString(), contributorId); + System.out.format("%d,%d\n", nodeId, contributorId); }); } diff --git a/swh/graph/luigi/origin_contributors.py b/swh/graph/luigi/origin_contributors.py --- a/swh/graph/luigi/origin_contributors.py +++ b/swh/graph/luigi/origin_contributors.py @@ -177,15 +177,15 @@ with pyzstd.open(tmp_output_path, "wt") as output_fd: csv_writer = csv.writer(output_fd, lineterminator="\n") # write header - csv_writer.writerow(("origin_SWHID", "person_base64", "person_escaped")) + csv_writer.writerow(("origin_id", "person_base64", "person_escaped")) # Open input for reads as CSV with pyzstd.open(self.origin_contributors_path, "rt") as input_fd: # TODO: remove that cast once we dropped Python 3.7 support csv_reader = csv.reader(cast(Iterable[str], input_fd)) header = next(csv_reader) - assert header == ["origin_SWHID", "person_id"], header - for (origin_swhid, person_id) in csv_reader: + assert header == ["origin_id", "person_id"], header + for (origin_id, person_id) in csv_reader: if person_id == "null": # FIXME: workaround for a bug in contribution graphs generated # before 2022-12-01. Those were only used in tests and never @@ -194,6 +194,6 @@ continue (name, escaped_name) = person_id_to_names[int(person_id)] base64_name = base64.b64encode(name).decode("ascii") - csv_writer.writerow((origin_swhid, base64_name, escaped_name)) + csv_writer.writerow((origin_id, base64_name, escaped_name)) tmp_output_path.replace(self.deanonymized_origin_contributors_path) diff --git a/swh/graph/tests/test_origin_contributors.py b/swh/graph/tests/test_origin_contributors.py --- a/swh/graph/tests/test_origin_contributors.py +++ b/swh/graph/tests/test_origin_contributors.py @@ -29,12 +29,12 @@ # FIXME: do not hardcode ids here; they should be dynamically loaded # from the test graph ORIGIN_CONTRIBUTORS = """\ -origin_SWHID,person_id -swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,0 -swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,2 -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,0 -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,1 -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,2 +origin_id,person_id +2,0 +2,2 +0,0 +0,1 +0,2 """ DEANONYMIZATION_TABLE = """\ @@ -51,12 +51,12 @@ """ DEANONYMIZED_ORIGIN_CONTRIBUTORS = """\ -origin_SWHID,person_base64,person_escaped -swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe -swh:1:ori:83404f995118bd25774f4ac14422a8f175e7a054,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5uZXQ+,Jane Doe -swh:1:ori:8f50d3f60eae370ddbf85c86219c55108a350165,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe +origin_id,person_base64,person_escaped +2,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe +2,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe +0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe +0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5uZXQ+,Jane Doe +0,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe """ # noqa