Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/luigi.py
Show First 20 Lines • Show All 635 Lines • ▼ Show 20 Lines | def run(self) -> None: | ||||
csv_writer.writerow(("origin_SWHID", "person_base64", "person_escaped")) | csv_writer.writerow(("origin_SWHID", "person_base64", "person_escaped")) | ||||
# Open input for reads as CSV | # Open input for reads as CSV | ||||
with pyzstd.open(self.origin_contributors_path, "rt") as input_fd: | with pyzstd.open(self.origin_contributors_path, "rt") as input_fd: | ||||
csv_reader = csv.reader(input_fd) | csv_reader = csv.reader(input_fd) | ||||
header = next(csv_reader) | header = next(csv_reader) | ||||
assert header == ["origin_SWHID", "person_id"], header | assert header == ["origin_SWHID", "person_id"], header | ||||
for (origin_swhid, person_id) in csv_reader: | for (origin_swhid, person_id) in csv_reader: | ||||
if person_id == "null": | |||||
# FIXME: workaround for a bug in contribution graphs generated | |||||
# before 2022-12-01. Those were only used in tests and never | |||||
# published, so the conditional can be removed when this is | |||||
# productionized | |||||
continue | |||||
(name, escaped_name) = person_id_to_names[int(person_id)] | (name, escaped_name) = person_id_to_names[int(person_id)] | ||||
base64_name = base64.b64encode(name).decode("ascii") | base64_name = base64.b64encode(name).decode("ascii") | ||||
csv_writer.writerow((origin_swhid, base64_name, escaped_name)) | csv_writer.writerow((origin_swhid, base64_name, escaped_name)) | ||||
tmp_output_path.replace(self.deanonymized_origin_contributors_path) | tmp_output_path.replace(self.deanonymized_origin_contributors_path) |