diff --git a/swh/graph/luigi/origin_contributors.py b/swh/graph/luigi/origin_contributors.py --- a/swh/graph/luigi/origin_contributors.py +++ b/swh/graph/luigi/origin_contributors.py @@ -82,7 +82,15 @@ run_script( f""" - psql '{self.storage_dsn}' -c "COPY (select encode(digest(fullname, 'sha256'), 'base64') as sha256_base64, encode(fullname, 'base64') as base64, encode(fullname, 'escape') as escaped from person) TO STDOUT CSV HEADER" | zstdmt -19 + psql '{self.storage_dsn}' -c "\ + COPY ( + SELECT + encode(digest(fullname, 'sha256'), 'base64') as sha256_base64, \ + encode(fullname, 'base64') as base64, \ + encode(fullname, 'escape') as escaped \ + FROM person \ + ) TO STDOUT CSV HEADER \ + " | zstdmt -19 """, # noqa self.deanonymization_table_path, ) @@ -95,7 +103,7 @@ This assumes that :file:`graph.persons.csv.zst` is anonymized (SHA256 of names instead of names); which may not be true depending on how the swh-dataset export - cas configured. + was configured. """ local_graph_path = luigi.PathParameter() diff --git a/swh/graph/luigi/utils.py b/swh/graph/luigi/utils.py --- a/swh/graph/luigi/utils.py +++ b/swh/graph/luigi/utils.py @@ -26,9 +26,10 @@ tmp_output_path = Path(f"{output_path}.tmp") - subprocess.run( - ["bash", "-c", f"{script.strip()} > {tmp_output_path}"], env=env, check=True - ) + with tmp_output_path.open("wb") as tmp_output: + subprocess.run( + ["bash", "-c", f"{script.strip()}"], stdout=tmp_output, env=env, check=True + ) # Atomically write the output file tmp_output_path.replace(output_path)