Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/tests/test_origin_contributors.py
Show All 24 Lines | |||||
from .test_toposort import EXPECTED as TOPOLOGICAL_ORDER | from .test_toposort import EXPECTED as TOPOLOGICAL_ORDER | ||||
DATA_DIR = Path(__file__).parents[0] / "dataset" | DATA_DIR = Path(__file__).parents[0] / "dataset" | ||||
# FIXME: do not hardcode ids here; they should be dynamically loaded | # FIXME: do not hardcode ids here; they should be dynamically loaded | ||||
# from the test graph | # from the test graph | ||||
ORIGIN_CONTRIBUTORS = """\ | ORIGIN_CONTRIBUTORS = """\ | ||||
origin_id,person_id | origin_id,contributor_id | ||||
2,0 | 2,0 | ||||
2,2 | 2,2 | ||||
0,0 | 0,0 | ||||
0,1 | 0,1 | ||||
0,2 | 0,2 | ||||
""" | """ | ||||
assert ( | assert ( | ||||
Show All 20 Lines | |||||
PERSONS = """\ | PERSONS = """\ | ||||
aZA9TeLhVzqVDQHQOd53UABAZYyek0tY3vTo6VSlA4U= | aZA9TeLhVzqVDQHQOd53UABAZYyek0tY3vTo6VSlA4U= | ||||
UaCrgAZBvn1LBd2sAinmdNvAX/G4sjo1aJA9GDd9UUs= | UaCrgAZBvn1LBd2sAinmdNvAX/G4sjo1aJA9GDd9UUs= | ||||
8qhF7WQ2bmeoRbZipAaqtNw6QdOCDcpggLWCQLzITsI= | 8qhF7WQ2bmeoRbZipAaqtNw6QdOCDcpggLWCQLzITsI= | ||||
""" | """ | ||||
DEANONYMIZED_ORIGIN_CONTRIBUTORS = """\ | DEANONYMIZED_ORIGIN_CONTRIBUTORS = """\ | ||||
origin_id,person_base64,person_escaped | origin_id,contributor_base64,contributor_escaped | ||||
2,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe <jdoe@example.com> | 2,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe <jdoe@example.com> | ||||
2,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe <jdoe@example.org> | 2,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe <jdoe@example.org> | ||||
0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe <jdoe@example.com> | 0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5jb20+,Jane Doe <jdoe@example.com> | ||||
0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5uZXQ+,Jane Doe <jdoe@example.net> | 0,SmFuZSBEb2UgPGpkb2VAZXhhbXBsZS5uZXQ+,Jane Doe <jdoe@example.net> | ||||
0,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe <jdoe@example.org> | 0,Sm9obiBEb2UgPGpkb2VAZXhhbXBsZS5vcmc+,John Doe <jdoe@example.org> | ||||
""" # noqa | """ # noqa | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | swh_storage.revision_add( | ||||
committer_date=tstz, | committer_date=tstz, | ||||
directory=b"\x00" * 20, | directory=b"\x00" * 20, | ||||
type=RevisionType.GIT, | type=RevisionType.GIT, | ||||
synthetic=True, | synthetic=True, | ||||
) | ) | ||||
] | ] | ||||
) | ) | ||||
deanonymization_table_path = tmpdir / "person_sha256_to_names.csv.zst" | deanonymization_table_path = tmpdir / "contributor_sha256_to_names.csv.zst" | ||||
task = ExportDeanonymizationTable( | task = ExportDeanonymizationTable( | ||||
storage_dsn=swh_storage_postgresql.dsn, | storage_dsn=swh_storage_postgresql.dsn, | ||||
deanonymization_table_path=deanonymization_table_path, | deanonymization_table_path=deanonymization_table_path, | ||||
) | ) | ||||
task.run() | task.run() | ||||
Show All 10 Lines | def test_export_deanonymization_table(tmpdir, swh_storage_postgresql, swh_storage): | ||||
assert set(rows) == set(expected_rows) | assert set(rows) == set(expected_rows) | ||||
def test_deanonymize_origin_contributors(tmpdir): | def test_deanonymize_origin_contributors(tmpdir): | ||||
tmpdir = Path(tmpdir) | tmpdir = Path(tmpdir) | ||||
persons_path = tmpdir / "example.persons.csv.zst" | persons_path = tmpdir / "example.persons.csv.zst" | ||||
origin_contributors_path = tmpdir / "origin_contributors.csv.zst" | origin_contributors_path = tmpdir / "origin_contributors.csv.zst" | ||||
deanonymization_table_path = tmpdir / "person_sha256_to_names.csv.zst" | deanonymization_table_path = tmpdir / "contributor_sha256_to_names.csv.zst" | ||||
deanonymized_origin_contributors_path = ( | deanonymized_origin_contributors_path = ( | ||||
tmpdir / "sensitive" / "origin_contributors.deanonymized.csv.zst" | tmpdir / "sensitive" / "origin_contributors.deanonymized.csv.zst" | ||||
) | ) | ||||
subprocess.run( | subprocess.run( | ||||
["zstdmt", "-o", origin_contributors_path], | ["zstdmt", "-o", origin_contributors_path], | ||||
input=ORIGIN_CONTRIBUTORS.encode(), | input=ORIGIN_CONTRIBUTORS.encode(), | ||||
check=True, | check=True, | ||||
Show All 29 Lines |