tmp_path = PosixPath('/tmp/pytest-of-jenkins/pytest-0/test_sort_pipeline0')
def test_sort_pipeline(tmp_path):
short_type_mapping = {
"origin_visit_status": "ori",
"snapshot": "snp",
"release": "rel",
"revision": "rev",
"directory": "dir",
"content": "cnt",
}
input_nodes = [
f"swh:1:{short}:{hexhash(short + str(x))}"
for short in short_type_mapping.values()
for x in range(4)
]
input_edges = [
f"swh:1:ori:{hexhash('ori1')} swh:1:snp:{hexhash('snp1')}",
f"swh:1:ori:{hexhash('ori2')} swh:1:snp:{hexhash('snp2')}",
f"swh:1:ori:{hexhash('ori3')} swh:1:snp:{hexhash('snp3')}",
f"swh:1:ori:{hexhash('ori4')} swh:1:snp:{hexhash('snpX')}", # missing dest
f"swh:1:snp:{hexhash('snp1')} swh:1:rev:{hexhash('rev1')} {b64e('dup1')}",
f"swh:1:snp:{hexhash('snp1')} swh:1:rev:{hexhash('rev1')} {b64e('dup2')}",
f"swh:1:snp:{hexhash('snp3')} swh:1:cnt:{hexhash('cnt1')} {b64e('c1')}",
f"swh:1:snp:{hexhash('snp4')} swh:1:rel:{hexhash('rel1')} {b64e('r1')}",
f"swh:1:rel:{hexhash('rel1')} swh:1:rel:{hexhash('rel2')}",
f"swh:1:rel:{hexhash('rel2')} swh:1:rev:{hexhash('rev1')}",
f"swh:1:rel:{hexhash('rel3')} swh:1:rev:{hexhash('rev2')}",
f"swh:1:rel:{hexhash('rel4')} swh:1:dir:{hexhash('dir1')}",
f"swh:1:rev:{hexhash('rev1')} swh:1:rev:{hexhash('rev1')}", # dup
f"swh:1:rev:{hexhash('rev1')} swh:1:rev:{hexhash('rev1')}", # dup
f"swh:1:rev:{hexhash('rev1')} swh:1:rev:{hexhash('rev2')}",
f"swh:1:rev:{hexhash('rev2')} swh:1:rev:{hexhash('revX')}", # missing dest
f"swh:1:rev:{hexhash('rev3')} swh:1:rev:{hexhash('rev2')}",
f"swh:1:rev:{hexhash('rev4')} swh:1:dir:{hexhash('dir1')}",
f"swh:1:dir:{hexhash('dir1')} swh:1:cnt:{hexhash('cnt1')} {b64e('c1')} 42",
f"swh:1:dir:{hexhash('dir1')} swh:1:dir:{hexhash('dir1')} {b64e('d1')} 1337",
f"swh:1:dir:{hexhash('dir1')} swh:1:rev:{hexhash('rev1')} {b64e('r1')} 0",
]
for obj_type, short_obj_type in short_type_mapping.items():
p = tmp_path / obj_type
p.mkdir()
edges = [e for e in input_edges if e.startswith(f"swh:1:{short_obj_type}")]
zstwrite(p / "00.edges.csv.zst", edges[0::2])
zstwrite(p / "01.edges.csv.zst", edges[1::2])
nodes = [n for n in input_nodes if n.startswith(f"swh:1:{short_obj_type}")]
zstwrite(p / "00.nodes.csv.zst", nodes[0::2])
zstwrite(p / "01.nodes.csv.zst", nodes[1::2])
sort_graph_nodes(tmp_path, config={"sort_buffer_size": "1M"})
output_nodes = zstread(tmp_path / "graph.nodes.csv.zst").split("\n")
output_edges = zstread(tmp_path / "graph.edges.csv.zst").split("\n")
output_labels = zstread(tmp_path / "graph.labels.csv.zst").split("\n")
output_nodes = list(filter(bool, output_nodes))
output_edges = list(filter(bool, output_edges))
output_labels = list(filter(bool, output_labels))
expected_nodes = set(input_nodes) | set(e.split()[1] for e in input_edges)
> assert output_nodes == sorted(expected_nodes)
E AssertionError: assert ['swh:1:cnt:3...3a83451', ...] == ['swh:1:cnt:3...3a83451', ...]
E At index 20 diff: 'swh:1:snp:5ae12aa0aa4d37b9b1796c3f91503b82c4ac2870' != 'swh:1:rev:a989b5e94ccf96096f96c0deb678577b4975b1c7'
E Right contains 2 more items, first extra item: 'swh:1:snp:9f9717f81ea8210a66cdf125ed50b969a0db36b5'
E Full diff:
E [
E 'swh:1:cnt:3638eaedf5e00a7a05e95aeb18493b444132cbd0',
E 'swh:1:cnt:a1434daa01c8971e99349c95fc562daa4805f472',
E 'swh:1:cnt:e5f33bef4d3515b9418a74fadba5ed60d5dd0927',...
E
E ...Full output truncated (25 lines hidden), use '-vv' to show
.tox/py3/lib/python3.7/site-packages/swh/dataset/test/test_edges.py:546: AssertionError
TEST RESULT
- Run At: Sep 9 2021, 10:38 AM