Changeset View
Changeset View
Standalone View
Standalone View
swh/dataset/test/test_graph.py
Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | TEST_RELEASE = { | ||||
"author": {"author": {"fullname": b"foo", "name": b"foo", "email": b""}}, | "author": {"author": {"fullname": b"foo", "name": b"foo", "email": b""}}, | ||||
"target_type": "revision", | "target_type": "revision", | ||||
"target": b"\x04" * 20, | "target": b"\x04" * 20, | ||||
"message": b"foo", | "message": b"foo", | ||||
"synthetic": False, | "synthetic": False, | ||||
} | } | ||||
# Origin fixtures shared by the export tests below.
TEST_ORIGIN = {"url": "https://somewhere.org/den/fox"}
TEST_ORIGIN_2 = {"url": "https://somewhere.org/den/fox/2"}
# Baseline origin-visit message; tests override individual keys with
# ``{**TEST_ORIGIN_VISIT, ...}``.
TEST_ORIGIN_VISIT = {
    "origin": TEST_ORIGIN["url"],
    "visit": 1,
    "date": "2013-05-07 04:20:39.369271+00:00",
    "snapshot": None,  # TODO
    "status": "ongoing",  # TODO
    "metadata": {"foo": "bar"},
    "type": "git",
}
class FakeDiskSet(set):
    """
    A set with an add() method that returns whether the item has been added
    or was already there. Used to replace SQLiteSet in unittests.
    """

    def add(self, v):
        """Insert ``v`` and report whether it was new.

        Args:
            v: the item to insert; must be a ``bytes`` object.

        Returns:
            ``True`` if ``v`` was not in the set before this call,
            ``False`` if it was already present.
        """
        assert isinstance(v, bytes)
        # Membership must be checked before inserting, otherwise it would
        # always be True.
        is_new = v not in self
        super().add(v)
        return is_new
@pytest.fixture
def exporter():
    """Fixture providing a helper that runs ``process_messages`` on mocks.

    The returned callable takes ``messages`` and an optional ``config``
    (an empty dict is used when it is ``None``), calls ``process_messages``
    with fresh ``Mock`` node/edge writers and a fresh in-memory
    ``FakeDiskSet`` as ``node_set``, and returns the ``write`` mocks of the
    node writer and the edge writer so tests can inspect ``mock_calls``.
    """

    def wrapped(messages, config=None) -> Tuple[Mock, Mock]:
        if config is None:
            config = {}
        node_writer = Mock()
        edge_writer = Mock()
        # A new set per invocation, so nothing carries over between calls.
        node_set = FakeDiskSet()
        process_messages(
            messages,
            config=config,
            node_writer=node_writer,
            edge_writer=edge_writer,
            node_set=node_set,
        )
        return node_writer.write, edge_writer.write

    return wrapped
def binhash(s):
    """Return the raw SHA-1 digest (bytes) of the string ``s``.

    ``s`` is encoded with ``str.encode()`` defaults before hashing.
    """
    return hashlib.sha1(s.encode()).digest()
▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | def test_export_content(exporter): | ||||
) | ) | ||||
assert node_writer.mock_calls == [ | assert node_writer.mock_calls == [ | ||||
call(f"swh:1:cnt:{hexhash('cnt1')}\n"), | call(f"swh:1:cnt:{hexhash('cnt1')}\n"), | ||||
call(f"swh:1:cnt:{hexhash('cnt2')}\n"), | call(f"swh:1:cnt:{hexhash('cnt2')}\n"), | ||||
] | ] | ||||
assert edge_writer.mock_calls == [] | assert edge_writer.mock_calls == [] | ||||
def test_export_duplicate_node(exporter):
    """Three content messages with the same ``sha1_git`` yield one node.

    The node writer is expected to be called exactly once and the edge
    writer not at all.
    """
    node_writer, edge_writer = exporter(
        {
            "content": [
                {**TEST_CONTENT, "sha1_git": binhash("cnt1")},
                {**TEST_CONTENT, "sha1_git": binhash("cnt1")},
                {**TEST_CONTENT, "sha1_git": binhash("cnt1")},
            ],
        },
    )
    assert node_writer.mock_calls == [
        call(f"swh:1:cnt:{hexhash('cnt1')}\n"),
    ]
    assert edge_writer.mock_calls == []
def test_export_duplicate_visit(exporter):
    """Repeated origin-visit messages are written once per distinct visit.

    Seven messages are fed in, covering four distinct (origin url, visit)
    combinations. The test expects four node writes, in first-seen order,
    and no edge writes. Note that the same origin node line is expected
    once per distinct visit number, not once per origin.
    """
    node_writer, edge_writer = exporter(
        {
            "origin_visit": [
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 1},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 1},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 1},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 1},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori1"}, "visit": 2},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 2},
                {**TEST_ORIGIN_VISIT, "origin": {"url": "ori2"}, "visit": 2},
            ],
        },
    )
    assert node_writer.mock_calls == [
        call(f"swh:1:ori:{hexhash('ori1')}\n"),
        call(f"swh:1:ori:{hexhash('ori2')}\n"),
        call(f"swh:1:ori:{hexhash('ori1')}\n"),
        call(f"swh:1:ori:{hexhash('ori2')}\n"),
    ]
    assert edge_writer.mock_calls == []
def zstwrite(fp, lines):
    """Write each item of ``lines`` to ``fp`` through ``ZSTFile``.

    A newline is appended to every item. ``fp`` is passed to ``ZSTFile``
    unchanged, opened in ``"w"`` mode.
    """
    with ZSTFile(fp, "w") as writer:
        for line in lines:
            writer.write(line + "\n")
def zstread(fp): | def zstread(fp): | ||||
with ZSTFile(fp, "r") as reader: | with ZSTFile(fp, "r") as reader: | ||||
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines |