Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show First 20 Lines • Show All 1,308 Lines • ▼ Show 20 Lines | def test_origin_add_one(self, swh_storage): | ||||
id2 = swh_storage.origin_add_one(data.origin) | id2 = swh_storage.origin_add_one(data.origin) | ||||
assert id == id2 | assert id == id2 | ||||
def test_origin_add(self, swh_storage): | def test_origin_add(self, swh_storage): | ||||
origin0 = swh_storage.origin_get([data.origin])[0] | origin0 = swh_storage.origin_get([data.origin])[0] | ||||
assert origin0 is None | assert origin0 is None | ||||
origin1, origin2 = swh_storage.origin_add([data.origin, data.origin2]) | stats = swh_storage.origin_add([data.origin, data.origin2]) | ||||
assert stats == {"origin:add": 2} | |||||
actual_origin = swh_storage.origin_get([{"url": data.origin["url"],}])[0] | actual_origin = swh_storage.origin_get([{"url": data.origin["url"],}])[0] | ||||
assert actual_origin["url"] == origin1["url"] | assert actual_origin["url"] == data.origin["url"] | ||||
actual_origin2 = swh_storage.origin_get([{"url": data.origin2["url"],}])[0] | actual_origin2 = swh_storage.origin_get([{"url": data.origin2["url"],}])[0] | ||||
assert actual_origin2["url"] == origin2["url"] | assert actual_origin2["url"] == data.origin2["url"] | ||||
if "id" in actual_origin: | |||||
del actual_origin["id"] | |||||
del actual_origin2["id"] | |||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert set(swh_storage.journal_writer.journal.objects) == set( | ||||
[ | |||||
("origin", Origin.from_dict(actual_origin)), | ("origin", Origin.from_dict(actual_origin)), | ||||
("origin", Origin.from_dict(actual_origin2)), | ("origin", Origin.from_dict(actual_origin2)), | ||||
] | ] | ||||
) | |||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()["origin"] == 2 | assert swh_storage.stat_counters()["origin"] == 2 | ||||
def test_origin_add_from_generator(self, swh_storage): | def test_origin_add_from_generator(self, swh_storage): | ||||
def _ori_gen(): | def _ori_gen(): | ||||
yield data.origin | yield data.origin | ||||
yield data.origin2 | yield data.origin2 | ||||
origin1, origin2 = swh_storage.origin_add(_ori_gen()) | stats = swh_storage.origin_add(_ori_gen()) | ||||
assert stats == {"origin:add": 2} | |||||
actual_origin = swh_storage.origin_get([{"url": data.origin["url"],}])[0] | actual_origin = swh_storage.origin_get([{"url": data.origin["url"],}])[0] | ||||
assert actual_origin["url"] == origin1["url"] | assert actual_origin["url"] == data.origin["url"] | ||||
actual_origin2 = swh_storage.origin_get([{"url": data.origin2["url"],}])[0] | actual_origin2 = swh_storage.origin_get([{"url": data.origin2["url"],}])[0] | ||||
assert actual_origin2["url"] == origin2["url"] | assert actual_origin2["url"] == data.origin2["url"] | ||||
if "id" in actual_origin: | if "id" in actual_origin: | ||||
del actual_origin["id"] | del actual_origin["id"] | ||||
del actual_origin2["id"] | del actual_origin2["id"] | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert set(swh_storage.journal_writer.journal.objects) == set( | ||||
[ | |||||
("origin", Origin.from_dict(actual_origin)), | ("origin", Origin.from_dict(actual_origin)), | ||||
("origin", Origin.from_dict(actual_origin2)), | ("origin", Origin.from_dict(actual_origin2)), | ||||
] | ] | ||||
) | |||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()["origin"] == 2 | assert swh_storage.stat_counters()["origin"] == 2 | ||||
def test_origin_add_twice(self, swh_storage): | def test_origin_add_twice(self, swh_storage): | ||||
add1 = swh_storage.origin_add([data.origin, data.origin2]) | add1 = swh_storage.origin_add([data.origin, data.origin2]) | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert set(swh_storage.journal_writer.journal.objects) == set( | ||||
[ | |||||
("origin", Origin.from_dict(data.origin)), | ("origin", Origin.from_dict(data.origin)), | ||||
("origin", Origin.from_dict(data.origin2)), | ("origin", Origin.from_dict(data.origin2)), | ||||
] | ] | ||||
) | |||||
assert add1 == {"origin:add": 2} | |||||
add2 = swh_storage.origin_add([data.origin, data.origin2]) | add2 = swh_storage.origin_add([data.origin, data.origin2]) | ||||
assert list(swh_storage.journal_writer.journal.objects) == [ | assert set(swh_storage.journal_writer.journal.objects) == set( | ||||
[ | |||||
("origin", Origin.from_dict(data.origin)), | ("origin", Origin.from_dict(data.origin)), | ||||
("origin", Origin.from_dict(data.origin2)), | ("origin", Origin.from_dict(data.origin2)), | ||||
] | ] | ||||
) | |||||
assert add1 == add2 | assert add2 == {"origin:add": 0} | ||||
def test_origin_add_validation(self, swh_storage): | def test_origin_add_validation(self, swh_storage): | ||||
"""Incorrect formatted origin should fail the validation | """Incorrect formatted origin should fail the validation | ||||
""" | """ | ||||
with pytest.raises(StorageArgumentException, match="url"): | with pytest.raises(StorageArgumentException, match="url"): | ||||
swh_storage.origin_add([{}]) | swh_storage.origin_add([{}]) | ||||
with pytest.raises( | with pytest.raises( | ||||
▲ Show 20 Lines • Show All 1,845 Lines • ▼ Show 20 Lines | def test_metadata_authority_add_get(self, swh_storage): | ||||
assert res is not data.metadata_authority | assert res is not data.metadata_authority | ||||
assert res == data.metadata_authority | assert res == data.metadata_authority | ||||
def test_origin_metadata_add(self, swh_storage): | def test_origin_metadata_add(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.origin_metadata_add(**data.origin_metadata) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | swh_storage.origin_metadata_add(**data.origin_metadata2) | ||||
result = swh_storage.origin_metadata_get(origin["url"], authority) | result = swh_storage.origin_metadata_get(origin["url"], authority) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata, data.origin_metadata2] == list( | assert [data.origin_metadata, data.origin_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x["discovery_date"],) | ||||
) | ) | ||||
def test_origin_metadata_add_duplicate(self, swh_storage): | def test_origin_metadata_add_duplicate(self, swh_storage): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
new_origin_metadata2 = { | new_origin_metadata2 = { | ||||
**data.origin_metadata2, | **data.origin_metadata2, | ||||
"format": "new-format", | "format": "new-format", | ||||
"metadata": b"new-metadata", | "metadata": b"new-metadata", | ||||
} | } | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.origin_metadata_add(**data.origin_metadata) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | swh_storage.origin_metadata_add(**data.origin_metadata2) | ||||
swh_storage.origin_metadata_add(**new_origin_metadata2) | swh_storage.origin_metadata_add(**new_origin_metadata2) | ||||
result = swh_storage.origin_metadata_get(origin["url"], authority) | result = swh_storage.origin_metadata_get(origin["url"], authority) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata, new_origin_metadata2] == list( | assert [data.origin_metadata, new_origin_metadata2] == list( | ||||
sorted(result["results"], key=lambda x: x["discovery_date"],) | sorted(result["results"], key=lambda x: x["discovery_date"],) | ||||
) | ) | ||||
def test_origin_metadata_add_dict(self, swh_storage): | def test_origin_metadata_add_dict(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
kwargs = data.origin_metadata.copy() | kwargs = data.origin_metadata.copy() | ||||
kwargs["metadata"] = {"foo": "bar"} | kwargs["metadata"] = {"foo": "bar"} | ||||
with pytest.raises(StorageArgumentException): | with pytest.raises(StorageArgumentException): | ||||
swh_storage.origin_metadata_add(**kwargs) | swh_storage.origin_metadata_add(**kwargs) | ||||
def test_origin_metadata_get(self, swh_storage): | def test_origin_metadata_get(self, swh_storage): | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority2 = data.metadata_authority2 | authority2 = data.metadata_authority2 | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
origin_url1 = data.origin["url"] | origin_url1 = data.origin["url"] | ||||
origin_url2 = data.origin2["url"] | origin_url2 = data.origin2["url"] | ||||
swh_storage.origin_add([data.origin]) | assert swh_storage.origin_add([data.origin, data.origin2]) == {"origin:add": 2} | ||||
swh_storage.origin_add([data.origin2]) | |||||
origin1_metadata1 = data.origin_metadata | origin1_metadata1 = data.origin_metadata | ||||
origin1_metadata2 = data.origin_metadata2 | origin1_metadata2 = data.origin_metadata2 | ||||
origin1_metadata3 = data.origin_metadata3 | origin1_metadata3 = data.origin_metadata3 | ||||
origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2} | origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2} | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
Show All 20 Lines | def test_origin_metadata_get(self, swh_storage): | ||||
result = swh_storage.origin_metadata_get(origin_url2, authority) | result = swh_storage.origin_metadata_get(origin_url2, authority) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [origin2_metadata] == list(result["results"],) | assert [origin2_metadata] == list(result["results"],) | ||||
def test_origin_metadata_get_after(self, swh_storage): | def test_origin_metadata_get_after(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.origin_metadata_add(**data.origin_metadata) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | swh_storage.origin_metadata_add(**data.origin_metadata2) | ||||
result = swh_storage.origin_metadata_get( | result = swh_storage.origin_metadata_get( | ||||
Show All 17 Lines | def test_origin_metadata_get_after(self, swh_storage): | ||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [] == result["results"] | assert [] == result["results"] | ||||
def test_origin_metadata_get_paginate(self, swh_storage): | def test_origin_metadata_get_paginate(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher = data.metadata_fetcher | fetcher = data.metadata_fetcher | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher) | swh_storage.metadata_fetcher_add(**fetcher) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata) | swh_storage.origin_metadata_add(**data.origin_metadata) | ||||
swh_storage.origin_metadata_add(**data.origin_metadata2) | swh_storage.origin_metadata_add(**data.origin_metadata2) | ||||
swh_storage.origin_metadata_get(origin["url"], authority) | swh_storage.origin_metadata_get(origin["url"], authority) | ||||
result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) | result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) | ||||
assert result["next_page_token"] is not None | assert result["next_page_token"] is not None | ||||
assert [data.origin_metadata] == result["results"] | assert [data.origin_metadata] == result["results"] | ||||
result = swh_storage.origin_metadata_get( | result = swh_storage.origin_metadata_get( | ||||
origin["url"], authority, limit=1, page_token=result["next_page_token"] | origin["url"], authority, limit=1, page_token=result["next_page_token"] | ||||
) | ) | ||||
assert result["next_page_token"] is None | assert result["next_page_token"] is None | ||||
assert [data.origin_metadata2] == result["results"] | assert [data.origin_metadata2] == result["results"] | ||||
def test_origin_metadata_get_paginate_same_date(self, swh_storage): | def test_origin_metadata_get_paginate_same_date(self, swh_storage): | ||||
origin = data.origin | origin = data.origin | ||||
fetcher1 = data.metadata_fetcher | fetcher1 = data.metadata_fetcher | ||||
fetcher2 = data.metadata_fetcher2 | fetcher2 = data.metadata_fetcher2 | ||||
authority = data.metadata_authority | authority = data.metadata_authority | ||||
swh_storage.origin_add([origin])[0] | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add(**fetcher1) | swh_storage.metadata_fetcher_add(**fetcher1) | ||||
swh_storage.metadata_fetcher_add(**fetcher2) | swh_storage.metadata_fetcher_add(**fetcher2) | ||||
swh_storage.metadata_authority_add(**authority) | swh_storage.metadata_authority_add(**authority) | ||||
origin_metadata2 = { | origin_metadata2 = { | ||||
**data.origin_metadata2, | **data.origin_metadata2, | ||||
"discovery_date": data.origin_metadata2["discovery_date"], | "discovery_date": data.origin_metadata2["discovery_date"], | ||||
▲ Show 20 Lines • Show All 556 Lines • Show Last 20 Lines |