Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/storage_tests.py
Show All 13 Lines | |||||
import attr | import attr | ||||
from hypothesis import HealthCheck, given, settings, strategies | from hypothesis import HealthCheck, given, settings, strategies | ||||
import pytest | import pytest | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.hypothesis_strategies import objects | from swh.model.hypothesis_strategies import objects | ||||
from swh.model.identifiers import SWHID | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
MetadataTargetType, | |||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
SkippedContent, | SkippedContent, | ||||
Snapshot, | Snapshot, | ||||
TargetType, | TargetType, | ||||
▲ Show 20 Lines • Show All 3,283 Lines • ▼ Show 20 Lines | def test_metadata_authority_add_zero(self, swh_storage, sample_data): | ||||
actual_authority = swh_storage.metadata_authority_get( | actual_authority = swh_storage.metadata_authority_get( | ||||
authority.type, authority.url | authority.type, authority.url | ||||
) | ) | ||||
assert actual_authority is None # does not exist | assert actual_authority is None # does not exist | ||||
swh_storage.metadata_authority_add([]) | swh_storage.metadata_authority_add([]) | ||||
def test_content_metadata_add(self, swh_storage, sample_data): | def test_content_metadata_add(self, swh_storage, sample_data): | ||||
content = sample_data.content | content_swhid = sample_data.swhid.content | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata = sample_data.content_metadata[:2] | content_metadata = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID( | |||||
object_type="content", object_id=hash_to_bytes(content.sha1_git) | |||||
) | |||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add(content_metadata) | swh_storage.raw_extrinsic_metadata_add(content_metadata) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list( | assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list( | ||||
content_metadata | content_metadata | ||||
) | ) | ||||
actual_objects = list(swh_storage.journal_writer.journal.objects) | actual_objects = list(swh_storage.journal_writer.journal.objects) | ||||
expected_objects = [ | expected_objects = [ | ||||
("metadata_authority", authority), | ("metadata_authority", authority), | ||||
("metadata_fetcher", fetcher), | ("metadata_fetcher", fetcher), | ||||
] + [("raw_extrinsic_metadata", item) for item in content_metadata] | ] + [("raw_extrinsic_metadata", item) for item in content_metadata] | ||||
for obj in expected_objects: | for obj in expected_objects: | ||||
assert obj in actual_objects | assert obj in actual_objects | ||||
def test_content_metadata_add_duplicate(self, swh_storage, sample_data): | def test_content_metadata_add_duplicate(self, swh_storage, sample_data): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
content = sample_data.content | content_swhid = sample_data.swhid.content | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID( | |||||
object_type="content", object_id=hash_to_bytes(content.sha1_git) | |||||
) | |||||
new_content_metadata2 = attr.evolve( | new_content_metadata2 = attr.evolve( | ||||
content_metadata2, format="new-format", metadata=b"new-metadata", | content_metadata2, format="new-format", metadata=b"new-metadata", | ||||
) | ) | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
expected_results1 = (content_metadata, new_content_metadata2) | expected_results1 = (content_metadata, new_content_metadata2) | ||||
expected_results2 = (content_metadata, content_metadata2) | expected_results2 = (content_metadata, content_metadata2) | ||||
assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( | assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( | ||||
expected_results1, # cassandra | expected_results1, # cassandra | ||||
expected_results2, # postgresql | expected_results2, # postgresql | ||||
) | ) | ||||
def test_content_metadata_get(self, swh_storage, sample_data): | def test_content_metadata_get(self, swh_storage, sample_data): | ||||
content, content2 = sample_data.contents[:2] | content, content2 = sample_data.contents[:2] | ||||
fetcher, fetcher2 = sample_data.fetchers[:2] | fetcher, fetcher2 = sample_data.fetchers[:2] | ||||
authority, authority2 = sample_data.authorities[:2] | authority, authority2 = sample_data.authorities[:2] | ||||
( | ( | ||||
content1_metadata1, | content1_metadata1, | ||||
content1_metadata2, | content1_metadata2, | ||||
content1_metadata3, | content1_metadata3, | ||||
) = sample_data.content_metadata[:3] | ) = sample_data.content_metadata[:3] | ||||
content1_swhid = SWHID(object_type="content", object_id=content.sha1_git) | content1_swhid = sample_data.swhid.content | ||||
content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git) | content2_swhid = sample_data.swhid.content2 | ||||
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid) | content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid) | ||||
swh_storage.metadata_authority_add([authority, authority2]) | swh_storage.metadata_authority_add([authority, authority2]) | ||||
swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
content1_metadata1, | content1_metadata1, | ||||
content1_metadata2, | content1_metadata2, | ||||
content1_metadata3, | content1_metadata3, | ||||
content2_metadata, | content2_metadata, | ||||
] | ] | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority) | ||||
MetadataTargetType.CONTENT, content1_swhid, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [content1_metadata1, content1_metadata2] == list( | assert [content1_metadata1, content1_metadata2] == list( | ||||
sorted(result.results, key=lambda x: x.discovery_date,) | sorted(result.results, key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2) | ||||
MetadataTargetType.CONTENT, content1_swhid, authority2 | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [content1_metadata3] == list( | assert [content1_metadata3] == list( | ||||
sorted(result.results, key=lambda x: x.discovery_date,) | sorted(result.results, key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority) | ||||
MetadataTargetType.CONTENT, content2_swhid, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [content2_metadata] == list(result.results,) | assert [content2_metadata] == list(result.results,) | ||||
def test_content_metadata_get_after(self, swh_storage, sample_data): | def test_content_metadata_get_after(self, swh_storage, sample_data): | ||||
content = sample_data.content | content_swhid = sample_data.swhid.content | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID(object_type="content", object_id=content.sha1_git) | |||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | |||||
content_swhid, | content_swhid, | ||||
authority, | authority, | ||||
after=content_metadata.discovery_date - timedelta(seconds=1), | after=content_metadata.discovery_date - timedelta(seconds=1), | ||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [content_metadata, content_metadata2] == list( | assert [content_metadata, content_metadata2] == list( | ||||
sorted(result.results, key=lambda x: x.discovery_date,) | sorted(result.results, key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | content_swhid, authority, after=content_metadata.discovery_date, | ||||
content_swhid, | |||||
authority, | |||||
after=content_metadata.discovery_date, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [content_metadata2] | assert result.results == [content_metadata2] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | content_swhid, authority, after=content_metadata2.discovery_date, | ||||
content_swhid, | |||||
authority, | |||||
after=content_metadata2.discovery_date, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [] | assert result.results == [] | ||||
def test_content_metadata_get_paginate(self, swh_storage, sample_data): | def test_content_metadata_get_paginate(self, swh_storage, sample_data): | ||||
content = sample_data.content | content_swhid = sample_data.swhid.content | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID(object_type="content", object_id=content.sha1_git) | |||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_get( | swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) | ||||
MetadataTargetType.CONTENT, content_swhid, authority | |||||
) | |||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | content_swhid, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
assert result.results == [content_metadata] | assert result.results == [content_metadata] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | content_swhid, authority, limit=1, page_token=result.next_page_token, | ||||
content_swhid, | |||||
authority, | |||||
limit=1, | |||||
page_token=result.next_page_token, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [content_metadata2] | assert result.results == [content_metadata2] | ||||
def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data): | def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data): | ||||
content = sample_data.content | content_swhid = sample_data.swhid.content | ||||
fetcher1, fetcher2 = sample_data.fetchers[:2] | fetcher1, fetcher2 = sample_data.fetchers[:2] | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID(object_type="content", object_id=content.sha1_git) | |||||
swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
new_content_metadata2 = attr.evolve( | new_content_metadata2 = attr.evolve( | ||||
content_metadata2, | content_metadata2, | ||||
discovery_date=content_metadata2.discovery_date, | discovery_date=content_metadata2.discovery_date, | ||||
fetcher=attr.evolve(fetcher2, metadata=None), | fetcher=attr.evolve(fetcher2, metadata=None), | ||||
) | ) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[content_metadata, new_content_metadata2] | [content_metadata, new_content_metadata2] | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | content_swhid, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
assert result.results == [content_metadata] | assert result.results == [content_metadata] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | content_swhid, authority, limit=1, page_token=result.next_page_token, | ||||
content_swhid, | |||||
authority, | |||||
limit=1, | |||||
page_token=result.next_page_token, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [new_content_metadata2] | assert result.results == [new_content_metadata2] | ||||
def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): | |||||
origin = sample_data.origin | |||||
fetcher = sample_data.metadata_fetcher | |||||
authority = sample_data.metadata_authority | |||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | |||||
swh_storage.metadata_fetcher_add([fetcher]) | |||||
swh_storage.metadata_authority_add([authority]) | |||||
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | |||||
with pytest.raises(StorageArgumentException, match="SWHID"): | |||||
swh_storage.raw_extrinsic_metadata_get( | |||||
MetadataTargetType.CONTENT, origin.url, authority | |||||
) | |||||
def test_origin_metadata_add(self, swh_storage, sample_data): | def test_origin_metadata_add(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) | ||||
MetadataTargetType.ORIGIN, origin.url, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [ | assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [ | ||||
origin_metadata, | origin_metadata, | ||||
origin_metadata2, | origin_metadata2, | ||||
] | ] | ||||
actual_objects = list(swh_storage.journal_writer.journal.objects) | actual_objects = list(swh_storage.journal_writer.journal.objects) | ||||
expected_objects = [ | expected_objects = [ | ||||
("metadata_authority", authority), | ("metadata_authority", authority), | ||||
("metadata_fetcher", fetcher), | ("metadata_fetcher", fetcher), | ||||
("raw_extrinsic_metadata", origin_metadata), | ("raw_extrinsic_metadata", origin_metadata), | ||||
("raw_extrinsic_metadata", origin_metadata2), | ("raw_extrinsic_metadata", origin_metadata2), | ||||
] | ] | ||||
for obj in expected_objects: | for obj in expected_objects: | ||||
assert obj in actual_objects | assert obj in actual_objects | ||||
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data): | def test_origin_metadata_add_duplicate(self, swh_storage, sample_data): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
new_origin_metadata2 = attr.evolve( | new_origin_metadata2 = attr.evolve( | ||||
origin_metadata2, format="new-format", metadata=b"new-metadata", | origin_metadata2, format="new-format", metadata=b"new-metadata", | ||||
) | ) | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) | ||||
MetadataTargetType.ORIGIN, origin.url, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
# which of the two behavior happens is backend-specific. | # which of the two behavior happens is backend-specific. | ||||
expected_results1 = (origin_metadata, new_origin_metadata2) | expected_results1 = (origin_metadata, new_origin_metadata2) | ||||
expected_results2 = (origin_metadata, origin_metadata2) | expected_results2 = (origin_metadata, origin_metadata2) | ||||
assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( | assert tuple(sorted(result.results, key=lambda x: x.discovery_date,)) in ( | ||||
expected_results1, # cassandra | expected_results1, # cassandra | ||||
expected_results2, # postgresql | expected_results2, # postgresql | ||||
) | ) | ||||
def test_origin_metadata_get(self, swh_storage, sample_data): | def test_origin_metadata_get(self, swh_storage, sample_data): | ||||
origin, origin2 = sample_data.origins[:2] | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
origin2 = sample_data.origin2 | |||||
origin2_swhid = sample_data.swhid.origin2 | |||||
fetcher, fetcher2 = sample_data.fetchers[:2] | fetcher, fetcher2 = sample_data.fetchers[:2] | ||||
authority, authority2 = sample_data.authorities[:2] | authority, authority2 = sample_data.authorities[:2] | ||||
( | ( | ||||
origin1_metadata1, | origin1_metadata1, | ||||
origin1_metadata2, | origin1_metadata2, | ||||
origin1_metadata3, | origin1_metadata3, | ||||
) = sample_data.origin_metadata[:3] | ) = sample_data.origin_metadata[:3] | ||||
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2} | assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2} | ||||
origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url) | origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid) | ||||
swh_storage.metadata_authority_add([authority, authority2]) | swh_storage.metadata_authority_add([authority, authority2]) | ||||
swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] | [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) | ||||
MetadataTargetType.ORIGIN, origin.url, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [origin1_metadata1, origin1_metadata2] == list( | assert [origin1_metadata1, origin1_metadata2] == list( | ||||
sorted(result.results, key=lambda x: x.discovery_date,) | sorted(result.results, key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2) | ||||
MetadataTargetType.ORIGIN, origin.url, authority2 | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [origin1_metadata3] == list( | assert [origin1_metadata3] == list( | ||||
sorted(result.results, key=lambda x: x.discovery_date,) | sorted(result.results, key=lambda x: x.discovery_date,) | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority) | ||||
MetadataTargetType.ORIGIN, origin2.url, authority | |||||
) | |||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert [origin2_metadata] == list(result.results,) | assert [origin2_metadata] == list(result.results,) | ||||
def test_origin_metadata_get_after(self, swh_storage, sample_data): | def test_origin_metadata_get_after(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, | origin_swhid, | ||||
origin.url, | |||||
authority, | authority, | ||||
after=origin_metadata.discovery_date - timedelta(seconds=1), | after=origin_metadata.discovery_date - timedelta(seconds=1), | ||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == [ | assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == [ | ||||
origin_metadata, | origin_metadata, | ||||
origin_metadata2, | origin_metadata2, | ||||
] | ] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, | origin_swhid, authority, after=origin_metadata.discovery_date, | ||||
origin.url, | |||||
authority, | |||||
after=origin_metadata.discovery_date, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [origin_metadata2] | assert result.results == [origin_metadata2] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, | origin_swhid, authority, after=origin_metadata2.discovery_date, | ||||
origin.url, | |||||
authority, | |||||
after=origin_metadata2.discovery_date, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [] | assert result.results == [] | ||||
def test_origin_metadata_get_paginate(self, swh_storage, sample_data): | def test_origin_metadata_get_paginate(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_get( | swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) | ||||
MetadataTargetType.ORIGIN, origin.url, authority | |||||
) | |||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin.url, authority, limit=1 | origin_swhid, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
assert result.results == [origin_metadata] | assert result.results == [origin_metadata] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, | origin_swhid, authority, limit=1, page_token=result.next_page_token, | ||||
origin.url, | |||||
authority, | |||||
limit=1, | |||||
page_token=result.next_page_token, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [origin_metadata2] | assert result.results == [origin_metadata2] | ||||
def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): | def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
origin_swhid = sample_data.swhid.origin | |||||
fetcher1, fetcher2 = sample_data.fetchers[:2] | fetcher1, fetcher2 = sample_data.fetchers[:2] | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
new_origin_metadata2 = attr.evolve( | new_origin_metadata2 = attr.evolve( | ||||
origin_metadata2, | origin_metadata2, | ||||
discovery_date=origin_metadata2.discovery_date, | discovery_date=origin_metadata2.discovery_date, | ||||
fetcher=attr.evolve(fetcher2, metadata=None), | fetcher=attr.evolve(fetcher2, metadata=None), | ||||
) | ) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin.url, authority, limit=1 | origin_swhid, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
assert result.results == [origin_metadata] | assert result.results == [origin_metadata] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, | origin_swhid, authority, limit=1, page_token=result.next_page_token, | ||||
origin.url, | |||||
authority, | |||||
limit=1, | |||||
page_token=result.next_page_token, | |||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results == [new_origin_metadata2] | assert result.results == [new_origin_metadata2] | ||||
def test_origin_metadata_add_missing_authority(self, swh_storage, sample_data): | def test_origin_metadata_add_missing_authority(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
Show All 10 Lines | def test_origin_metadata_add_missing_fetcher(self, swh_storage, sample_data): | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
with pytest.raises(StorageArgumentException, match="fetcher"): | with pytest.raises(StorageArgumentException, match="fetcher"): | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data): | |||||
origin = sample_data.origin | |||||
authority = sample_data.metadata_authority | |||||
fetcher = sample_data.metadata_fetcher | |||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | |||||
content_metadata = sample_data.content_metadata[0] | |||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | |||||
swh_storage.metadata_fetcher_add([fetcher]) | |||||
swh_storage.metadata_authority_add([authority]) | |||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | |||||
with pytest.raises(StorageArgumentException, match="SWHID"): | |||||
swh_storage.raw_extrinsic_metadata_get( | |||||
MetadataTargetType.ORIGIN, content_metadata.target, authority, | |||||
) | |||||
class TestStorageGeneratedData: | class TestStorageGeneratedData: | ||||
def test_generate_content_get_data(self, swh_storage, swh_contents): | def test_generate_content_get_data(self, swh_storage, swh_contents): | ||||
contents_with_data = [c for c in swh_contents if c.status != "absent"] | contents_with_data = [c for c in swh_contents if c.status != "absent"] | ||||
# retrieve contents | # retrieve contents | ||||
for content in contents_with_data: | for content in contents_with_data: | ||||
actual_content_data = swh_storage.content_get_data(content.sha1) | actual_content_data = swh_storage.content_get_data(content.sha1) | ||||
▲ Show 20 Lines • Show All 132 Lines • Show Last 20 Lines |