Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/storage_tests.py
Show All 22 Lines | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Person, | Person, | ||||
RawExtrinsicMetadata, | |||||
Revision, | Revision, | ||||
SkippedContent, | SkippedContent, | ||||
Snapshot, | Snapshot, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.common import origin_url_to_sha1 as sha1 | from swh.storage.common import origin_url_to_sha1 as sha1 | ||||
from swh.storage.exc import HashCollision, StorageArgumentException | from swh.storage.exc import HashCollision, StorageArgumentException | ||||
from swh.storage.interface import ListOrder, PagedResult, StorageInterface | from swh.storage.interface import ListOrder, PagedResult, StorageInterface | ||||
from swh.storage.utils import content_hex_hashes, now, round_to_milliseconds | from swh.storage.utils import ( | ||||
content_hex_hashes, | |||||
now, | |||||
remove_keys, | |||||
round_to_milliseconds, | |||||
) | |||||
def transform_entries( | def transform_entries( | ||||
storage: StorageInterface, dir_: Directory, *, prefix: bytes = b"" | storage: StorageInterface, dir_: Directory, *, prefix: bytes = b"" | ||||
) -> Iterator[Dict[str, Any]]: | ) -> Iterator[Dict[str, Any]]: | ||||
"""Iterate through a directory's entries, and yields the items 'directory_ls' is | """Iterate through a directory's entries, and yields the items 'directory_ls' is | ||||
expected to return; including content metadata for file entries.""" | expected to return; including content metadata for file entries.""" | ||||
▲ Show 20 Lines • Show All 3,356 Lines • ▼ Show 20 Lines | def test_content_metadata_add_duplicate(self, swh_storage, sample_data): | ||||
content = sample_data.content | content = sample_data.content | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID( | content_swhid = SWHID( | ||||
object_type="content", object_id=hash_to_bytes(content.sha1_git) | object_type="content", object_id=hash_to_bytes(content.sha1_git) | ||||
) | ) | ||||
new_content_metadata2 = attr.evolve( | new_content_metadata2 = RawExtrinsicMetadata.from_dict( | ||||
content_metadata2, format="new-format", metadata=b"new-metadata", | { | ||||
**remove_keys(content_metadata2.to_dict(), ("id",)), # recompute id | |||||
"format": "new-format", | |||||
"metadata": b"new-metadata", | |||||
} | |||||
) | ) | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) | swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) | ||||
Show All 17 Lines | def test_content_metadata_get(self, swh_storage, sample_data): | ||||
( | ( | ||||
content1_metadata1, | content1_metadata1, | ||||
content1_metadata2, | content1_metadata2, | ||||
content1_metadata3, | content1_metadata3, | ||||
) = sample_data.content_metadata[:3] | ) = sample_data.content_metadata[:3] | ||||
content1_swhid = SWHID(object_type="content", object_id=content.sha1_git) | content1_swhid = SWHID(object_type="content", object_id=content.sha1_git) | ||||
content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git) | content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git) | ||||
content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid) | content2_metadata = RawExtrinsicMetadata.from_dict( | ||||
{ | |||||
**remove_keys(content1_metadata2.to_dict(), ("id",)), # recompute id | |||||
"target": str(content2_swhid), | |||||
} | |||||
) | |||||
swh_storage.metadata_authority_add([authority, authority2]) | swh_storage.metadata_authority_add([authority, authority2]) | ||||
swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[ | [ | ||||
content1_metadata1, | content1_metadata1, | ||||
content1_metadata2, | content1_metadata2, | ||||
▲ Show 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data): | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
content_swhid = SWHID(object_type="content", object_id=content.sha1_git) | content_swhid = SWHID(object_type="content", object_id=content.sha1_git) | ||||
swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
new_content_metadata2 = attr.evolve( | new_content_metadata2 = RawExtrinsicMetadata.from_dict( | ||||
content_metadata2, | { | ||||
discovery_date=content_metadata2.discovery_date, | **remove_keys(content_metadata2.to_dict(), ("id",)), # recompute id | ||||
fetcher=attr.evolve(fetcher2, metadata=None), | "discovery_date": content_metadata2.discovery_date, | ||||
"fetcher": attr.evolve(fetcher2, metadata=None).to_dict(), | |||||
} | |||||
) | ) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[content_metadata, new_content_metadata2] | [content_metadata, new_content_metadata2] | ||||
) | ) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | MetadataTargetType.CONTENT, content_swhid, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
assert result.results == [content_metadata] | assert result.results == [content_metadata] | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.CONTENT, | MetadataTargetType.CONTENT, | ||||
content_swhid, | content_swhid, | ||||
authority, | authority, | ||||
limit=1, | limit=1, | ||||
page_token=result.next_page_token, | page_token=result.next_page_token, | ||||
) | ) | ||||
assert result.next_page_token is None | assert result.next_page_token is None | ||||
assert result.results[0].to_dict() == new_content_metadata2.to_dict() | |||||
assert result.results == [new_content_metadata2] | assert result.results == [new_content_metadata2] | ||||
def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): | def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
content_metadata, content_metadata2 = sample_data.content_metadata[:2] | content_metadata, content_metadata2 = sample_data.content_metadata[:2] | ||||
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines | |||||
def test_origin_metadata_add_duplicate(self, swh_storage, sample_data): | def test_origin_metadata_add_duplicate(self, swh_storage, sample_data): | ||||
"""Duplicates should be silently updated.""" | """Duplicates should be silently updated.""" | ||||
origin = sample_data.origin | origin = sample_data.origin | ||||
fetcher = sample_data.metadata_fetcher | fetcher = sample_data.metadata_fetcher | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
new_origin_metadata2 = attr.evolve( | new_origin_metadata2 = RawExtrinsicMetadata.from_dict( | ||||
origin_metadata2, format="new-format", metadata=b"new-metadata", | { | ||||
**remove_keys(origin_metadata2.to_dict(), ("id",)), # recompute id | |||||
"format": "new-format", | |||||
"metadata": b"new-metadata", | |||||
} | |||||
) | ) | ||||
swh_storage.metadata_fetcher_add([fetcher]) | swh_storage.metadata_fetcher_add([fetcher]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) | ||||
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) | ||||
Show All 18 Lines | def test_origin_metadata_get(self, swh_storage, sample_data): | ||||
( | ( | ||||
origin1_metadata1, | origin1_metadata1, | ||||
origin1_metadata2, | origin1_metadata2, | ||||
origin1_metadata3, | origin1_metadata3, | ||||
) = sample_data.origin_metadata[:3] | ) = sample_data.origin_metadata[:3] | ||||
assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2} | assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2} | ||||
origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url) | origin2_metadata = RawExtrinsicMetadata.from_dict( | ||||
{ | |||||
**remove_keys(origin1_metadata2.to_dict(), ("id",)), # recompute id | |||||
"target": origin2.url, | |||||
} | |||||
) | |||||
swh_storage.metadata_authority_add([authority, authority2]) | swh_storage.metadata_authority_add([authority, authority2]) | ||||
swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher, fetcher2]) | ||||
swh_storage.raw_extrinsic_metadata_add( | swh_storage.raw_extrinsic_metadata_add( | ||||
[origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] | [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 98 Lines • ▼ Show 20 Lines | def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): | ||||
fetcher1, fetcher2 = sample_data.fetchers[:2] | fetcher1, fetcher2 = sample_data.fetchers[:2] | ||||
authority = sample_data.metadata_authority | authority = sample_data.metadata_authority | ||||
origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] | ||||
assert swh_storage.origin_add([origin]) == {"origin:add": 1} | assert swh_storage.origin_add([origin]) == {"origin:add": 1} | ||||
swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) | ||||
swh_storage.metadata_authority_add([authority]) | swh_storage.metadata_authority_add([authority]) | ||||
new_origin_metadata2 = attr.evolve( | new_origin_metadata2 = RawExtrinsicMetadata.from_dict( | ||||
origin_metadata2, | { | ||||
discovery_date=origin_metadata2.discovery_date, | **remove_keys(origin_metadata2.to_dict(), ("id",)), # recompute id | ||||
fetcher=attr.evolve(fetcher2, metadata=None), | "discovery_date": origin_metadata2.discovery_date, | ||||
"fetcher": attr.evolve(fetcher2, metadata=None).to_dict(), | |||||
} | |||||
) | ) | ||||
swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) | swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) | ||||
result = swh_storage.raw_extrinsic_metadata_get( | result = swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.ORIGIN, origin.url, authority, limit=1 | MetadataTargetType.ORIGIN, origin.url, authority, limit=1 | ||||
) | ) | ||||
assert result.next_page_token is not None | assert result.next_page_token is not None | ||||
▲ Show 20 Lines • Show All 192 Lines • Show Last 20 Lines |