Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
Show All 13 Lines | |||||
from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | ||||
from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
BaseRow, | BaseRow, | ||||
ContentCtagsRow, | ContentCtagsRow, | ||||
ContentLanguageRow, | ContentLanguageRow, | ||||
ContentLicenseRow, | ContentLicenseRow, | ||||
ContentMetadataRow, | ContentMetadataRow, | ||||
ContentMimetypeRow, | ContentMimetypeRow, | ||||
DirectoryIntrinsicMetadataRow, | |||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
RevisionIntrinsicMetadataRow, | |||||
) | ) | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
def prepare_mimetypes_from_licenses( | def prepare_mimetypes_from_licenses( | ||||
fossology_licenses: List[ContentLicenseRow], | fossology_licenses: List[ContentLicenseRow], | ||||
) -> List[ContentMimetypeRow]: | ) -> List[ContentMimetypeRow]: | ||||
"""Fossology license needs some consistent data in db to run.""" | """Fossology license needs some consistent data in db to run.""" | ||||
▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | class StorageETypeTester: | ||||
def test_add__duplicate_twice( | def test_add__duplicate_twice( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
etype = self.endpoint_type | etype = self.endpoint_type | ||||
tool = data.tools[self.tool_name] | tool = data.tools[self.tool_name] | ||||
data_rev1 = self.row_class.from_dict( | data_dir1 = self.row_class.from_dict( | ||||
{ | { | ||||
"id": data.revision_id_2, | "id": data.directory_id_2, | ||||
**self.example_data[0], | **self.example_data[0], | ||||
"indexer_configuration_id": tool["id"], | "indexer_configuration_id": tool["id"], | ||||
} | } | ||||
) | ) | ||||
data_rev2 = self.row_class.from_dict( | data_dir2 = self.row_class.from_dict( | ||||
{ | { | ||||
"id": data.revision_id_2, | "id": data.directory_id_2, | ||||
**self.example_data[1], | **self.example_data[1], | ||||
"indexer_configuration_id": tool["id"], | "indexer_configuration_id": tool["id"], | ||||
} | } | ||||
) | ) | ||||
# when | # when | ||||
summary = endpoint(storage, etype, "add")([data_rev1]) | summary = endpoint(storage, etype, "add")([data_dir1]) | ||||
assert summary == expected_summary(1, etype) | assert summary == expected_summary(1, etype) | ||||
with pytest.raises(DuplicateId): | with pytest.raises(DuplicateId): | ||||
endpoint(storage, etype, "add")([data_rev2, data_rev2]) | endpoint(storage, etype, "add")([data_dir2, data_dir2]) | ||||
# then | # then | ||||
actual_data = list( | actual_data = list( | ||||
endpoint(storage, etype, "get")([data.revision_id_2, data.revision_id_1]) | endpoint(storage, etype, "get")([data.directory_id_2, data.directory_id_1]) | ||||
) | ) | ||||
expected_data = [ | expected_data = [ | ||||
self.row_class.from_dict( | self.row_class.from_dict( | ||||
{"id": data.revision_id_2, **self.example_data[0], "tool": tool} | {"id": data.directory_id_2, **self.example_data[0], "tool": tool} | ||||
) | ) | ||||
] | ] | ||||
assert actual_data == expected_data | assert actual_data == expected_data | ||||
def test_add( | def test_add( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
▲ Show 20 Lines • Show All 470 Lines • ▼ Show 20 Lines | example_data = [ | ||||
}, | }, | ||||
{ | { | ||||
"metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | ||||
}, | }, | ||||
] | ] | ||||
row_class = ContentMetadataRow | row_class = ContentMetadataRow | ||||
class TestIndexerStorageRevisionIntrinsicMetadata(StorageETypeTester): | class TestIndexerStorageDirectoryIntrinsicMetadata(StorageETypeTester): | ||||
"""Test Indexer Storage revision_intrinsic_metadata related methods""" | """Test Indexer Storage directory_intrinsic_metadata related methods""" | ||||
tool_name = "swh-metadata-detector" | tool_name = "swh-metadata-detector" | ||||
endpoint_type = "revision_intrinsic_metadata" | endpoint_type = "directory_intrinsic_metadata" | ||||
example_data = [ | example_data = [ | ||||
{ | { | ||||
"metadata": { | "metadata": { | ||||
"other": {}, | "other": {}, | ||||
"codeRepository": { | "codeRepository": { | ||||
"type": "git", | "type": "git", | ||||
"url": "https://github.com/moranegg/metadata_test", | "url": "https://github.com/moranegg/metadata_test", | ||||
}, | }, | ||||
"description": "Simple package.json test for indexer", | "description": "Simple package.json test for indexer", | ||||
"name": "test_metadata", | "name": "test_metadata", | ||||
"version": "0.0.1", | "version": "0.0.1", | ||||
}, | }, | ||||
"mappings": ["mapping1"], | "mappings": ["mapping1"], | ||||
}, | }, | ||||
{ | { | ||||
"metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | ||||
"mappings": ["mapping2"], | "mappings": ["mapping2"], | ||||
}, | }, | ||||
] | ] | ||||
row_class = RevisionIntrinsicMetadataRow | row_class = DirectoryIntrinsicMetadataRow | ||||
class TestIndexerStorageContentFossologyLicense(StorageETypeTester): | class TestIndexerStorageContentFossologyLicense(StorageETypeTester): | ||||
endpoint_type = "content_fossology_license" | endpoint_type = "content_fossology_license" | ||||
tool_name = "nomos" | tool_name = "nomos" | ||||
example_data = [ | example_data = [ | ||||
{"license": "Apache-2.0"}, | {"license": "Apache-2.0"}, | ||||
{"license": "BSD-2-Clause"}, | {"license": "BSD-2-Clause"}, | ||||
▲ Show 20 Lines • Show All 255 Lines • ▼ Show 20 Lines | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
# given | # given | ||||
tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
metadata = { | metadata = { | ||||
"version": None, | "version": None, | ||||
"name": None, | "name": None, | ||||
} | } | ||||
metadata_rev = RevisionIntrinsicMetadataRow( | metadata_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata, | metadata=metadata, | ||||
mappings=["mapping1"], | mappings=["mapping1"], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata_origin = OriginIntrinsicMetadataRow( | metadata_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata, | metadata=metadata, | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
mappings=["mapping1"], | mappings=["mapping1"], | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
# when | # when | ||||
storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.directory_intrinsic_metadata_add([metadata_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata_origin]) | storage.origin_intrinsic_metadata_add([metadata_origin]) | ||||
# then | # then | ||||
actual_metadata = list( | actual_metadata = list( | ||||
storage.origin_intrinsic_metadata_get([data.origin_url_1, "no://where"]) | storage.origin_intrinsic_metadata_get([data.origin_url_1, "no://where"]) | ||||
) | ) | ||||
expected_metadata = [ | expected_metadata = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata, | metadata=metadata, | ||||
tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
mappings=["mapping1"], | mappings=["mapping1"], | ||||
) | ) | ||||
] | ] | ||||
assert actual_metadata == expected_metadata | assert actual_metadata == expected_metadata | ||||
journal_objects = storage.journal_writer.journal.objects # type: ignore | journal_objects = storage.journal_writer.journal.objects # type: ignore | ||||
actual_journal_metadata = [ | actual_journal_metadata = [ | ||||
Show All 9 Lines | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
# given | # given | ||||
tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
metadata_v1: Dict[str, Any] = { | metadata_v1: Dict[str, Any] = { | ||||
"version": None, | "version": None, | ||||
"name": None, | "name": None, | ||||
} | } | ||||
metadata_rev_v1 = RevisionIntrinsicMetadataRow( | metadata_dir_v1 = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata_v1, | metadata=metadata_v1, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata_origin_v1 = OriginIntrinsicMetadataRow( | metadata_origin_v1 = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata_v1.copy(), | metadata=metadata_v1.copy(), | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
mappings=[], | mappings=[], | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
# given | # given | ||||
storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.directory_intrinsic_metadata_add([metadata_dir_v1]) | ||||
storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list( | actual_metadata = list( | ||||
storage.origin_intrinsic_metadata_get([data.origin_url_1]) | storage.origin_intrinsic_metadata_get([data.origin_url_1]) | ||||
) | ) | ||||
# then | # then | ||||
expected_metadata_v1 = [ | expected_metadata_v1 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata_v1, | metadata=metadata_v1, | ||||
tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
mappings=[], | mappings=[], | ||||
) | ) | ||||
] | ] | ||||
assert actual_metadata == expected_metadata_v1 | assert actual_metadata == expected_metadata_v1 | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update( | metadata_v2.update( | ||||
{ | { | ||||
"name": "test_update_duplicated_metadata", | "name": "test_update_duplicated_metadata", | ||||
"author": "MG", | "author": "MG", | ||||
} | } | ||||
) | ) | ||||
metadata_rev_v2 = attr.evolve(metadata_rev_v1, metadata=metadata_v2) | metadata_dir_v2 = attr.evolve(metadata_dir_v1, metadata=metadata_v2) | ||||
metadata_origin_v2 = OriginIntrinsicMetadataRow( | metadata_origin_v2 = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata_v2.copy(), | metadata=metadata_v2.copy(), | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
) | ) | ||||
storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | storage.directory_intrinsic_metadata_add([metadata_dir_v2]) | ||||
storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | ||||
actual_metadata = list( | actual_metadata = list( | ||||
storage.origin_intrinsic_metadata_get([data.origin_url_1]) | storage.origin_intrinsic_metadata_get([data.origin_url_1]) | ||||
) | ) | ||||
expected_metadata_v2 = [ | expected_metadata_v2 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata_v2, | metadata=metadata_v2, | ||||
tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
) | ) | ||||
] | ] | ||||
# metadata did change as the v2 was used to overwrite v1 | # metadata did change as the v2 was used to overwrite v1 | ||||
assert actual_metadata == expected_metadata_v2 | assert actual_metadata == expected_metadata_v2 | ||||
def test_origin_intrinsic_metadata_add__deadlock( | def test_origin_intrinsic_metadata_add__deadlock( | ||||
Show All 15 Lines | ) -> None: | ||||
example_data2: Dict[str, Any] = { | example_data2: Dict[str, Any] = { | ||||
"metadata": { | "metadata": { | ||||
"version": "v1.1.1", | "version": "v1.1.1", | ||||
"name": "foo", | "name": "foo", | ||||
}, | }, | ||||
"mappings": [], | "mappings": [], | ||||
} | } | ||||
metadata_rev_v1 = RevisionIntrinsicMetadataRow( | metadata_dir_v1 = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata={ | metadata={ | ||||
"version": None, | "version": None, | ||||
"name": None, | "name": None, | ||||
}, | }, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
data_v1 = [ | data_v1 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=origin, | id=origin, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
**example_data1, | **example_data1, | ||||
) | ) | ||||
for origin in origins | for origin in origins | ||||
] | ] | ||||
data_v2 = [ | data_v2 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=origin, | id=origin, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
**example_data2, | **example_data2, | ||||
) | ) | ||||
for origin in origins | for origin in origins | ||||
] | ] | ||||
# Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
# all items to be in the DB. | # all items to be in the DB. | ||||
data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
# given | # given | ||||
storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.directory_intrinsic_metadata_add([metadata_dir_v1]) | ||||
storage.origin_intrinsic_metadata_add(data_v1) | storage.origin_intrinsic_metadata_add(data_v1) | ||||
# when | # when | ||||
actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
expected_data_v1 = [ | expected_data_v1 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=origin, | id=origin, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
**example_data1, | **example_data1, | ||||
) | ) | ||||
for origin in origins | for origin in origins | ||||
] | ] | ||||
# then | # then | ||||
assert actual_data == expected_data_v1 | assert actual_data == expected_data_v1 | ||||
Show All 13 Lines | ) -> None: | ||||
t1.join() | t1.join() | ||||
t2.join() | t2.join() | ||||
actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
expected_data_v2 = [ | expected_data_v2 = [ | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=origin, | id=origin, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
**example_data2, | **example_data2, | ||||
) | ) | ||||
for origin in origins | for origin in origins | ||||
] | ] | ||||
actual_data.sort(key=lambda item: item.id) | actual_data.sort(key=lambda item: item.id) | ||||
assert len(actual_data) == len(expected_data_v1) == len(expected_data_v2) | assert len(actual_data) == len(expected_data_v1) == len(expected_data_v2) | ||||
for (item, expected_item_v1, expected_item_v2) in zip( | for (item, expected_item_v1, expected_item_v2) in zip( | ||||
actual_data, expected_data_v1, expected_data_v2 | actual_data, expected_data_v1, expected_data_v2 | ||||
): | ): | ||||
assert item in (expected_item_v1, expected_item_v2) | assert item in (expected_item_v1, expected_item_v2) | ||||
def test_origin_intrinsic_metadata_add__duplicate_twice( | def test_origin_intrinsic_metadata_add__duplicate_twice( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
# given | # given | ||||
tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
metadata = { | metadata = { | ||||
"developmentStatus": None, | "developmentStatus": None, | ||||
"name": None, | "name": None, | ||||
} | } | ||||
metadata_rev = RevisionIntrinsicMetadataRow( | metadata_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata, | metadata=metadata, | ||||
mappings=["mapping1"], | mappings=["mapping1"], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata_origin = OriginIntrinsicMetadataRow( | metadata_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata, | metadata=metadata, | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
mappings=["mapping1"], | mappings=["mapping1"], | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
# when | # when | ||||
storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.directory_intrinsic_metadata_add([metadata_dir]) | ||||
with pytest.raises(DuplicateId): | with pytest.raises(DuplicateId): | ||||
storage.origin_intrinsic_metadata_add([metadata_origin, metadata_origin]) | storage.origin_intrinsic_metadata_add([metadata_origin, metadata_origin]) | ||||
def test_origin_intrinsic_metadata_search_fulltext( | def test_origin_intrinsic_metadata_search_fulltext( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
# given | # given | ||||
tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
metadata1 = { | metadata1 = { | ||||
"author": "John Doe", | "author": "John Doe", | ||||
} | } | ||||
metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_1, | id=data.directory_id_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
) | ) | ||||
metadata2 = { | metadata2 = { | ||||
"author": "Jane Doe", | "author": "Jane Doe", | ||||
} | } | ||||
metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_2, | id=data.origin_url_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
# when | # when | ||||
storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
assert set([res.id for res in search(["Doe"])]) == set( | assert set([res.id for res in search(["Doe"])]) == set( | ||||
[data.origin_url_1, data.origin_url_2] | [data.origin_url_1, data.origin_url_2] | ||||
) | ) | ||||
assert [res.id for res in search(["John", "Doe"])] == [data.origin_url_1] | assert [res.id for res in search(["John", "Doe"])] == [data.origin_url_1] | ||||
Show All 13 Lines | ) -> None: | ||||
# for small values of nb_words). | # for small values of nb_words). | ||||
metadata1 = { | metadata1 = { | ||||
"author": [ | "author": [ | ||||
"Random Person", | "Random Person", | ||||
"John Doe", | "John Doe", | ||||
"Jane Doe", | "Jane Doe", | ||||
] | ] | ||||
} | } | ||||
metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_1, | id=data.directory_id_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
) | ) | ||||
metadata2 = { | metadata2 = { | ||||
"author": [ | "author": [ | ||||
"Random Person", | "Random Person", | ||||
"Jane Doe", | "Jane Doe", | ||||
] | ] | ||||
} | } | ||||
metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
) | ) | ||||
metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_2, | id=data.origin_url_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=[], | mappings=[], | ||||
indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
# when | # when | ||||
storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
assert [res.id for res in search(["Doe"])] == [ | assert [res.id for res in search(["Doe"])] == [ | ||||
data.origin_url_1, | data.origin_url_1, | ||||
data.origin_url_2, | data.origin_url_2, | ||||
] | ] | ||||
Show All 11 Lines | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
tool1_id = data.tools["swh-metadata-detector"]["id"] | tool1_id = data.tools["swh-metadata-detector"]["id"] | ||||
tool2_id = data.tools["swh-metadata-detector2"]["id"] | tool2_id = data.tools["swh-metadata-detector2"]["id"] | ||||
metadata1 = { | metadata1 = { | ||||
"@context": "foo", | "@context": "foo", | ||||
"author": "John Doe", | "author": "John Doe", | ||||
} | } | ||||
metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_1, | id=data.directory_id_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
indexer_configuration_id=tool1_id, | indexer_configuration_id=tool1_id, | ||||
) | ) | ||||
metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_1, | id=data.origin_url_1, | ||||
metadata=metadata1, | metadata=metadata1, | ||||
mappings=["npm"], | mappings=["npm"], | ||||
indexer_configuration_id=tool1_id, | indexer_configuration_id=tool1_id, | ||||
from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
) | ) | ||||
metadata2 = { | metadata2 = { | ||||
"@context": "foo", | "@context": "foo", | ||||
"author": "Jane Doe", | "author": "Jane Doe", | ||||
} | } | ||||
metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_2, | id=data.directory_id_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
) | ) | ||||
metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_2, | id=data.origin_url_2, | ||||
metadata=metadata2, | metadata=metadata2, | ||||
mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
metadata3 = { | metadata3 = { | ||||
"@context": "foo", | "@context": "foo", | ||||
} | } | ||||
metadata3_rev = RevisionIntrinsicMetadataRow( | metadata3_dir = DirectoryIntrinsicMetadataRow( | ||||
id=data.revision_id_3, | id=data.directory_id_3, | ||||
metadata=metadata3, | metadata=metadata3, | ||||
mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
) | ) | ||||
metadata3_origin = OriginIntrinsicMetadataRow( | metadata3_origin = OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_3, | id=data.origin_url_3, | ||||
metadata=metadata3, | metadata=metadata3, | ||||
mappings=["pkg-info"], | mappings=["pkg-info"], | ||||
indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
from_revision=data.revision_id_3, | from_directory=data.directory_id_3, | ||||
) | ) | ||||
storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
storage.revision_intrinsic_metadata_add([metadata3_rev]) | storage.directory_intrinsic_metadata_add([metadata3_dir]) | ||||
storage.origin_intrinsic_metadata_add([metadata3_origin]) | storage.origin_intrinsic_metadata_add([metadata3_origin]) | ||||
def test_origin_intrinsic_metadata_search_by_producer( | def test_origin_intrinsic_metadata_search_by_producer( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
self._fill_origin_intrinsic_metadata(swh_indexer_storage_with_data) | self._fill_origin_intrinsic_metadata(swh_indexer_storage_with_data) | ||||
tool1 = data.tools["swh-metadata-detector"] | tool1 = data.tools["swh-metadata-detector"] | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | ) -> None: | ||||
OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
id=data.origin_url_2, | id=data.origin_url_2, | ||||
metadata={ | metadata={ | ||||
"@context": "foo", | "@context": "foo", | ||||
"author": "Jane Doe", | "author": "Jane Doe", | ||||
}, | }, | ||||
mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
tool=tool2, | tool=tool2, | ||||
from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
) | ) | ||||
], | ], | ||||
next_page_token=None, | next_page_token=None, | ||||
) | ) | ||||
def test_origin_intrinsic_metadata_stats( | def test_origin_intrinsic_metadata_stats( | ||||
self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
) -> None: | ) -> None: | ||||
▲ Show 20 Lines • Show All 144 Lines • Show Last 20 Lines |