Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
| Show All 13 Lines | |||||
| from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | ||||
| from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
| BaseRow, | BaseRow, | ||||
| ContentCtagsRow, | ContentCtagsRow, | ||||
| ContentLanguageRow, | ContentLanguageRow, | ||||
| ContentLicenseRow, | ContentLicenseRow, | ||||
| ContentMetadataRow, | ContentMetadataRow, | ||||
| ContentMimetypeRow, | ContentMimetypeRow, | ||||
| DirectoryIntrinsicMetadataRow, | |||||
| OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
| RevisionIntrinsicMetadataRow, | |||||
| ) | ) | ||||
| from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
| def prepare_mimetypes_from_licenses( | def prepare_mimetypes_from_licenses( | ||||
| fossology_licenses: List[ContentLicenseRow], | fossology_licenses: List[ContentLicenseRow], | ||||
| ) -> List[ContentMimetypeRow]: | ) -> List[ContentMimetypeRow]: | ||||
| """Fossology license needs some consistent data in db to run.""" | """Fossology license needs some consistent data in db to run.""" | ||||
| ▲ Show 20 Lines • Show All 252 Lines • ▼ Show 20 Lines | class StorageETypeTester: | ||||
| def test_add__duplicate_twice( | def test_add__duplicate_twice( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| etype = self.endpoint_type | etype = self.endpoint_type | ||||
| tool = data.tools[self.tool_name] | tool = data.tools[self.tool_name] | ||||
| data_rev1 = self.row_class.from_dict( | data_dir1 = self.row_class.from_dict( | ||||
| { | { | ||||
| "id": data.revision_id_2, | "id": data.directory_id_2, | ||||
| **self.example_data[0], | **self.example_data[0], | ||||
| "indexer_configuration_id": tool["id"], | "indexer_configuration_id": tool["id"], | ||||
| } | } | ||||
| ) | ) | ||||
| data_rev2 = self.row_class.from_dict( | data_dir2 = self.row_class.from_dict( | ||||
| { | { | ||||
| "id": data.revision_id_2, | "id": data.directory_id_2, | ||||
| **self.example_data[1], | **self.example_data[1], | ||||
| "indexer_configuration_id": tool["id"], | "indexer_configuration_id": tool["id"], | ||||
| } | } | ||||
| ) | ) | ||||
| # when | # when | ||||
| summary = endpoint(storage, etype, "add")([data_rev1]) | summary = endpoint(storage, etype, "add")([data_dir1]) | ||||
| assert summary == expected_summary(1, etype) | assert summary == expected_summary(1, etype) | ||||
| with pytest.raises(DuplicateId): | with pytest.raises(DuplicateId): | ||||
| endpoint(storage, etype, "add")([data_rev2, data_rev2]) | endpoint(storage, etype, "add")([data_dir2, data_dir2]) | ||||
| # then | # then | ||||
| actual_data = list( | actual_data = list( | ||||
| endpoint(storage, etype, "get")([data.revision_id_2, data.revision_id_1]) | endpoint(storage, etype, "get")([data.directory_id_2, data.directory_id_1]) | ||||
| ) | ) | ||||
| expected_data = [ | expected_data = [ | ||||
| self.row_class.from_dict( | self.row_class.from_dict( | ||||
| {"id": data.revision_id_2, **self.example_data[0], "tool": tool} | {"id": data.directory_id_2, **self.example_data[0], "tool": tool} | ||||
| ) | ) | ||||
| ] | ] | ||||
| assert actual_data == expected_data | assert actual_data == expected_data | ||||
| def test_add( | def test_add( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| ▲ Show 20 Lines • Show All 470 Lines • ▼ Show 20 Lines | example_data = [ | ||||
| }, | }, | ||||
| { | { | ||||
| "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | ||||
| }, | }, | ||||
| ] | ] | ||||
| row_class = ContentMetadataRow | row_class = ContentMetadataRow | ||||
| class TestIndexerStorageRevisionIntrinsicMetadata(StorageETypeTester): | class TestIndexerStorageDirectoryIntrinsicMetadata(StorageETypeTester): | ||||
| """Test Indexer Storage revision_intrinsic_metadata related methods""" | """Test Indexer Storage directory_intrinsic_metadata related methods""" | ||||
| tool_name = "swh-metadata-detector" | tool_name = "swh-metadata-detector" | ||||
| endpoint_type = "revision_intrinsic_metadata" | endpoint_type = "directory_intrinsic_metadata" | ||||
| example_data = [ | example_data = [ | ||||
| { | { | ||||
| "metadata": { | "metadata": { | ||||
| "other": {}, | "other": {}, | ||||
| "codeRepository": { | "codeRepository": { | ||||
| "type": "git", | "type": "git", | ||||
| "url": "https://github.com/moranegg/metadata_test", | "url": "https://github.com/moranegg/metadata_test", | ||||
| }, | }, | ||||
| "description": "Simple package.json test for indexer", | "description": "Simple package.json test for indexer", | ||||
| "name": "test_metadata", | "name": "test_metadata", | ||||
| "version": "0.0.1", | "version": "0.0.1", | ||||
| }, | }, | ||||
| "mappings": ["mapping1"], | "mappings": ["mapping1"], | ||||
| }, | }, | ||||
| { | { | ||||
| "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | "metadata": {"other": {}, "name": "test_metadata", "version": "0.0.1"}, | ||||
| "mappings": ["mapping2"], | "mappings": ["mapping2"], | ||||
| }, | }, | ||||
| ] | ] | ||||
| row_class = RevisionIntrinsicMetadataRow | row_class = DirectoryIntrinsicMetadataRow | ||||
| class TestIndexerStorageContentFossologyLicense(StorageETypeTester): | class TestIndexerStorageContentFossologyLicense(StorageETypeTester): | ||||
| endpoint_type = "content_fossology_license" | endpoint_type = "content_fossology_license" | ||||
| tool_name = "nomos" | tool_name = "nomos" | ||||
| example_data = [ | example_data = [ | ||||
| {"license": "Apache-2.0"}, | {"license": "Apache-2.0"}, | ||||
| {"license": "BSD-2-Clause"}, | {"license": "BSD-2-Clause"}, | ||||
| ▲ Show 20 Lines • Show All 255 Lines • ▼ Show 20 Lines | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| # given | # given | ||||
| tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
| metadata = { | metadata = { | ||||
| "version": None, | "version": None, | ||||
| "name": None, | "name": None, | ||||
| } | } | ||||
| metadata_rev = RevisionIntrinsicMetadataRow( | metadata_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata, | metadata=metadata, | ||||
| mappings=["mapping1"], | mappings=["mapping1"], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata_origin = OriginIntrinsicMetadataRow( | metadata_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata, | metadata=metadata, | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| mappings=["mapping1"], | mappings=["mapping1"], | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| # when | # when | ||||
| storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.directory_intrinsic_metadata_add([metadata_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata_origin]) | storage.origin_intrinsic_metadata_add([metadata_origin]) | ||||
| # then | # then | ||||
| actual_metadata = list( | actual_metadata = list( | ||||
| storage.origin_intrinsic_metadata_get([data.origin_url_1, "no://where"]) | storage.origin_intrinsic_metadata_get([data.origin_url_1, "no://where"]) | ||||
| ) | ) | ||||
| expected_metadata = [ | expected_metadata = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata, | metadata=metadata, | ||||
| tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| mappings=["mapping1"], | mappings=["mapping1"], | ||||
| ) | ) | ||||
| ] | ] | ||||
| assert actual_metadata == expected_metadata | assert actual_metadata == expected_metadata | ||||
| journal_objects = storage.journal_writer.journal.objects # type: ignore | journal_objects = storage.journal_writer.journal.objects # type: ignore | ||||
| actual_journal_metadata = [ | actual_journal_metadata = [ | ||||
| Show All 9 Lines | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| # given | # given | ||||
| tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
| metadata_v1: Dict[str, Any] = { | metadata_v1: Dict[str, Any] = { | ||||
| "version": None, | "version": None, | ||||
| "name": None, | "name": None, | ||||
| } | } | ||||
| metadata_rev_v1 = RevisionIntrinsicMetadataRow( | metadata_dir_v1 = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata_v1, | metadata=metadata_v1, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata_origin_v1 = OriginIntrinsicMetadataRow( | metadata_origin_v1 = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata_v1.copy(), | metadata=metadata_v1.copy(), | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| mappings=[], | mappings=[], | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| # given | # given | ||||
| storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.directory_intrinsic_metadata_add([metadata_dir_v1]) | ||||
| storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
| # when | # when | ||||
| actual_metadata = list( | actual_metadata = list( | ||||
| storage.origin_intrinsic_metadata_get([data.origin_url_1]) | storage.origin_intrinsic_metadata_get([data.origin_url_1]) | ||||
| ) | ) | ||||
| # then | # then | ||||
| expected_metadata_v1 = [ | expected_metadata_v1 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata_v1, | metadata=metadata_v1, | ||||
| tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| mappings=[], | mappings=[], | ||||
| ) | ) | ||||
| ] | ] | ||||
| assert actual_metadata == expected_metadata_v1 | assert actual_metadata == expected_metadata_v1 | ||||
| # given | # given | ||||
| metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
| metadata_v2.update( | metadata_v2.update( | ||||
| { | { | ||||
| "name": "test_update_duplicated_metadata", | "name": "test_update_duplicated_metadata", | ||||
| "author": "MG", | "author": "MG", | ||||
| } | } | ||||
| ) | ) | ||||
| metadata_rev_v2 = attr.evolve(metadata_rev_v1, metadata=metadata_v2) | metadata_dir_v2 = attr.evolve(metadata_dir_v1, metadata=metadata_v2) | ||||
| metadata_origin_v2 = OriginIntrinsicMetadataRow( | metadata_origin_v2 = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata_v2.copy(), | metadata=metadata_v2.copy(), | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
| ) | ) | ||||
| storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | storage.directory_intrinsic_metadata_add([metadata_dir_v2]) | ||||
| storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | ||||
| actual_metadata = list( | actual_metadata = list( | ||||
| storage.origin_intrinsic_metadata_get([data.origin_url_1]) | storage.origin_intrinsic_metadata_get([data.origin_url_1]) | ||||
| ) | ) | ||||
| expected_metadata_v2 = [ | expected_metadata_v2 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata_v2, | metadata=metadata_v2, | ||||
| tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
| from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| ) | ) | ||||
| ] | ] | ||||
| # metadata did change as the v2 was used to overwrite v1 | # metadata did change as the v2 was used to overwrite v1 | ||||
| assert actual_metadata == expected_metadata_v2 | assert actual_metadata == expected_metadata_v2 | ||||
| def test_origin_intrinsic_metadata_add__deadlock( | def test_origin_intrinsic_metadata_add__deadlock( | ||||
| Show All 15 Lines | ) -> None: | ||||
| example_data2: Dict[str, Any] = { | example_data2: Dict[str, Any] = { | ||||
| "metadata": { | "metadata": { | ||||
| "version": "v1.1.1", | "version": "v1.1.1", | ||||
| "name": "foo", | "name": "foo", | ||||
| }, | }, | ||||
| "mappings": [], | "mappings": [], | ||||
| } | } | ||||
| metadata_rev_v1 = RevisionIntrinsicMetadataRow( | metadata_dir_v1 = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata={ | metadata={ | ||||
| "version": None, | "version": None, | ||||
| "name": None, | "name": None, | ||||
| }, | }, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| data_v1 = [ | data_v1 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=origin, | id=origin, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| **example_data1, | **example_data1, | ||||
| ) | ) | ||||
| for origin in origins | for origin in origins | ||||
| ] | ] | ||||
| data_v2 = [ | data_v2 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=origin, | id=origin, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| **example_data2, | **example_data2, | ||||
| ) | ) | ||||
| for origin in origins | for origin in origins | ||||
| ] | ] | ||||
| # Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
| # all items to be in the DB. | # all items to be in the DB. | ||||
| data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
| data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
| # given | # given | ||||
| storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.directory_intrinsic_metadata_add([metadata_dir_v1]) | ||||
| storage.origin_intrinsic_metadata_add(data_v1) | storage.origin_intrinsic_metadata_add(data_v1) | ||||
| # when | # when | ||||
| actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
| expected_data_v1 = [ | expected_data_v1 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=origin, | id=origin, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
| **example_data1, | **example_data1, | ||||
| ) | ) | ||||
| for origin in origins | for origin in origins | ||||
| ] | ] | ||||
| # then | # then | ||||
| assert actual_data == expected_data_v1 | assert actual_data == expected_data_v1 | ||||
| Show All 13 Lines | ) -> None: | ||||
| t1.join() | t1.join() | ||||
| t2.join() | t2.join() | ||||
| actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
| expected_data_v2 = [ | expected_data_v2 = [ | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=origin, | id=origin, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| tool=data.tools["swh-metadata-detector"], | tool=data.tools["swh-metadata-detector"], | ||||
| **example_data2, | **example_data2, | ||||
| ) | ) | ||||
| for origin in origins | for origin in origins | ||||
| ] | ] | ||||
| actual_data.sort(key=lambda item: item.id) | actual_data.sort(key=lambda item: item.id) | ||||
| assert len(actual_data) == len(expected_data_v1) == len(expected_data_v2) | assert len(actual_data) == len(expected_data_v1) == len(expected_data_v2) | ||||
| for (item, expected_item_v1, expected_item_v2) in zip( | for (item, expected_item_v1, expected_item_v2) in zip( | ||||
| actual_data, expected_data_v1, expected_data_v2 | actual_data, expected_data_v1, expected_data_v2 | ||||
| ): | ): | ||||
| assert item in (expected_item_v1, expected_item_v2) | assert item in (expected_item_v1, expected_item_v2) | ||||
| def test_origin_intrinsic_metadata_add__duplicate_twice( | def test_origin_intrinsic_metadata_add__duplicate_twice( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| # given | # given | ||||
| tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
| metadata = { | metadata = { | ||||
| "developmentStatus": None, | "developmentStatus": None, | ||||
| "name": None, | "name": None, | ||||
| } | } | ||||
| metadata_rev = RevisionIntrinsicMetadataRow( | metadata_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata, | metadata=metadata, | ||||
| mappings=["mapping1"], | mappings=["mapping1"], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata_origin = OriginIntrinsicMetadataRow( | metadata_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata, | metadata=metadata, | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| mappings=["mapping1"], | mappings=["mapping1"], | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| # when | # when | ||||
| storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.directory_intrinsic_metadata_add([metadata_dir]) | ||||
| with pytest.raises(DuplicateId): | with pytest.raises(DuplicateId): | ||||
| storage.origin_intrinsic_metadata_add([metadata_origin, metadata_origin]) | storage.origin_intrinsic_metadata_add([metadata_origin, metadata_origin]) | ||||
| def test_origin_intrinsic_metadata_search_fulltext( | def test_origin_intrinsic_metadata_search_fulltext( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| # given | # given | ||||
| tool_id = data.tools["swh-metadata-detector"]["id"] | tool_id = data.tools["swh-metadata-detector"]["id"] | ||||
| metadata1 = { | metadata1 = { | ||||
| "author": "John Doe", | "author": "John Doe", | ||||
| } | } | ||||
| metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_1, | id=data.directory_id_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
| ) | ) | ||||
| metadata2 = { | metadata2 = { | ||||
| "author": "Jane Doe", | "author": "Jane Doe", | ||||
| } | } | ||||
| metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_2, | id=data.origin_url_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| # when | # when | ||||
| storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
| storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
| # then | # then | ||||
| search = storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
| assert set([res.id for res in search(["Doe"])]) == set( | assert set([res.id for res in search(["Doe"])]) == set( | ||||
| [data.origin_url_1, data.origin_url_2] | [data.origin_url_1, data.origin_url_2] | ||||
| ) | ) | ||||
| assert [res.id for res in search(["John", "Doe"])] == [data.origin_url_1] | assert [res.id for res in search(["John", "Doe"])] == [data.origin_url_1] | ||||
| Show All 13 Lines | ) -> None: | ||||
| # for small values of nb_words). | # for small values of nb_words). | ||||
| metadata1 = { | metadata1 = { | ||||
| "author": [ | "author": [ | ||||
| "Random Person", | "Random Person", | ||||
| "John Doe", | "John Doe", | ||||
| "Jane Doe", | "Jane Doe", | ||||
| ] | ] | ||||
| } | } | ||||
| metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_1, | id=data.directory_id_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
| ) | ) | ||||
| metadata2 = { | metadata2 = { | ||||
| "author": [ | "author": [ | ||||
| "Random Person", | "Random Person", | ||||
| "Jane Doe", | "Jane Doe", | ||||
| ] | ] | ||||
| } | } | ||||
| metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| ) | ) | ||||
| metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_2, | id=data.origin_url_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=[], | mappings=[], | ||||
| indexer_configuration_id=tool_id, | indexer_configuration_id=tool_id, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| # when | # when | ||||
| storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
| storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
| # then | # then | ||||
| search = storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
| assert [res.id for res in search(["Doe"])] == [ | assert [res.id for res in search(["Doe"])] == [ | ||||
| data.origin_url_1, | data.origin_url_1, | ||||
| data.origin_url_2, | data.origin_url_2, | ||||
| ] | ] | ||||
| Show All 11 Lines | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| tool1_id = data.tools["swh-metadata-detector"]["id"] | tool1_id = data.tools["swh-metadata-detector"]["id"] | ||||
| tool2_id = data.tools["swh-metadata-detector2"]["id"] | tool2_id = data.tools["swh-metadata-detector2"]["id"] | ||||
| metadata1 = { | metadata1 = { | ||||
| "@context": "foo", | "@context": "foo", | ||||
| "author": "John Doe", | "author": "John Doe", | ||||
| } | } | ||||
| metadata1_rev = RevisionIntrinsicMetadataRow( | metadata1_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_1, | id=data.directory_id_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| indexer_configuration_id=tool1_id, | indexer_configuration_id=tool1_id, | ||||
| ) | ) | ||||
| metadata1_origin = OriginIntrinsicMetadataRow( | metadata1_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_1, | id=data.origin_url_1, | ||||
| metadata=metadata1, | metadata=metadata1, | ||||
| mappings=["npm"], | mappings=["npm"], | ||||
| indexer_configuration_id=tool1_id, | indexer_configuration_id=tool1_id, | ||||
| from_revision=data.revision_id_1, | from_directory=data.directory_id_1, | ||||
| ) | ) | ||||
| metadata2 = { | metadata2 = { | ||||
| "@context": "foo", | "@context": "foo", | ||||
| "author": "Jane Doe", | "author": "Jane Doe", | ||||
| } | } | ||||
| metadata2_rev = RevisionIntrinsicMetadataRow( | metadata2_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_2, | id=data.directory_id_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
| indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
| ) | ) | ||||
| metadata2_origin = OriginIntrinsicMetadataRow( | metadata2_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_2, | id=data.origin_url_2, | ||||
| metadata=metadata2, | metadata=metadata2, | ||||
| mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
| indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| metadata3 = { | metadata3 = { | ||||
| "@context": "foo", | "@context": "foo", | ||||
| } | } | ||||
| metadata3_rev = RevisionIntrinsicMetadataRow( | metadata3_dir = DirectoryIntrinsicMetadataRow( | ||||
| id=data.revision_id_3, | id=data.directory_id_3, | ||||
| metadata=metadata3, | metadata=metadata3, | ||||
| mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
| indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
| ) | ) | ||||
| metadata3_origin = OriginIntrinsicMetadataRow( | metadata3_origin = OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_3, | id=data.origin_url_3, | ||||
| metadata=metadata3, | metadata=metadata3, | ||||
| mappings=["pkg-info"], | mappings=["pkg-info"], | ||||
| indexer_configuration_id=tool2_id, | indexer_configuration_id=tool2_id, | ||||
| from_revision=data.revision_id_3, | from_directory=data.directory_id_3, | ||||
| ) | ) | ||||
| storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.directory_intrinsic_metadata_add([metadata1_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
| storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.directory_intrinsic_metadata_add([metadata2_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
| storage.revision_intrinsic_metadata_add([metadata3_rev]) | storage.directory_intrinsic_metadata_add([metadata3_dir]) | ||||
| storage.origin_intrinsic_metadata_add([metadata3_origin]) | storage.origin_intrinsic_metadata_add([metadata3_origin]) | ||||
| def test_origin_intrinsic_metadata_search_by_producer( | def test_origin_intrinsic_metadata_search_by_producer( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| storage, data = swh_indexer_storage_with_data | storage, data = swh_indexer_storage_with_data | ||||
| self._fill_origin_intrinsic_metadata(swh_indexer_storage_with_data) | self._fill_origin_intrinsic_metadata(swh_indexer_storage_with_data) | ||||
| tool1 = data.tools["swh-metadata-detector"] | tool1 = data.tools["swh-metadata-detector"] | ||||
| ▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | ) -> None: | ||||
| OriginIntrinsicMetadataRow( | OriginIntrinsicMetadataRow( | ||||
| id=data.origin_url_2, | id=data.origin_url_2, | ||||
| metadata={ | metadata={ | ||||
| "@context": "foo", | "@context": "foo", | ||||
| "author": "Jane Doe", | "author": "Jane Doe", | ||||
| }, | }, | ||||
| mappings=["npm", "gemspec"], | mappings=["npm", "gemspec"], | ||||
| tool=tool2, | tool=tool2, | ||||
| from_revision=data.revision_id_2, | from_directory=data.directory_id_2, | ||||
| ) | ) | ||||
| ], | ], | ||||
| next_page_token=None, | next_page_token=None, | ||||
| ) | ) | ||||
| def test_origin_intrinsic_metadata_stats( | def test_origin_intrinsic_metadata_stats( | ||||
| self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any] | ||||
| ) -> None: | ) -> None: | ||||
| ▲ Show 20 Lines • Show All 144 Lines • Show Last 20 Lines | |||||