diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py
--- a/swh/web/common/archive.py
+++ b/swh/web/common/archive.py
@@ -345,6 +345,7 @@
     matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
         conjunction=[fulltext], limit=limit
     )
+    matches = [match.to_dict() for match in matches]
     results = []
     origins = storage.origin_get([match["id"] for match in matches])
     for origin, match in zip(origins, matches):
@@ -378,7 +379,7 @@
     match = _first_element(idx_storage.origin_intrinsic_metadata_get(origins))
     result = {}
     if match:
-        result = match["metadata"]
+        result = match.metadata
     return result
 
 
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -9,6 +9,8 @@
 import pytest
 from requests.utils import parse_header_links
 
+from swh.indexer.storage.model import OriginIntrinsicMetadataRow
+from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Origin, OriginVisit, OriginVisitStatus
 from swh.storage.exc import StorageAPIError, StorageDBError
 from swh.storage.utils import now
@@ -16,6 +18,12 @@
 from swh.web.common.exc import BadInputExc
 from swh.web.common.origin_visits import get_origin_visits
 from swh.web.common.utils import reverse
+from swh.web.tests.data import (
+    INDEXER_TOOL,
+    ORIGIN_MASTER_REVISION,
+    ORIGIN_METADATA_KEY,
+    ORIGIN_METADATA_VALUE,
+)
 from swh.web.tests.strategies import new_origin, new_snapshots, origin, visit_dates
 from swh.web.tests.utils import check_api_get_responses
 
@@ -545,131 +553,82 @@
     assert len(rv.data) == 1000
 
 
-@given(origin())
-def test_api_origin_metadata_search(api_client, mocker, origin):
-    mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
-    oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext
-    oimsft.side_effect = lambda conjunction, limit: [
-        {
-            "from_revision": (
-                b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8"
-            ),
-            "metadata": {"author": "Jane Doe"},
-            "id": origin["url"],
-            "tool": {
-                "configuration": {
-                    "context": ["NpmMapping", "CodemetaMapping"],
-                    "type": "local",
-                },
-                "id": 3,
-                "name": "swh-metadata-detector",
-                "version": "0.0.1",
-            },
-        }
-    ]
+def test_api_origin_metadata_search(api_client):
 
-    url = reverse("api-1-origin-metadata-search", query_params={"fulltext": "Jane Doe"})
+    url = reverse(
+        "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE}
+    )
     rv = check_api_get_responses(api_client, url, status_code=200)
+
     expected_data = [
         {
-            "url": origin["url"],
+            "url": origin_url,
             "metadata": {
-                "metadata": {"author": "Jane Doe"},
-                "from_revision": ("7026b7c1a2af56521e951c01ed20f255fa054238"),
+                "from_revision": master_rev,
                 "tool": {
-                    "configuration": {
-                        "context": ["NpmMapping", "CodemetaMapping"],
-                        "type": "local",
-                    },
-                    "id": 3,
-                    "name": "swh-metadata-detector",
-                    "version": "0.0.1",
+                    "name": INDEXER_TOOL["tool_name"],
+                    "version": INDEXER_TOOL["tool_version"],
+                    "configuration": INDEXER_TOOL["tool_configuration"],
+                    "id": INDEXER_TOOL["id"],
                 },
+                "metadata": {ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE},
+                "mappings": [],
             },
         }
+        for origin_url, master_rev in ORIGIN_MASTER_REVISION.items()
     ]
 
     assert rv.data == expected_data
-    oimsft.assert_called_with(conjunction=["Jane Doe"], limit=70)
 
 
-@given(origin())
-def test_api_origin_metadata_search_limit(api_client, mocker, origin):
+def test_api_origin_metadata_search_limit(api_client, mocker):
     mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
     oimsft = mock_idx_storage.origin_intrinsic_metadata_search_fulltext
 
     oimsft.side_effect = lambda conjunction, limit: [
-        {
-            "from_revision": (
-                b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8"
-            ),
-            "metadata": {"author": "Jane Doe"},
-            "id": origin["url"],
-            "tool": {
-                "configuration": {
-                    "context": ["NpmMapping", "CodemetaMapping"],
-                    "type": "local",
-                },
-                "id": 3,
-                "name": "swh-metadata-detector",
-                "version": "0.0.1",
-            },
-        }
+        OriginIntrinsicMetadataRow(
+            id=origin_url,
+            from_revision=hash_to_bytes(master_rev),
+            indexer_configuration_id=INDEXER_TOOL["id"],
+            metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE},
+            mappings=[],
+        )
+        for origin_url, master_rev in ORIGIN_MASTER_REVISION.items()
     ]
 
-    url = reverse("api-1-origin-metadata-search", query_params={"fulltext": "Jane Doe"})
+    url = reverse(
+        "api-1-origin-metadata-search", query_params={"fulltext": ORIGIN_METADATA_VALUE}
+    )
     rv = check_api_get_responses(api_client, url, status_code=200)
-    assert len(rv.data) == 1
-    oimsft.assert_called_with(conjunction=["Jane Doe"], limit=70)
+    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
+    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=70)
 
     url = reverse(
         "api-1-origin-metadata-search",
-        query_params={"fulltext": "Jane Doe", "limit": 10},
+        query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 10},
     )
     rv = check_api_get_responses(api_client, url, status_code=200)
-    assert len(rv.data) == 1
-    oimsft.assert_called_with(conjunction=["Jane Doe"], limit=10)
+    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
+    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=10)
 
     url = reverse(
         "api-1-origin-metadata-search",
-        query_params={"fulltext": "Jane Doe", "limit": 987},
+        query_params={"fulltext": ORIGIN_METADATA_VALUE, "limit": 987},
     )
     rv = check_api_get_responses(api_client, url, status_code=200)
-    assert len(rv.data) == 1
-    oimsft.assert_called_with(conjunction=["Jane Doe"], limit=100)
+    assert len(rv.data) == len(ORIGIN_MASTER_REVISION)
+    oimsft.assert_called_with(conjunction=[ORIGIN_METADATA_VALUE], limit=100)
 
 
 @given(origin())
-def test_api_origin_intrinsic_metadata(api_client, mocker, origin):
-    mock_idx_storage = mocker.patch("swh.web.common.archive.idx_storage")
-    oimg = mock_idx_storage.origin_intrinsic_metadata_get
-    oimg.side_effect = lambda origin_urls: [
-        {
-            "from_revision": (
-                b"p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed " b"\xf2U\xfa\x05B8"
-            ),
-            "metadata": {"author": "Jane Doe"},
-            "id": origin["url"],
-            "tool": {
-                "configuration": {
-                    "context": ["NpmMapping", "CodemetaMapping"],
-                    "type": "local",
-                },
-                "id": 3,
-                "name": "swh-metadata-detector",
-                "version": "0.0.1",
-            },
-        }
-    ]
+def test_api_origin_intrinsic_metadata(api_client, origin):
 
     url = reverse(
         "api-origin-intrinsic-metadata", url_args={"origin_url": origin["url"]}
     )
     rv = check_api_get_responses(api_client, url, status_code=200)
 
-    oimg.assert_called_with([origin["url"]])
-
-    expected_data = {"author": "Jane Doe"}
+    expected_data = {ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE}
     assert rv.data == expected_data
 
 
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -15,6 +15,7 @@
 from swh.indexer.fossology_license import FossologyLicenseIndexer
 from swh.indexer.mimetype import MimetypeIndexer
 from swh.indexer.storage import get_indexer_storage
+from swh.indexer.storage.model import OriginIntrinsicMetadataRow
 from swh.loader.git.from_disk import GitLoaderFromArchive
 from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
 from swh.model.model import Content, Directory, Origin, OriginVisit, OriginVisitStatus
@@ -114,6 +115,11 @@
 # Lightweight git repositories that will be loaded to generate
 # input data for tests
 _TEST_ORIGINS = [
+    {
+        "type": "git",
+        "url": "https://github.com/memononen/libtess2",
+        "archives": ["libtess2.zip"],
+    },
     {
         "type": "git",
         "url": "https://github.com/wcoder/highlightjs-line-numbers.js",
@@ -122,11 +128,6 @@
             "highlightjs-line-numbers.js_visit2.zip",
         ],
     },
-    {
-        "type": "git",
-        "url": "https://github.com/memononen/libtess2",
-        "archives": ["libtess2.zip"],
-    },
     {
         "type": "git",
         "url": "repo_with_submodules",
@@ -155,6 +156,18 @@
     contents.add(pbm_content.sha1)
 
 
+INDEXER_TOOL = {
+    "tool_name": "swh-web tests",
+    "tool_version": "1.0",
+    "tool_configuration": {},
+}
+
+ORIGIN_METADATA_KEY = "vcs"
+ORIGIN_METADATA_VALUE = "git"
+
+ORIGIN_MASTER_REVISION = {}
+
+
 # Tests data initialization
 def _init_tests_data():
     # To hold reference to the memory storage
@@ -165,6 +178,13 @@
     search.initialize()
     search.origin_update({"url": origin["url"]} for origin in _TEST_ORIGINS)
 
+    # Create indexer storage instance that will be shared by indexers
+    idx_storage = get_indexer_storage("memory")
+
+    # Declare a test tool for origin intrinsic metadata tests
+    idx_tool = idx_storage.indexer_configuration_add([INDEXER_TOOL])[0]
+    INDEXER_TOOL["id"] = idx_tool["id"]
+
     # Load git repositories from archives
     for origin in _TEST_ORIGINS:
         for i, archive_ in enumerate(origin["archives"]):
@@ -223,6 +243,19 @@
             target_type = branch_data.target_type.value
             if target_type == "revision":
                 revisions.add(branch_data.target)
+                if b"master" in branch_name:
+                    # Add some origin intrinsic metadata for tests
+                    origin_metadata = OriginIntrinsicMetadataRow(
+                        id=origin["url"],
+                        from_revision=branch_data.target,
+                        indexer_configuration_id=idx_tool["id"],
+                        metadata={ORIGIN_METADATA_KEY: ORIGIN_METADATA_VALUE},
+                        mappings=[],
+                    )
+                    idx_storage.origin_intrinsic_metadata_add([origin_metadata])
+                    ORIGIN_MASTER_REVISION[origin["url"]] = hash_to_hex(
+                        branch_data.target
+                    )
             elif target_type == "release":
                 release = storage.release_get([branch_data.target])[0]
                 revisions.add(release.target)
@@ -281,9 +314,6 @@
         _contents[sha1] = content_metadata
         contents.append(content_metadata)
 
-    # Create indexer storage instance that will be shared by indexers
-    idx_storage = get_indexer_storage("memory")
-
     # Add the empty directory to the test archive
     storage.directory_add([Directory(entries=())])
 