diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py
--- a/swh/web/api/views/identifiers.py
+++ b/swh/web/api/views/identifiers.py
@@ -3,11 +3,6 @@
 # License: GNU Affero General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-
-from swh.model.identifiers import (
-    CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
-)
-
 from swh.web.common import service
 from swh.web.common.utils import resolve_swh_persistent_id
 from swh.web.api.apidoc import api_doc, format_docstring
@@ -60,16 +55,7 @@
     swh_id_parsed = swh_id_resolved['swh_id_parsed']
     object_type = swh_id_parsed.object_type
     object_id = swh_id_parsed.object_id
-    if object_type == CONTENT:
-        service.lookup_content('sha1_git:%s' % object_id)
-    elif object_type == DIRECTORY:
-        service.lookup_directory(object_id)
-    elif object_type == RELEASE:
-        service.lookup_release(object_id)
-    elif object_type == REVISION:
-        service.lookup_revision(object_id)
-    elif object_type == SNAPSHOT:
-        service.lookup_snapshot(object_id)
+    service.lookup_object(object_type, object_id)
     # id is well-formed and the pointed object exists
     swh_id_data = swh_id_parsed._asdict()
     swh_id_data['browse_url'] = swh_id_resolved['browse_url']
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -6,14 +6,18 @@
 import os
 
 from collections import defaultdict
+from typing import Any, Dict
 
 from swh.model import hashutil
 
 from swh.storage.algos import diff, revisions_walker
+from swh.model.identifiers import (
+    CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
+)
 
 from swh.web.common import converters
 from swh.web.common import query
-from swh.web.common.exc import NotFoundExc
+from swh.web.common.exc import BadInputExc, NotFoundExc
 from swh.web.common.origin_visits import get_origin_visit
 from swh.web import config
 
@@ -1103,3 +1107,41 @@
     # first check if the provided revision is valid
     lookup_revision(rev_start)
     return _RevisionsWalkerProxy(rev_walker_type, rev_start, *args, **kwargs)
+
+
+def lookup_object(object_type: str, object_id: str) -> Dict[str, Any]:
+    """
+    Utility function for looking up an object in the archive by its type
+    and id.
+
+    Args:
+        object_type (str): the type of object to lookup, either *content*,
+            *directory*, *release*, *revision* or *snapshot*
+        object_id (str): the *sha1_git* checksum identifier in hexadecimal
+            form of the object to lookup
+
+    Returns:
+        Dict[str, Any]: A dictionary describing the object; for a directory,
+            a dictionary with its *id* and the list of its entries (*content*)
+
+    Raises:
+        NotFoundExc: if the object could not be found in the archive
+        BadInputExc: if the object type or the object identifier is invalid
+    """
+    if object_type == CONTENT:
+        return lookup_content(f'sha1_git:{object_id}')
+    elif object_type == DIRECTORY:
+        return {
+            'id': object_id,
+            'content': list(lookup_directory(object_id))
+        }
+    elif object_type == RELEASE:
+        return lookup_release(object_id)
+    elif object_type == REVISION:
+        return lookup_revision(object_id)
+    elif object_type == SNAPSHOT:
+        return lookup_snapshot(object_id)
+
+    raise BadInputExc(('Invalid swh object type! Valid types are '
+                       f'{CONTENT}, {DIRECTORY}, {RELEASE}, '
+                       f'{REVISION} or {SNAPSHOT}.'))
diff --git a/swh/web/tests/common/test_service.py b/swh/web/tests/common/test_service.py
--- a/swh/web/tests/common/test_service.py
+++ b/swh/web/tests/common/test_service.py
@@ -12,17 +12,20 @@
 
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 from swh.model.from_disk import DentryPerms
+from swh.model.identifiers import (
+    CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT
+)
 
 from swh.web.common import service
 from swh.web.common.exc import BadInputExc, NotFoundExc
 from swh.web.tests.data import random_sha1, random_content
 from swh.web.tests.strategies import (
-    content, contents, unknown_contents,
+    content, unknown_content, contents, unknown_contents,
     contents_with_ctags, origin, new_origin, visit_dates, directory,
-    release, revision, unknown_revision, revisions,
-    ancestor_revisions, non_ancestor_revisions, invalid_sha1, sha256,
-    revision_with_submodules, empty_directory,
-    new_revision
+    unknown_directory, release, unknown_release, revision, unknown_revision,
+    revisions, ancestor_revisions, non_ancestor_revisions, invalid_sha1,
+    sha256, revision_with_submodules, empty_directory, new_revision,
+    snapshot, unknown_snapshot
 )
 from swh.web.tests.conftest import ctags_json_missing, fossology_missing
 
@@ -800,3 +803,76 @@
     ) == (revision,
           service.lookup_directory_with_revision(revision, dir_entry['name'],
                                                  with_data=True))
+
+
+@given(content(), directory(), release(), revision(), snapshot())
+def test_lookup_known_objects(archive_data, content, directory, release,
+                              revision, snapshot):
+    expected = archive_data.content_find(content)
+    assert service.lookup_object(CONTENT, content['sha1_git']) == expected
+
+    expected = archive_data.directory_get(directory)
+    assert service.lookup_object(DIRECTORY, directory) == expected
+
+    expected = archive_data.release_get(release)
+    assert service.lookup_object(RELEASE, release) == expected
+
+    expected = archive_data.revision_get(revision)
+    assert service.lookup_object(REVISION, revision) == expected
+
+    expected = archive_data.snapshot_get(snapshot)
+    assert service.lookup_object(SNAPSHOT, snapshot) == expected
+
+
+@given(unknown_content(), unknown_directory(), unknown_release(),
+       unknown_revision(), unknown_snapshot())
+def test_lookup_unknown_objects(unknown_content, unknown_directory,
+                                unknown_release, unknown_revision,
+                                unknown_snapshot):
+    with pytest.raises(NotFoundExc) as e:
+        service.lookup_object(CONTENT, unknown_content['sha1_git'])
+    assert e.match(r'Content.*not found')
+
+    with pytest.raises(NotFoundExc) as e:
+        service.lookup_object(DIRECTORY, unknown_directory)
+    assert e.match(r'Directory.*not found')
+
+    with pytest.raises(NotFoundExc) as e:
+        service.lookup_object(RELEASE, unknown_release)
+    assert e.match(r'Release.*not found')
+
+    with pytest.raises(NotFoundExc) as e:
+        service.lookup_object(REVISION, unknown_revision)
+    assert e.match(r'Revision.*not found')
+
+    with pytest.raises(NotFoundExc) as e:
+        service.lookup_object(SNAPSHOT, unknown_snapshot)
+    assert e.match(r'Snapshot.*not found')
+
+
+@given(invalid_sha1())
+def test_lookup_invalid_objects(invalid_sha1):
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object('foo', invalid_sha1)
+    assert e.match('Invalid swh object type')
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object(CONTENT, invalid_sha1)
+    assert e.match('Invalid hash')
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object(DIRECTORY, invalid_sha1)
+    assert e.match('Invalid checksum')
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object(RELEASE, invalid_sha1)
+    assert e.match('Invalid checksum')
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object(REVISION, invalid_sha1)
+    assert e.match('Invalid checksum')
+
+    with pytest.raises(BadInputExc) as e:
+        service.lookup_object(SNAPSHOT, invalid_sha1)
+    assert e.match('Invalid checksum')
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -16,7 +16,7 @@
 from hypothesis import settings, HealthCheck
 from rest_framework.test import APIClient
 
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import ALGORITHMS, hash_to_bytes
 from swh.web.common import converters
 from swh.web.tests.data import get_tests_data, override_storages
 
@@ -147,6 +147,13 @@
                     not method_name.startswith('_')):
                 setattr(self, method_name, _call_storage_method(method))
 
+    def content_find(self, content):
+        cnt_ids_bytes = {algo_hash: hash_to_bytes(content[algo_hash])
+                         for algo_hash in ALGORITHMS
+                         if content.get(algo_hash)}
+        cnt = self.storage.content_find(cnt_ids_bytes)
+        return converters.from_content(cnt[0]) if cnt else cnt
+
     def content_get_metadata(self, cnt_id):
         cnt_id_bytes = hash_to_bytes(cnt_id)
         metadata = next(self.storage.content_get_metadata([cnt_id_bytes]))
@@ -159,6 +166,12 @@
         cnt = next(self.storage.content_get([cnt_id_bytes]))
         return converters.from_content(cnt)
 
+    def directory_get(self, dir_id):
+        return {
+            'id': dir_id,
+            'content': self.directory_ls(dir_id)
+        }
+
     def directory_ls(self, dir_id):
         cnt_id_bytes = hash_to_bytes(dir_id)
         dir_content = map(converters.from_directory_entry,