diff --git a/swh/web/api/views/identifiers.py b/swh/web/api/views/identifiers.py --- a/swh/web/api/views/identifiers.py +++ b/swh/web/api/views/identifiers.py @@ -1,16 +1,16 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.web.common import service, utils -from swh.web.common.utils import ( - resolve_swh_persistent_id, - get_persistent_identifier -) from swh.web.api.apidoc import api_doc, format_docstring from swh.web.api.apiurls import api_route +from swh.web.common import service from swh.web.common.exc import LargePayloadExc +from swh.web.common.identifiers import ( + resolve_swh_persistent_id, get_persistent_identifier, + group_swh_persistent_identifiers +) @api_route(r'/resolve/(?P.*)/', @@ -109,7 +109,7 @@ response = {str(pid): {'known': False} for pid in persistent_ids} # group pids by their type - pids_by_type = utils.group_swh_persistent_identifiers(persistent_ids) + pids_by_type = group_swh_persistent_identifiers(persistent_ids) # search for hashes not present in the storage missing_hashes = service.lookup_missing_hashes(pids_by_type) diff --git a/swh/web/browse/identifiers.py b/swh/web/browse/identifiers.py --- a/swh/web/browse/identifiers.py +++ b/swh/web/browse/identifiers.py @@ -1,11 +1,11 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import redirect -from swh.web.common.utils import resolve_swh_persistent_id +from swh.web.common.identifiers import resolve_swh_persistent_id from swh.web.common.exc import handle_view_exception diff --git a/swh/web/browse/utils.py b/swh/web/browse/utils.py --- a/swh/web/browse/utils.py +++ b/swh/web/browse/utils.py @@ -19,10 +19,10 @@ from swh.model.identifiers import persistent_identifier from swh.web.common import highlightjs, service from swh.web.common.exc import NotFoundExc, http_status_code_message +from swh.web.common.identifiers import get_swh_persistent_id from swh.web.common.origin_visits import get_origin_visit from swh.web.common.utils import ( - reverse, format_utc_iso_date, get_swh_persistent_id, - swh_object_icons, rst_to_html + reverse, format_utc_iso_date, swh_object_icons, rst_to_html ) from swh.web.config import get_config diff --git a/swh/web/common/identifiers.py b/swh/web/common/identifiers.py new file mode 100644 --- /dev/null +++ b/swh/web/common/identifiers.py @@ -0,0 +1,168 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from django.http import QueryDict + +from swh.model.exceptions import ValidationError +from swh.model.hashutil import hash_to_bytes +from swh.model.identifiers import ( + persistent_identifier, parse_persistent_identifier, + CONTENT, DIRECTORY, ORIGIN, RELEASE, REVISION, SNAPSHOT +) + +from swh.web.common.exc import BadInputExc +from swh.web.common.utils import reverse + + +def get_swh_persistent_id(object_type, object_id, scheme_version=1): + """ + Returns the persistent identifier for a swh object based on: + + * the object type + * the object id + * the swh identifiers scheme version + + Args: + object_type (str): the swh object type + (content/directory/release/revision/snapshot) + object_id (str): the swh object id (hexadecimal representation + of its hash value) + scheme_version (int): the scheme version of the swh + persistent identifiers + + Returns: + str: the swh object persistent identifier + + Raises: + BadInputExc: if the provided parameters do not enable to + generate a valid identifier + """ + try: + swh_id = persistent_identifier(object_type, object_id, scheme_version) + except ValidationError as e: + raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % + (object_id, e)) + else: + return swh_id + + +def resolve_swh_persistent_id(swh_id, query_params=None): + """ + Try to resolve a Software Heritage persistent id into an url for + browsing the pointed object. + + Args: + swh_id (str): a Software Heritage persistent identifier + query_params (django.http.QueryDict): optional dict filled with + query parameters to append to the browse url + + Returns: + dict: a dict with the following keys: + + * **swh_id_parsed (swh.model.identifiers.PersistentId)**: + the parsed identifier + * **browse_url (str)**: the url for browsing the pointed object + """ + swh_id_parsed = get_persistent_identifier(swh_id) + object_type = swh_id_parsed.object_type + object_id = swh_id_parsed.object_id + browse_url = None + query_dict = QueryDict('', mutable=True) + if query_params and len(query_params) > 0: + for k in sorted(query_params.keys()): + query_dict[k] = query_params[k] + if 'origin' in swh_id_parsed.metadata: + query_dict['origin'] = swh_id_parsed.metadata['origin'] + if object_type == CONTENT: + query_string = 'sha1_git:' + object_id + fragment = '' + if 'lines' in swh_id_parsed.metadata: + lines = swh_id_parsed.metadata['lines'].split('-') + fragment += '#L' + lines[0] + if len(lines) > 1: + fragment += '-L' + lines[1] + browse_url = reverse('browse-content', + url_args={'query_string': query_string}, + query_params=query_dict) + fragment + elif object_type == DIRECTORY: + browse_url = reverse('browse-directory', + url_args={'sha1_git': object_id}, + query_params=query_dict) + elif object_type == RELEASE: + browse_url = reverse('browse-release', + url_args={'sha1_git': object_id}, + query_params=query_dict) + elif object_type == REVISION: + browse_url = reverse('browse-revision', + url_args={'sha1_git': object_id}, + query_params=query_dict) + elif object_type == SNAPSHOT: + browse_url = reverse('browse-snapshot', + url_args={'snapshot_id': object_id}, + query_params=query_dict) + elif object_type == ORIGIN: + raise BadInputExc(('Origin PIDs (Persistent Identifiers) are not ' + 'publicly resolvable because they are for ' + 'internal usage only')) + + return {'swh_id_parsed': swh_id_parsed, + 'browse_url': browse_url} + + +def get_persistent_identifier(persistent_id): + """Check if a persistent identifier is valid. + + Args: + persistent_id: A string representing a Software Heritage + persistent identifier. + + Raises: + BadInputExc: if the provided persistent identifier can + not be parsed. + + Return: + A persistent identifier object. + """ + try: + pid_object = parse_persistent_identifier(persistent_id) + except ValidationError as ve: + raise BadInputExc('Error when parsing identifier: %s' % + ' '.join(ve.messages)) + else: + return pid_object + + +def group_swh_persistent_identifiers(persistent_ids): + """ + Groups many Software Heritage persistent identifiers into a + dictionary depending on their type. + + Args: + persistent_ids (list): a list of Software Heritage persistent + identifier objects + + Returns: + A dictionary with: + keys: persistent identifier types + values: list(bytes) persistent identifiers id + + Raises: + BadInputExc: if one of the provided persistent identifier can + not be parsed. + """ + pids_by_type = { + CONTENT: [], + DIRECTORY: [], + REVISION: [], + RELEASE: [], + SNAPSHOT: [] + } + + for pid in persistent_ids: + obj_id = pid.object_id + obj_type = pid.object_type + pids_by_type[obj_type].append(hash_to_bytes(obj_id)) + + return pids_by_type diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py --- a/swh/web/common/utils.py +++ b/swh/web/common/utils.py @@ -26,13 +26,6 @@ from rest_framework.authentication import SessionAuthentication -from swh.model.exceptions import ValidationError -from swh.model.hashutil import hash_to_bytes -from swh.model.identifiers import ( - persistent_identifier, parse_persistent_identifier, - CONTENT, DIRECTORY, ORIGIN, RELEASE, REVISION, SNAPSHOT -) - from swh.web.common.exc import BadInputExc from swh.web.config import get_config @@ -203,101 +196,6 @@ return path_info -def get_swh_persistent_id(object_type, object_id, scheme_version=1): - """ - Returns the persistent identifier for a swh object based on: - - * the object type - * the object id - * the swh identifiers scheme version - - Args: - object_type (str): the swh object type - (content/directory/release/revision/snapshot) - object_id (str): the swh object id (hexadecimal representation - of its hash value) - scheme_version (int): the scheme version of the swh - persistent identifiers - - Returns: - str: the swh object persistent identifier - - Raises: - BadInputExc: if the provided parameters do not enable to - generate a valid identifier - """ - try: - swh_id = persistent_identifier(object_type, object_id, scheme_version) - except ValidationError as e: - raise BadInputExc('Invalid object (%s) for swh persistent id. %s' % - (object_id, e)) - else: - return swh_id - - -def resolve_swh_persistent_id(swh_id, query_params=None): - """ - Try to resolve a Software Heritage persistent id into an url for - browsing the pointed object. - - Args: - swh_id (str): a Software Heritage persistent identifier - query_params (django.http.QueryDict): optional dict filled with - query parameters to append to the browse url - - Returns: - dict: a dict with the following keys: - - * **swh_id_parsed (swh.model.identifiers.PersistentId)**: - the parsed identifier - * **browse_url (str)**: the url for browsing the pointed object - """ - swh_id_parsed = get_persistent_identifier(swh_id) - object_type = swh_id_parsed.object_type - object_id = swh_id_parsed.object_id - browse_url = None - query_dict = QueryDict('', mutable=True) - if query_params and len(query_params) > 0: - for k in sorted(query_params.keys()): - query_dict[k] = query_params[k] - if 'origin' in swh_id_parsed.metadata: - query_dict['origin'] = swh_id_parsed.metadata['origin'] - if object_type == CONTENT: - query_string = 'sha1_git:' + object_id - fragment = '' - if 'lines' in swh_id_parsed.metadata: - lines = swh_id_parsed.metadata['lines'].split('-') - fragment += '#L' + lines[0] - if len(lines) > 1: - fragment += '-L' + lines[1] - browse_url = reverse('browse-content', - url_args={'query_string': query_string}, - query_params=query_dict) + fragment - elif object_type == DIRECTORY: - browse_url = reverse('browse-directory', - url_args={'sha1_git': object_id}, - query_params=query_dict) - elif object_type == RELEASE: - browse_url = reverse('browse-release', - url_args={'sha1_git': object_id}, - query_params=query_dict) - elif object_type == REVISION: - browse_url = reverse('browse-revision', - url_args={'sha1_git': object_id}, - query_params=query_dict) - elif object_type == SNAPSHOT: - browse_url = reverse('browse-snapshot', - url_args={'snapshot_id': object_id}, - query_params=query_dict) - elif object_type == ORIGIN: - raise BadInputExc(('Origin PIDs (Persistent Identifiers) are not ' - 'publicly resolvable because they are for ' - 'internal usage only')) - - return {'swh_id_parsed': swh_id_parsed, - 'browse_url': browse_url} - - def parse_rst(text, report_level=2): """ Parse a reStructuredText string with docutils. @@ -395,63 +293,6 @@ return branch -def get_persistent_identifier(persistent_id): - """Check if a persistent identifier is valid. - - Args: - persistent_id: A string representing a Software Heritage - persistent identifier. - - Raises: - BadInputExc: if the provided persistent identifier can - not be parsed. - - Return: - A persistent identifier object. - """ - try: - pid_object = parse_persistent_identifier(persistent_id) - except ValidationError as ve: - raise BadInputExc('Error when parsing identifier: %s' % - ' '.join(ve.messages)) - else: - return pid_object - - -def group_swh_persistent_identifiers(persistent_ids): - """ - Groups many Software Heritage persistent identifiers into a - dictionary depending on their type. - - Args: - persistent_ids (list): a list of Software Heritage persistent - identifier objects - - Returns: - A dictionary with: - keys: persistent identifier types - values: list(bytes) persistent identifiers id - - Raises: - BadInputExc: if one of the provided persistent identifier can - not be parsed. - """ - pids_by_type = { - CONTENT: [], - DIRECTORY: [], - REVISION: [], - RELEASE: [], - SNAPSHOT: [] - } - - for pid in persistent_ids: - obj_id = pid.object_id - obj_type = pid.object_type - pids_by_type[obj_type].append(hash_to_bytes(obj_id)) - - return pids_by_type - - class _NoHeaderHTMLTranslator(HTMLTranslator): """ Docutils translator subclass to customize the generation of HTML diff --git a/swh/web/misc/badges.py b/swh/web/misc/badges.py --- a/swh/web/misc/badges.py +++ b/swh/web/misc/badges.py @@ -19,7 +19,8 @@ ) from swh.web.common import service from swh.web.common.exc import BadInputExc, NotFoundExc -from swh.web.common.utils import reverse, resolve_swh_persistent_id +from swh.web.common.identifiers import resolve_swh_persistent_id +from swh.web.common.utils import reverse _orange = '#f36a24' diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -12,8 +12,8 @@ _re_encode_content ) from swh.web.common.exc import NotFoundExc -from swh.web.common.utils import reverse, get_swh_persistent_id -from swh.web.common.utils import gen_path_info +from swh.web.common.identifiers import get_swh_persistent_id +from swh.web.common.utils import gen_path_info, reverse from swh.web.tests.django_asserts import ( assert_contains, assert_not_contains, assert_template_used ) diff --git a/swh/web/tests/browse/views/test_directory.py b/swh/web/tests/browse/views/test_directory.py --- a/swh/web/tests/browse/views/test_directory.py +++ b/swh/web/tests/browse/views/test_directory.py @@ -7,8 +7,8 @@ from hypothesis import given -from swh.web.common.utils import reverse, get_swh_persistent_id -from swh.web.common.utils import gen_path_info +from swh.web.common.identifiers import get_swh_persistent_id +from swh.web.common.utils import gen_path_info, reverse from swh.web.tests.django_asserts import assert_contains, assert_template_used from swh.web.tests.strategies import ( directory, directory_with_subdirs, invalid_sha1, diff --git a/swh/web/tests/browse/views/test_origin.py b/swh/web/tests/browse/views/test_origin.py --- a/swh/web/tests/browse/views/test_origin.py +++ b/swh/web/tests/browse/views/test_origin.py @@ -17,9 +17,9 @@ from swh.model.model import Snapshot from swh.web.browse.utils import process_snapshot_branches from swh.web.common.exc import NotFoundExc +from swh.web.common.identifiers import get_swh_persistent_id from swh.web.common.utils import ( - reverse, gen_path_info, format_utc_iso_date, - parse_timestamp, get_swh_persistent_id + reverse, gen_path_info, format_utc_iso_date, parse_timestamp ) from swh.web.tests.data import get_content, random_sha1 from swh.web.tests.django_asserts import assert_contains, assert_template_used diff --git a/swh/web/tests/browse/views/test_release.py b/swh/web/tests/browse/views/test_release.py --- a/swh/web/tests/browse/views/test_release.py +++ b/swh/web/tests/browse/views/test_release.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,9 +7,8 @@ from hypothesis import given -from swh.web.common.utils import ( - reverse, format_utc_iso_date, get_swh_persistent_id -) +from swh.web.common.identifiers import get_swh_persistent_id +from swh.web.common.utils import reverse, format_utc_iso_date from swh.web.tests.django_asserts import assert_contains, assert_template_used from swh.web.tests.strategies import ( release, origin_with_releases, unknown_release diff --git a/swh/web/tests/browse/views/test_revision.py b/swh/web/tests/browse/views/test_revision.py --- a/swh/web/tests/browse/views/test_revision.py +++ b/swh/web/tests/browse/views/test_revision.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,9 +6,9 @@ from django.utils.html import escape from hypothesis import given +from swh.web.common.identifiers import get_swh_persistent_id from swh.web.common.utils import ( - reverse, format_utc_iso_date, get_swh_persistent_id, - parse_timestamp + reverse, format_utc_iso_date, parse_timestamp ) from swh.web.tests.django_asserts import assert_contains, assert_template_used from swh.web.tests.strategies import ( diff --git a/swh/web/tests/common/test_identifiers.py b/swh/web/tests/common/test_identifiers.py new file mode 100644 --- /dev/null +++ b/swh/web/tests/common/test_identifiers.py @@ -0,0 +1,27 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from swh.web.common.exc import BadInputExc +from swh.web.common.identifiers import get_swh_persistent_id + + +def test_get_swh_persistent_id(): + swh_object_type = 'content' + sha1_git = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6' + + expected_swh_id = 'swh:1:cnt:' + sha1_git + + assert get_swh_persistent_id( + swh_object_type, sha1_git) == expected_swh_id + + with pytest.raises(BadInputExc) as e: + get_swh_persistent_id('foo', sha1_git) + assert e.match('Invalid object') + + with pytest.raises(BadInputExc) as e: + get_swh_persistent_id(swh_object_type, 'not a valid id') + assert e.match('Invalid object') diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -1,14 +1,11 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime -import pytest - from swh.web.common import utils -from swh.web.common.exc import BadInputExc def test_shorten_path_noop(): @@ -96,24 +93,6 @@ assert path_info == expected_result -def test_get_swh_persistent_id(): - swh_object_type = 'content' - sha1_git = 'aafb16d69fd30ff58afdd69036a26047f3aebdc6' - - expected_swh_id = 'swh:1:cnt:' + sha1_git - - assert (utils.get_swh_persistent_id(swh_object_type, sha1_git) == - expected_swh_id) - - with pytest.raises(BadInputExc) as e: - utils.get_swh_persistent_id('foo', sha1_git) - assert e.match('Invalid object') - - with pytest.raises(BadInputExc) as e: - utils.get_swh_persistent_id(swh_object_type, 'not a valid id') - assert e.match('Invalid object') - - def test_rst_to_html(): rst = ( 'Section\n' diff --git a/swh/web/tests/misc/test_badges.py b/swh/web/tests/misc/test_badges.py --- a/swh/web/tests/misc/test_badges.py +++ b/swh/web/tests/misc/test_badges.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -11,7 +11,8 @@ CONTENT, DIRECTORY, ORIGIN, RELEASE, REVISION, SNAPSHOT ) from swh.web.common import service -from swh.web.common.utils import reverse, resolve_swh_persistent_id +from swh.web.common.identifiers import resolve_swh_persistent_id +from swh.web.common.utils import reverse from swh.web.misc.badges import _badge_config, _get_logo_data from swh.web.tests.django_asserts import assert_contains from swh.web.tests.strategies import (