diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -24,14 +24,11 @@ Union, ) import urllib.parse -import warnings import attr from attrs_strict import type_validator -from .collections import ImmutableDict from .exceptions import ValidationError -from .fields.hashes import validate_sha1 from .hashutil import MultiHash, hash_git_data, hash_to_bytes, hash_to_hex @@ -71,6 +68,7 @@ CONTENT = "content" RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata" + SWHID_NAMESPACE = "swh" SWHID_VERSION = 1 SWHID_TYPES = ["snp", "rel", "rev", "dir", "cnt"] @@ -726,27 +724,6 @@ return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest() -_object_type_map = { - ORIGIN: {"short_name": "ori", "key_id": "id"}, - SNAPSHOT: {"short_name": "snp", "key_id": "id"}, - RELEASE: {"short_name": "rel", "key_id": "id"}, - REVISION: {"short_name": "rev", "key_id": "id"}, - DIRECTORY: {"short_name": "dir", "key_id": "id"}, - CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, - RAW_EXTRINSIC_METADATA: {"short_name": "emd", "key_id": "id"}, -} - -_swhid_type_map = { - "ori": ORIGIN, - "snp": SNAPSHOT, - "rel": RELEASE, - "rev": REVISION, - "dir": DIRECTORY, - "cnt": CONTENT, - "emd": RAW_EXTRINSIC_METADATA, -} - - # type of the "object_type" attribute of the SWHID class; either # ObjectType or ExtendedObjectType _TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType) @@ -1094,150 +1071,6 @@ """the type of object the identifier points to""" -@attr.s(frozen=True) -class SWHID: - """ - Deprecated alternative to QualifiedSWHID. - - Args: - namespace (str): the namespace of the identifier, defaults to ``swh`` - scheme_version (int): the scheme version of the identifier, - defaults to 1 - object_type (str): the type of object the identifier points to, - either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` - object_id (str): object's identifier - metadata (dict): optional dict filled with metadata related to - pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Once created, it contains the following attributes: - - Attributes: - namespace (str): the namespace of the identifier - scheme_version (int): the scheme version of the identifier - object_type (str): the type of object the identifier points to - object_id (str): hexadecimal representation of the object hash - metadata (dict): metadata related to the pointed object - - To get the raw SWHID string from an instance of this named tuple, - use the :func:`str` function:: - - swhid = SWHID( - object_type='content', - object_id='8ff44f081d43176474b267de5451f2c2e88089d0' - ) - swhid_str = str(swhid) - # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' - """ - - namespace = attr.ib(type=str, default=SWHID_NAMESPACE) - scheme_version = attr.ib(type=int, default=SWHID_VERSION) - object_type = attr.ib(type=str, default="") - object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore - metadata = attr.ib( - type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() - ) - - def __attrs_post_init__(self): - warnings.warn( - "swh.model.identifiers.SWHID is deprecated; " - "use swh.model.identifiers.QualifiedSWHID instead.", - DeprecationWarning, - ) - - @namespace.validator - def check_namespace(self, attribute, value): - if value != SWHID_NAMESPACE: - raise ValidationError( - "Invalid SWHID: invalid namespace: %(namespace)s", - params={"namespace": value}, - ) - - @scheme_version.validator - def check_scheme_version(self, attribute, value): - if value != SWHID_VERSION: - raise ValidationError( - "Invalid SWHID: invalid version: %(version)s", params={"version": value} - ) - - @object_type.validator - def check_object_type(self, attribute, value): - if value not in _object_type_map: - raise ValidationError( - "Invalid SWHID: invalid type: %(object_type)s)", - params={"object_type": value}, - ) - - @object_id.validator - def check_object_id(self, attribute, value): - try: - validate_sha1(value) # can raise if invalid hash - except ValidationError: - raise ValidationError( - "Invalid SWHID: invalid checksum: %(object_id)s", - params={"object_id": value}, - ) from None - - @metadata.validator - def check_qualifiers(self, attribute, value): - for k in value: - if k not in SWHID_QUALIFIERS: - raise ValidationError( - "Invalid SWHID: unknown qualifier: %(qualifier)s", - params={"qualifier": k}, - ) - - def to_dict(self) -> Dict[str, Any]: - return attr.asdict(self) - - def __str__(self) -> str: - o = _object_type_map.get(self.object_type) - assert o - swhid = SWHID_SEP.join( - [self.namespace, str(self.scheme_version), o["short_name"], self.object_id] - ) - if self.metadata: - for k, v in self.metadata.items(): - swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) - return swhid - - -def swhid( - object_type: str, - object_id: Union[str, Dict[str, Any]], - scheme_version: int = 1, - metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), -) -> str: - """Compute :ref:`persistent-identifiers` - - Args: - object_type: object's type, either ``content``, ``directory``, - ``release``, ``revision`` or ``snapshot`` - object_id: object's identifier - scheme_version: SWHID scheme version, defaults to 1 - metadata: metadata related to the pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Returns: - the SWHID of the object - - """ - if isinstance(object_id, dict): - o = _object_type_map[object_type] - object_id = object_id[o["key_id"]] - swhid = SWHID( - scheme_version=scheme_version, - object_type=object_type, - object_id=object_id, - metadata=metadata, # type: ignore # mypy can't properly unify types - ) - return str(swhid) - - def _parse_swhid(swhid: str) -> Dict[str, Any]: """Parse a Software Heritage identifier (SWHID) from string (see: :ref:`persistent-identifiers`.) @@ -1276,24 +1109,3 @@ parts["scheme_version"] = int(parts["scheme_version"]) parts["object_id"] = hash_to_bytes(parts["object_id"]) return parts - - -def parse_swhid(swhid: str) -> SWHID: - """Parse a Software Heritage identifier (SWHID) from string (see: - :ref:`persistent-identifiers`.) - - Args: - swhid (str): A persistent identifier - - Raises: - swh.model.exceptions.ValidationError: if passed string is not a valid SWHID - - """ - parts = _parse_swhid(swhid) - return SWHID( - parts["namespace"], - parts["scheme_version"], - _swhid_type_map[parts["object_type"]], - hash_to_hex(parts["object_id"]), - parts["qualifiers"], - ) diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -16,12 +16,6 @@ from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x from swh.model.identifiers import ( - CONTENT, - DIRECTORY, - RELEASE, - REVISION, - SNAPSHOT, - SWHID, SWHID_QUALIFIERS, CoreSWHID, ExtendedObjectType, @@ -891,317 +885,6 @@ normalize_timestamp(dict_input) -class TestSwhid(unittest.TestCase): - @pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") - def test_swhid(self): - _snapshot_id = _x("c7c108084bc0bf3d81436bf980b46e98bd338453") - _release_id = "22ece559cc7cc2364edc5e5593d63ae8bd229f9f" - _revision_id = "309cf2674ee7a0749978cf8265ab91a60aea0f7d" - _directory_id = "d198bc9d7a6bcf6db04f476d29314f157507d505" - _content_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - _snapshot = {"id": _snapshot_id} - _release = {"id": _release_id} - _revision = {"id": _revision_id} - _directory = {"id": _directory_id} - _content = {"sha1_git": _content_id} - - for full_type, _hash, expected_swhid, version, _meta in [ - ( - SNAPSHOT, - _snapshot_id, - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - None, - {}, - ), - ( - RELEASE, - _release_id, - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - 1, - {}, - ), - ( - REVISION, - _revision_id, - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - None, - {}, - ), - ( - DIRECTORY, - _directory_id, - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - None, - {}, - ), - ( - CONTENT, - _content_id, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - 1, - {}, - ), - ( - SNAPSHOT, - _snapshot, - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - None, - {}, - ), - ( - RELEASE, - _release, - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - 1, - {}, - ), - ( - REVISION, - _revision, - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - None, - {}, - ), - ( - DIRECTORY, - _directory, - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - None, - {}, - ), - ( - CONTENT, - _content, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - 1, - {}, - ), - ( - CONTENT, - _content, - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1", - 1, - {"origin": "1"}, - ), - ]: - if version: - actual_value = identifiers.swhid( - full_type, _hash, version, metadata=_meta - ) - else: - actual_value = identifiers.swhid(full_type, _hash, metadata=_meta) - - self.assertEqual(actual_value, expected_swhid) - - def test_swhid_wrong_input(self): - _snapshot_id = "notahash4bc0bf3d81436bf980b46e98bd338453" - _snapshot = {"id": _snapshot_id} - - for _type, _hash in [ - (SNAPSHOT, _snapshot_id), - (SNAPSHOT, _snapshot), - ("lines", "42"), - ]: - with self.assertRaises(ValidationError): - identifiers.swhid(_type, _hash) - - @pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") - def test_parse_swhid(self): - for swhid, _type, _version, _hash in [ - ( - "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - CONTENT, - 1, - "94a9ed024d3859793618152ea559a168bbcbb5e2", - ), - ( - "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - DIRECTORY, - 1, - "d198bc9d7a6bcf6db04f476d29314f157507d505", - ), - ( - "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - REVISION, - 1, - "309cf2674ee7a0749978cf8265ab91a60aea0f7d", - ), - ( - "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - RELEASE, - 1, - "22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - ), - ( - "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - SNAPSHOT, - 1, - "c7c108084bc0bf3d81436bf980b46e98bd338453", - ), - ]: - with pytest.warns(DeprecationWarning): - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata={}, - ) - actual_result = identifiers.parse_swhid(swhid) - self.assertEqual(actual_result, expected_result) - self.assertEqual(str(expected_result), swhid) - - for swhid, _type, _version, _hash, _metadata in [ - ( - "swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa - CONTENT, - 1, - "9c95815d9e9d91b8dae8e05d8bbc696fe19f796b", - {"lines": "1-18", "origin": "https://github.com/python/cpython"}, - ), - ( - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa - DIRECTORY, - 1, - "0b6959356d30f1a4e9b7f6bca59b9a336464c03d", - {"origin": "deb://Debian/packages/linuxdoc-tools"}, - ), - ]: - with pytest.warns(DeprecationWarning): - expected_result = SWHID( - namespace="swh", - scheme_version=_version, - object_type=_type, - object_id=_hash, - metadata=_metadata, - ) - actual_result = identifiers.parse_swhid(swhid) - self.assertEqual(actual_result, expected_result) - self.assertEqual( - expected_result.to_dict(), - { - "namespace": "swh", - "scheme_version": _version, - "object_type": _type, - "object_id": _hash, - "metadata": _metadata, - }, - ) - self.assertEqual(str(expected_result), swhid) - - -@pytest.mark.parametrize( - "invalid_swhid", - [ - "swh:1:cnt", - "swh:1:", - "swh:", - "swh:1:cnt:", - "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed", - "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", - "swh:1:snp:foo", - # wrong qualifier: ori should be origin - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - # wrong qualifier: anc should be anchor - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anc=1;visit=1;path=/", # noqa - # wrong qualifier: vis should be visit - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;vis=1;path=/", # noqa - # wrong qualifier: pa should be path - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=1;visit=1;pa=/", # noqa - # wrong qualifier: line should be lines - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;line=10;origin=something;anchor=1;visit=1;path=/", # noqa - # wrong qualifier value: it contains space before of after - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin= https://some-url", # noqa - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ", # noqa - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=something;anchor=some-anchor ;visit=1", # noqa - # invalid swhid: whitespaces - "swh :1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh: 1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh: 1: dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;ori=something;anchor=1;visit=1;path=/", # noqa - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d", - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d; origin=blah", - "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - # other whitespaces - "swh\t:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1\n:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1:\rdir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d\f;lines=12", - "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;lines=12\v", - ], -) -def test_parse_swhid_parsing_error(invalid_swhid): - with pytest.raises(ValidationError): - identifiers.parse_swhid(invalid_swhid) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -@pytest.mark.parametrize( - "ns,version,type,id", - [ - ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, "foo", "fed8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",), - ], -) -def test_SWHID_class_validation_error(ns, version, type, id): - with pytest.raises(ValidationError): - SWHID( - namespace=ns, scheme_version=version, object_type=type, object_id=id, - ) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -def test_SWHID_hash(): - object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - - assert hash(SWHID(object_type="directory", object_id=object_id)) == hash( - SWHID(object_type="directory", object_id=object_id) - ) - - assert hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - ) == hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - ) - - # Different order of the dictionary, so the underlying order of the tuple in - # ImmutableDict is different. - assert hash( - SWHID( - object_type="directory", - object_id=object_id, - metadata={"origin": "https://example.com", "lines": "42"}, - ) - ) == hash( - SWHID( - object_type="directory", - object_id=object_id, - metadata={"lines": "42", "origin": "https://example.com"}, - ) - ) - - -@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning") -def test_SWHID_eq(): - object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - - assert SWHID(object_type="directory", object_id=object_id) == SWHID( - object_type="directory", object_id=object_id - ) - - assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - - assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) - - # SWHIDs that are outright invalid, no matter the context INVALID_SWHIDS = [ "swh:1:cnt",