diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -11,7 +11,18 @@ from functools import lru_cache import hashlib import re -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import ( + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, +) import warnings import attr @@ -735,35 +746,21 @@ } -@attr.s(frozen=True, kw_only=True) -class CoreSWHID: - """ - Dataclass holding the relevant info associated to a SoftWare Heritage - persistent IDentifier (SWHID). - - Unlike `QualifiedSWHID`, it is restricted to core SWHIDs, ie. SWHIDs - with no qualifiers. - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id +# type of the "object_type" attribute of the SWHID class; either +# ObjectType or ExtendedObjectType +_TObjectType = TypeVar("_TObjectType", ObjectType, ExtendedObjectType) - To get the raw SWHID string from an instance of this class, - use the :func:`str` function: +# the SWHID class itself (this is used so that X.from_string() can return X +# for all X subclass of _BaseSWHID) +_TSWHID = TypeVar("_TSWHID", bound="_BaseSWHID") - >>> swhid = CoreSWHID( - ... object_type=ObjectType.CONTENT, - ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'), - ... ) - >>> str(swhid) - 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' - And vice-versa with :meth:`CoreSWHID.from_string`: +@attr.s(frozen=True, kw_only=True) +class _BaseSWHID(Generic[_TObjectType]): + """Common base class for CoreSWHID, QualifiedSWHID, and ExtendedSWHID. - >>> swhid == CoreSWHID.from_string( - ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0" - ... ) - True - """ + This is an "abstract" class and should not be instantiated directly; + it only exists to deduplicate code between these three SWHID classes.""" namespace = attr.ib(type=str, default=SWHID_NAMESPACE) """the namespace of the identifier, defaults to ``swh``""" @@ -771,9 +768,8 @@ scheme_version = attr.ib(type=int, default=SWHID_VERSION) """the scheme version of the identifier, defaults to 1""" - object_type = attr.ib( - type=ObjectType, validator=type_validator(), converter=ObjectType - ) + # overridden by subclasses + object_type: _TObjectType """the type of object the identifier points to""" object_id = attr.ib(type=bytes, validator=type_validator()) @@ -813,16 +809,52 @@ ) @classmethod - def from_string(cls, s: str) -> CoreSWHID: + def from_string(cls: Type[_TSWHID], s: str) -> _TSWHID: parts = _parse_swhid(s) if parts.pop("qualifiers"): raise ValidationError("CoreSWHID does not support qualifiers.") try: - return CoreSWHID(**parts) + return cls(**parts) except ValueError as e: raise ValidationError(*e.args) from None +@attr.s(frozen=True, kw_only=True) +class CoreSWHID(_BaseSWHID[ObjectType]): + """ + Dataclass holding the relevant info associated to a SoftWare Heritage + persistent IDentifier (SWHID). + + Unlike `QualifiedSWHID`, it is restricted to core SWHIDs, ie. SWHIDs + with no qualifiers. + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + To get the raw SWHID string from an instance of this class, + use the :func:`str` function: + + >>> swhid = CoreSWHID( + ... object_type=ObjectType.CONTENT, + ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'), + ... ) + >>> str(swhid) + 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' + + And vice-versa with :meth:`CoreSWHID.from_string`: + + >>> swhid == CoreSWHID.from_string( + ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0" + ... ) + True + """ + + object_type = attr.ib( + type=ObjectType, validator=type_validator(), converter=ObjectType + ) + """the type of object the identifier points to""" + + def _parse_core_swhid(swhid: Union[str, CoreSWHID, None]) -> Optional[CoreSWHID]: """Alias of CoreSWHID.from_string to make mypy happy...... @@ -845,7 +877,7 @@ @attr.s(frozen=True, kw_only=True) -class QualifiedSWHID: +class QualifiedSWHID(_BaseSWHID[ObjectType]): """ Dataclass holding the relevant info associated to a SoftWare Heritage persistent IDentifier (SWHID) @@ -872,20 +904,11 @@ True """ - namespace = attr.ib(type=str, default=SWHID_NAMESPACE) - """the namespace of the identifier, defaults to ``swh``""" - - scheme_version = attr.ib(type=int, default=SWHID_VERSION) - """the scheme version of the identifier, defaults to 1""" - object_type = attr.ib( type=ObjectType, validator=type_validator(), converter=ObjectType ) """the type of object the identifier points to""" - object_id = attr.ib(type=bytes, validator=type_validator()) - """object's identifier""" - # qualifiers: origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) @@ -921,29 +944,6 @@ ) """lines: line number(s) of interest, usually within a content object""" - @namespace.validator - def check_namespace(self, attribute, value): - if value != SWHID_NAMESPACE: - raise ValidationError( - "Invalid SWHID: invalid namespace: %(namespace)s", - params={"namespace": value}, - ) - - @scheme_version.validator - def check_scheme_version(self, attribute, value): - if value != SWHID_VERSION: - raise ValidationError( - "Invalid SWHID: invalid version: %(version)s", params={"version": value} - ) - - @object_id.validator - def check_object_id(self, attribute, value): - if len(value) != 20: - raise ValidationError( - "Invalid SWHID: invalid checksum: %(object_id)s", - params={"object_id": hash_to_hex(value)}, - ) - @visit.validator def check_visit(self, attribute, value): if value and value.object_type != ObjectType.SNAPSHOT: @@ -1006,7 +1006,7 @@ @attr.s(frozen=True, kw_only=True) -class ExtendedSWHID: +class ExtendedSWHID(_BaseSWHID[ExtendedObjectType]): """ Dataclass holding the relevant info associated to a SoftWare Heritage persistent IDentifier (SWHID). @@ -1035,12 +1035,6 @@ True """ - namespace = attr.ib(type=str, default=SWHID_NAMESPACE) - """the namespace of the identifier, defaults to ``swh``""" - - scheme_version = attr.ib(type=int, default=SWHID_VERSION) - """the scheme version of the identifier, defaults to 1""" - object_type = attr.ib( type=ExtendedObjectType, validator=type_validator(), @@ -1048,32 +1042,6 @@ ) """the type of object the identifier points to""" - object_id = attr.ib(type=bytes, validator=type_validator()) - """object's identifier""" - - @namespace.validator - def check_namespace(self, attribute, value): - if value != SWHID_NAMESPACE: - raise ValidationError( - "Invalid SWHID: invalid namespace: %(namespace)s", - params={"namespace": value}, - ) - - @scheme_version.validator - def check_scheme_version(self, attribute, value): - if value != SWHID_VERSION: - raise ValidationError( - "Invalid SWHID: invalid version: %(version)s", params={"version": value} - ) - - @object_id.validator - def check_object_id(self, attribute, value): - if len(value) != 20: - raise ValidationError( - "Invalid SWHID: invalid checksum: %(object_id)s", - params={"object_id": hash_to_hex(value)}, - ) - def __str__(self) -> str: return SWHID_SEP.join( [