Changeset View
Changeset View
Standalone View
Standalone View
swh/model/identifiers.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import binascii | import binascii | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
from functools import lru_cache | from functools import lru_cache | ||||
from typing import Any, Dict, NamedTuple, Union | from typing import Any, Dict, Union | ||||
import attr | |||||
from deprecated import deprecated | from deprecated import deprecated | ||||
from .collections import ImmutableDict | from .collections import ImmutableDict | ||||
from .exceptions import ValidationError | from .exceptions import ValidationError | ||||
from .fields.hashes import validate_sha1 | from .fields.hashes import validate_sha1 | ||||
from .hashutil import hash_git_data, hash_to_hex, MultiHash | from .hashutil import hash_git_data, hash_to_hex, MultiHash | ||||
▲ Show 20 Lines • Show All 624 Lines • ▼ Show 20 Lines | _object_type_map = { | ||||
SNAPSHOT: {"short_name": "snp", "key_id": "id"}, | SNAPSHOT: {"short_name": "snp", "key_id": "id"}, | ||||
RELEASE: {"short_name": "rel", "key_id": "id"}, | RELEASE: {"short_name": "rel", "key_id": "id"}, | ||||
REVISION: {"short_name": "rev", "key_id": "id"}, | REVISION: {"short_name": "rev", "key_id": "id"}, | ||||
DIRECTORY: {"short_name": "dir", "key_id": "id"}, | DIRECTORY: {"short_name": "dir", "key_id": "id"}, | ||||
CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, | CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, | ||||
} | } | ||||
_SWHID = NamedTuple( | @attr.s(frozen=True) | ||||
"SWHID", | class SWHID: | ||||
[ | |||||
("namespace", str), | |||||
("scheme_version", int), | |||||
("object_type", str), | |||||
("object_id", str), | |||||
("metadata", ImmutableDict[str, Any]), | |||||
], | |||||
) | |||||
class SWHID(_SWHID): | |||||
""" | """ | ||||
Named tuple holding the relevant info associated to a SoftWare Heritage | Named tuple holding the relevant info associated to a SoftWare Heritage | ||||
persistent IDentifier (SWHID) | persistent IDentifier (SWHID) | ||||
Args: | Args: | ||||
namespace (str): the namespace of the identifier, defaults to ``swh`` | namespace (str): the namespace of the identifier, defaults to ``swh`` | ||||
scheme_version (int): the scheme version of the identifier, | scheme_version (int): the scheme version of the identifier, | ||||
defaults to 1 | defaults to 1 | ||||
Show All 21 Lines | use the :func:`str` function:: | ||||
swhid = SWHID( | swhid = SWHID( | ||||
object_type='content', | object_type='content', | ||||
object_id='8ff44f081d43176474b267de5451f2c2e88089d0' | object_id='8ff44f081d43176474b267de5451f2c2e88089d0' | ||||
) | ) | ||||
swhid_str = str(swhid) | swhid_str = str(swhid) | ||||
# 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' | # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' | ||||
""" | """ | ||||
__slots__ = () | namespace = attr.ib(type=str, default="swh") | ||||
scheme_version = attr.ib(type=int, default=1) | |||||
def __new__( | object_type = attr.ib(type=str, default="") | ||||
cls, | object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore | ||||
namespace: str = SWHID_NAMESPACE, | metadata = attr.ib( | ||||
scheme_version: int = SWHID_VERSION, | type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() | ||||
object_type: str = "", | |||||
object_id: str = "", | |||||
metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), | |||||
): | |||||
o = _object_type_map.get(object_type) | |||||
if not o: | |||||
raise ValidationError( | |||||
"Wrong input: Supported types are %s" % (list(_object_type_map.keys())) | |||||
) | ) | ||||
if namespace != SWHID_NAMESPACE: | |||||
@namespace.validator | |||||
def check_namespace(self, attribute, value): | |||||
if value != SWHID_NAMESPACE: | |||||
raise ValidationError( | raise ValidationError( | ||||
"Wrong format: only supported namespace is '%s'" % SWHID_NAMESPACE | "Wrong format: only supported namespace is '%s'" % SWHID_NAMESPACE | ||||
) | ) | ||||
if scheme_version != SWHID_VERSION: | |||||
@scheme_version.validator | |||||
def check_scheme_version(self, attribute, value): | |||||
if value != SWHID_VERSION: | |||||
raise ValidationError( | raise ValidationError( | ||||
"Wrong format: only supported version is %d" % SWHID_VERSION | "Wrong format: only supported version is %d" % SWHID_VERSION | ||||
) | ) | ||||
# internal swh representation resolution | @object_type.validator | ||||
if isinstance(object_id, dict): | def check_object_type(self, attribute, value): | ||||
object_id = object_id[o["key_id"]] | if value not in _object_type_map: | ||||
raise ValidationError( | |||||
validate_sha1(object_id) # can raise if invalid hash | "Wrong input: Supported types are %s" % (list(_object_type_map.keys())) | ||||
object_id = hash_to_hex(object_id) | |||||
return super().__new__( | |||||
cls, | |||||
namespace, | |||||
scheme_version, | |||||
object_type, | |||||
object_id, | |||||
ImmutableDict(metadata), | |||||
) | ) | ||||
@object_id.validator | |||||
def check_object_id(self, attribute, value): | |||||
validate_sha1(value) # can raise if invalid hash | |||||
def __str__(self) -> str: | def __str__(self) -> str: | ||||
o = _object_type_map.get(self.object_type) | o = _object_type_map.get(self.object_type) | ||||
assert o | assert o | ||||
swhid = SWHID_SEP.join( | swhid = SWHID_SEP.join( | ||||
[self.namespace, str(self.scheme_version), o["short_name"], self.object_id] | [self.namespace, str(self.scheme_version), o["short_name"], self.object_id] | ||||
) | ) | ||||
if self.metadata: | if self.metadata: | ||||
for k, v in self.metadata.items(): | for k, v in self.metadata.items(): | ||||
swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) | swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) | ||||
return swhid | return swhid | ||||
@deprecated("Use swh.model.identifiers.SWHID instead") | @deprecated("Use swh.model.identifiers.SWHID instead") | ||||
class PersistentId(SWHID): | class PersistentId(SWHID): | ||||
""" | """ | ||||
Named tuple holding the relevant info associated to a SoftWare Heritage | Named tuple holding the relevant info associated to a SoftWare Heritage | ||||
persistent IDentifier. | persistent IDentifier. | ||||
.. deprecated:: 0.3.8 | .. deprecated:: 0.3.8 | ||||
Use :class:`swh.model.identifiers.SWHID` instead | Use :class:`swh.model.identifiers.SWHID` instead | ||||
""" | """ | ||||
def __new__(cls, *args, **kwargs): | pass | ||||
return super(cls, PersistentId).__new__(cls, *args, **kwargs) | |||||
def swhid( | def swhid( | ||||
object_type: str, | object_type: str, | ||||
object_id: str, | object_id: Union[str, Dict[str, Any]], | ||||
douardda: How is this change (accepting a dict for object_id) related to the namedtuple -> attr refactoring?
vlorentz [Done]: 07:53 <vlorentz> douardda: the reason is that we were already passing dicts
07:54 <+douardda>…
scheme_version: int = 1, | scheme_version: int = 1, | ||||
metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), | metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), | ||||
) -> str: | ) -> str: | ||||
"""Compute :ref:`persistent-identifiers` | """Compute :ref:`persistent-identifiers` | ||||
Args: | Args: | ||||
object_type: object's type, either ``content``, ``directory``, | object_type: object's type, either ``content``, ``directory``, | ||||
``release``, ``revision`` or ``snapshot`` | ``release``, ``revision`` or ``snapshot`` | ||||
object_id: object's identifier | object_id: object's identifier | ||||
scheme_version: SWHID scheme version, defaults to 1 | scheme_version: SWHID scheme version, defaults to 1 | ||||
metadata: metadata related to the pointed object | metadata: metadata related to the pointed object | ||||
Raises: | Raises: | ||||
swh.model.exceptions.ValidationError: In case of invalid object type or id | swh.model.exceptions.ValidationError: In case of invalid object type or id | ||||
Returns: | Returns: | ||||
the SWHID of the object | the SWHID of the object | ||||
""" | """ | ||||
if isinstance(object_id, dict): | |||||
o = _object_type_map[object_type] | |||||
object_id = object_id[o["key_id"]] | |||||
swhid = SWHID( | swhid = SWHID( | ||||
scheme_version=scheme_version, | scheme_version=scheme_version, | ||||
object_type=object_type, | object_type=object_type, | ||||
object_id=object_id, | object_id=object_id, | ||||
metadata=metadata, | metadata=metadata, # type: ignore # mypy can't properly unify types | ||||
) | ) | ||||
return str(swhid) | return str(swhid) | ||||
@deprecated("Use swh.model.identifiers.swhid instead") | @deprecated("Use swh.model.identifiers.swhid instead") | ||||
def persistent_identifier(*args, **kwargs) -> str: | def persistent_identifier(*args, **kwargs) -> str: | ||||
"""Compute :ref:`persistent-identifiers` | """Compute :ref:`persistent-identifiers` | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | def parse_swhid(swhid: str) -> SWHID: | ||||
_metadata = {} | _metadata = {} | ||||
for part in swhid_parts: | for part in swhid_parts: | ||||
try: | try: | ||||
key, val = part.split("=") | key, val = part.split("=") | ||||
_metadata[key] = val | _metadata[key] = val | ||||
except Exception: | except Exception: | ||||
msg = "Contextual data is badly formatted, form key=val expected" | msg = "Contextual data is badly formatted, form key=val expected" | ||||
raise ValidationError(msg) | raise ValidationError(msg) | ||||
return SWHID(_ns, int(_version), _type, _id, _metadata) | return SWHID( | ||||
_ns, | |||||
int(_version), | |||||
_type, | |||||
_id, | |||||
_metadata, # type: ignore # mypy can't properly unify types | |||||
) | |||||
@deprecated("Use swh.model.identifiers.parse_swhid instead") | @deprecated("Use swh.model.identifiers.parse_swhid instead") | ||||
def parse_persistent_identifier(persistent_id: str) -> PersistentId: | def parse_persistent_identifier(persistent_id: str) -> PersistentId: | ||||
"""Parse :ref:`persistent-identifiers`. | """Parse :ref:`persistent-identifiers`. | ||||
.. deprecated:: 0.3.8 | .. deprecated:: 0.3.8 | ||||
Use :func:`swh.model.identifiers.parse_swhid` instead | Use :func:`swh.model.identifiers.parse_swhid` instead | ||||
""" | """ | ||||
return PersistentId(**parse_swhid(persistent_id)._asdict()) | return PersistentId(**attr.asdict(parse_swhid(persistent_id))) |
How is this change (accepting a dict) related to the namedtuple -> attr refactoring?