Changeset View
Standalone View
swh/model/identifiers.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from __future__ import annotations | |||||
import binascii | import binascii | ||||
import datetime | import datetime | ||||
import enum | |||||
from functools import lru_cache | from functools import lru_cache | ||||
import hashlib | import hashlib | ||||
import re | import re | ||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
import warnings | |||||
import attr | import attr | ||||
from attrs_strict import type_validator | |||||
from .collections import ImmutableDict | from .collections import ImmutableDict | ||||
from .exceptions import ValidationError | from .exceptions import ValidationError | ||||
from .fields.hashes import validate_sha1 | from .fields.hashes import validate_sha1 | ||||
from .hashutil import MultiHash, hash_git_data, hash_to_hex | from .hashutil import MultiHash, hash_git_data, hash_to_bytes, hash_to_hex | ||||
zack: Thank gawd, yes.
- minor (1): can you add a docstring for this enum? Even a one-liner would do…
class ObjectType(enum.Enum):
    """Possible object types of a QualifiedSWHID.

    The value of each variant is the type code used in the SWHID's string
    representation.
    """

    ORIGIN = "ori"
    SNAPSHOT = "snp"
    REVISION = "rev"
    RELEASE = "rel"
    DIRECTORY = "dir"
    CONTENT = "cnt"
# The following are deprecated aliases of the variants defined in ObjectType,
# kept while transitioning from SWHID to QualifiedSWHID. They hold the long
# object-type names, whereas the ObjectType values are the short codes.
ORIGIN = "origin"
SNAPSHOT = "snapshot"
REVISION = "revision"
RELEASE = "release"
DIRECTORY = "directory"
CONTENT = "content"

# Namespace component expected at the start of every SWHID.
SWHID_NAMESPACE = "swh"
▲ Show 20 Lines • Show All 664 Lines • ▼ Show 20 Lines | _swhid_type_map = { | ||||
"snp": SNAPSHOT, | "snp": SNAPSHOT, | ||||
"rel": RELEASE, | "rel": RELEASE, | ||||
"rev": REVISION, | "rev": REVISION, | ||||
"dir": DIRECTORY, | "dir": DIRECTORY, | ||||
"cnt": CONTENT, | "cnt": CONTENT, | ||||
} | } | ||||
@attr.s(frozen=True, kw_only=True)
class QualifiedSWHID:
    """
    Dataclass holding the relevant info associated to a SoftWare Heritage
    persistent IDentifier (SWHID)

    Raises:
        swh.model.exceptions.ValidationError: if the namespace, the scheme
            version, the length of the object id, or the name of a qualifier
            is invalid

    To get the raw SWHID string from an instance of this class,
    use the :func:`str` function:

    >>> swhid = QualifiedSWHID(
    ...     object_type=ObjectType.CONTENT,
    ...     object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'),
    ...     qualifiers={"lines": "5-10"},
    ... )
    >>> str(swhid)
    'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10'

    And vice-versa with :meth:`QualifiedSWHID.from_string`:

    >>> swhid == QualifiedSWHID.from_string(
    ...     "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10"
    ... )
    True
    """

    namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
    """the namespace of the identifier, defaults to ``swh``"""

    scheme_version = attr.ib(type=int, default=SWHID_VERSION)
    """the scheme version of the identifier, defaults to 1"""

    object_type = attr.ib(type=ObjectType, validator=type_validator())
    """the type of object the identifier points to"""

    object_id = attr.ib(type=bytes, validator=type_validator())
    """object's identifier"""

    qualifiers = attr.ib(
        type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
    )
    """optional dict filled with metadata related to pointed object"""

    # Each field has its own validator (rather than one regex applied to the
    # string form) because instances are also built from parts, not only parsed
    # from a string; the validators run in both cases.

    @namespace.validator
    def check_namespace(self, attribute, value):
        """Raise ValidationError unless ``value`` is ``SWHID_NAMESPACE``."""
        if value != SWHID_NAMESPACE:
            raise ValidationError(
                "Invalid SWHID: invalid namespace: %(namespace)s",
                params={"namespace": value},
            )

    @scheme_version.validator
    def check_scheme_version(self, attribute, value):
        """Raise ValidationError unless ``value`` is ``SWHID_VERSION``."""
        if value != SWHID_VERSION:
            raise ValidationError(
                "Invalid SWHID: invalid version: %(version)s", params={"version": value}
            )

    @object_id.validator
    def check_object_id(self, attribute, value):
        """Raise ValidationError unless ``value`` is exactly 20 bytes long."""
        if len(value) != 20:
            raise ValidationError(
                "Invalid SWHID: invalid checksum: %(object_id)s",
                params={"object_id": hash_to_hex(value)},
            )

    @qualifiers.validator
    def check_qualifiers(self, attribute, value):
        """Raise ValidationError on the first key not in ``SWHID_QUALIFIERS``."""
        for k in value:
            if k not in SWHID_QUALIFIERS:
                raise ValidationError(
                    "Invalid SWHID: unknown qualifier: %(qualifier)s",
                    params={"qualifier": k},
                )

    def __str__(self) -> str:
        """Return the string form of this SWHID.

        The core fields (namespace, scheme version, object type code and
        hex-encoded object id) are joined with ``SWHID_SEP``; each qualifier
        is then appended as ``<SWHID_CTXT_SEP><key>=<value>``.
        """
        core = SWHID_SEP.join(
            [
                self.namespace,
                str(self.scheme_version),
                self.object_type.value,
                hash_to_hex(self.object_id),
            ]
        )
        # Build the qualifier suffix in a single join instead of growing the
        # string with repeated "+=" (which may be quadratic in the number of
        # qualifiers). An empty mapping yields an empty suffix.
        suffix = "".join(
            "%s%s=%s" % (SWHID_CTXT_SEP, k, v) for k, v in self.qualifiers.items()
        )
        return core + suffix

    @classmethod
    def from_string(cls, s: str) -> QualifiedSWHID:
        """Build an instance from the string form of a SWHID.

        Parsing is delegated to the deprecated :func:`parse_swhid`; the
        resulting object's fields are then converted to the types this class
        expects (:class:`ObjectType` and ``bytes``), and its ``metadata``
        becomes ``qualifiers``.

        Args:
            s: the SWHID string to parse

        Returns:
            an instance of ``cls`` equivalent to ``s``
        """
        with warnings.catch_warnings():
            # The legacy parser is used on purpose here, so hide only its
            # deprecation notice and let any other warning category through.
            # NOTE(review): assumes parse_swhid/SWHID only emit
            # DeprecationWarning -- confirm against parse_swhid.
            warnings.simplefilter("ignore", category=DeprecationWarning)
            old_swhid = parse_swhid(s)
        # _object_type_map is one of the module-level constants of this file;
        # it is expected to go away when the deprecated SWHID class is removed.
        object_type = ObjectType(_object_type_map[old_swhid.object_type]["short_name"])
        # Use cls rather than the hard-coded class name so that subclasses get
        # instances of their own type from this alternate constructor.
        return cls(
            namespace=old_swhid.namespace,
            scheme_version=old_swhid.scheme_version,
            object_type=object_type,
            object_id=hash_to_bytes(old_swhid.object_id),
            qualifiers=old_swhid.metadata,
        )
@attr.s(frozen=True) | @attr.s(frozen=True) | ||||
class SWHID: | class SWHID: | ||||
""" | """ | ||||
Named tuple holding the relevant info associated to a SoftWare Heritage | Deprecated alternative to QualifiedSWHID. | ||||
Done — inline comment by zack: Should we log a deprecation message here? Or is it too soon to do that?
Done — inline reply by vlorentz: Oops, it got lost before I committed.
persistent IDentifier (SWHID) | |||||
Args: | Args: | ||||
namespace (str): the namespace of the identifier, defaults to ``swh`` | namespace (str): the namespace of the identifier, defaults to ``swh`` | ||||
scheme_version (int): the scheme version of the identifier, | scheme_version (int): the scheme version of the identifier, | ||||
defaults to 1 | defaults to 1 | ||||
object_type (str): the type of object the identifier points to, | object_type (str): the type of object the identifier points to, | ||||
either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` | either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` | ||||
object_id (str): object's identifier | object_id (str): object's identifier | ||||
Show All 26 Lines | class SWHID: | ||||
namespace = attr.ib(type=str, default=SWHID_NAMESPACE) | namespace = attr.ib(type=str, default=SWHID_NAMESPACE) | ||||
scheme_version = attr.ib(type=int, default=SWHID_VERSION) | scheme_version = attr.ib(type=int, default=SWHID_VERSION) | ||||
object_type = attr.ib(type=str, default="") | object_type = attr.ib(type=str, default="") | ||||
object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore | object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore | ||||
metadata = attr.ib( | metadata = attr.ib( | ||||
type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() | type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() | ||||
) | ) | ||||
def __attrs_post_init__(self):
    """Emit a DeprecationWarning every time a SWHID is instantiated.

    ``stacklevel=3`` skips this hook and the attrs-generated ``__init__``
    that invokes it, so the warning is attributed to the code that
    constructed the object rather than to this module.
    """
    warnings.warn(
        "swh.model.identifiers.SWHID is deprecated; "
        "use swh.model.identifiers.QualifiedSWHID instead.",
        DeprecationWarning,
        stacklevel=3,
    )
@namespace.validator
def check_namespace(self, attribute, value):
    """Validate the ``namespace`` attribute.

    Raises:
        ValidationError: if ``value`` differs from ``SWHID_NAMESPACE``
    """
    if value == SWHID_NAMESPACE:
        return
    raise ValidationError(
        "Invalid SWHID: invalid namespace: %(namespace)s",
        params={"namespace": value},
    )
▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines |
Thanks gawd yes.