Changeset View
Standalone View
swh/model/identifiers.py
| # Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2021 The Software Heritage developers | ||||
| # See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
| # License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
| # See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
| from __future__ import annotations | |||||
| import binascii | import binascii | ||||
| import datetime | import datetime | ||||
| import enum | |||||
| from functools import lru_cache | from functools import lru_cache | ||||
| import hashlib | import hashlib | ||||
| import re | import re | ||||
| from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
| import warnings | |||||
| import attr | import attr | ||||
| from attrs_strict import type_validator | |||||
| from .collections import ImmutableDict | from .collections import ImmutableDict | ||||
| from .exceptions import ValidationError | from .exceptions import ValidationError | ||||
| from .fields.hashes import validate_sha1 | from .fields.hashes import validate_sha1 | ||||
| from .hashutil import MultiHash, hash_git_data, hash_to_hex | from .hashutil import MultiHash, hash_git_data, hash_to_bytes, hash_to_hex | ||||
zack: Thanks gawd yes.
- minor (1): can you add a docstring for this enum? Even a one-liner would do…
class ObjectType(enum.Enum):
    """Possible object types of a QualifiedSWHID.

    The value of each variant is what is used in the SWHID's string
    representation.
    """

    ORIGIN = "ori"
    SNAPSHOT = "snp"
    REVISION = "rev"
    RELEASE = "rel"
    DIRECTORY = "dir"
    CONTENT = "cnt"
# The following are deprecated aliases of the variants defined in ObjectType,
# kept while transitioning from SWHID to QualifiedSWHID.
# Note that they hold the long type names ("origin", "snapshot", ...), whereas
# the ObjectType variants hold the short codes ("ori", "snp", ...).
ORIGIN = "origin"
SNAPSHOT = "snapshot"
REVISION = "revision"
RELEASE = "release"
DIRECTORY = "directory"
CONTENT = "content"

# Namespace that the namespace validators in this module compare against.
SWHID_NAMESPACE = "swh"
| ▲ Show 20 Lines • Show All 664 Lines • ▼ Show 20 Lines | _swhid_type_map = { | ||||
| "snp": SNAPSHOT, | "snp": SNAPSHOT, | ||||
| "rel": RELEASE, | "rel": RELEASE, | ||||
| "rev": REVISION, | "rev": REVISION, | ||||
| "dir": DIRECTORY, | "dir": DIRECTORY, | ||||
| "cnt": CONTENT, | "cnt": CONTENT, | ||||
| } | } | ||||
@attr.s(frozen=True, kw_only=True)
class QualifiedSWHID:
    """
    Dataclass holding the relevant info associated to a SoftWare Heritage
    persistent IDentifier (SWHID)

    Raises:
        swh.model.exceptions.ValidationError: In case of invalid namespace,
            scheme version, object id or qualifier name

    To get the raw SWHID string from an instance of this class,
    use the :func:`str` function:

    >>> swhid = QualifiedSWHID(
    ...     object_type=ObjectType.CONTENT,
    ...     object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'),
    ...     qualifiers={"lines": "5-10"},
    ... )
    >>> str(swhid)
    'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10'

    And vice-versa with :meth:`QualifiedSWHID.from_string`:

    >>> swhid == QualifiedSWHID.from_string(
    ...     "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0;lines=5-10"
    ... )
    True
    """

    # Each field carries its own validator (instead of one regular expression
    # over the string form) because validation must also run when the object
    # is built from its parts, not only when it is parsed from a string.

    namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
    """the namespace of the identifier, defaults to ``swh``"""

    scheme_version = attr.ib(type=int, default=SWHID_VERSION)
    """the scheme version of the identifier, defaults to 1"""

    object_type = attr.ib(type=ObjectType, validator=type_validator())
    """the type of object the identifier points to"""

    object_id = attr.ib(type=bytes, validator=type_validator())
    """object's identifier"""

    qualifiers = attr.ib(
        type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
    )
    """optional dict filled with metadata related to pointed object"""

    @namespace.validator
    def check_namespace(self, attribute, value):
        """Reject any namespace other than ``SWHID_NAMESPACE``."""
        if value != SWHID_NAMESPACE:
            raise ValidationError(
                "Invalid SWHID: invalid namespace: %(namespace)s",
                params={"namespace": value},
            )

    @scheme_version.validator
    def check_scheme_version(self, attribute, value):
        """Reject any scheme version other than ``SWHID_VERSION``."""
        if value != SWHID_VERSION:
            raise ValidationError(
                "Invalid SWHID: invalid version: %(version)s", params={"version": value}
            )

    @object_id.validator
    def check_object_id(self, attribute, value):
        """Reject object ids that are not exactly 20 bytes long.

        20 bytes is the binary length of the 40-hex-digit identifier shown in
        the class docstring example.
        """
        if len(value) != 20:
            raise ValidationError(
                "Invalid SWHID: invalid checksum: %(object_id)s",
                params={"object_id": hash_to_hex(value)},
            )

    @qualifiers.validator
    def check_qualifiers(self, attribute, value):
        """Reject the first qualifier name that is not in ``SWHID_QUALIFIERS``."""
        for k in value:
            if k not in SWHID_QUALIFIERS:
                raise ValidationError(
                    "Invalid SWHID: unknown qualifier: %(qualifier)s",
                    params={"qualifier": k},
                )

    def __str__(self) -> str:
        """Return the string form of the identifier.

        The core part joins namespace, scheme version, object type code and
        hex object id with ``SWHID_SEP``; each qualifier is then appended as
        ``<SWHID_CTXT_SEP><key>=<value>``, in the mapping's iteration order.
        """
        core = SWHID_SEP.join(
            [
                self.namespace,
                str(self.scheme_version),
                self.object_type.value,
                hash_to_hex(self.object_id),
            ]
        )
        # Build the qualifier suffix with a single join rather than repeated
        # ``+=``, which may be quadratic in the number of qualifiers. An empty
        # mapping yields an empty suffix.
        suffix = "".join(
            "%s%s=%s" % (SWHID_CTXT_SEP, k, v) for k, v in self.qualifiers.items()
        )
        return core + suffix

    @classmethod
    def from_string(cls, s: str) -> QualifiedSWHID:
        """Build an instance from the string form of a SWHID.

        Args:
            s: the identifier string, handed to ``parse_swhid`` for parsing

        Returns:
            an instance of ``cls`` built from the parsed parts; field
            validators run on construction as for any other instance.
        """
        # Parsing is delegated to the legacy parse_swhid helper; warnings are
        # silenced around it (presumably it emits the SWHID deprecation
        # warning, which callers of this newer API should not see).
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            old_swhid = parse_swhid(s)
        # _object_type_map is one of the module-level constants of this file;
        # it is used here to turn the legacy long type name into the short
        # code that ObjectType is keyed on.
        object_type = ObjectType(_object_type_map[old_swhid.object_type]["short_name"])
        # Use ``cls`` rather than the hard-coded class name so that the
        # alternate constructor also works for subclasses.
        return cls(
            namespace=old_swhid.namespace,
            scheme_version=old_swhid.scheme_version,
            object_type=object_type,
            object_id=hash_to_bytes(old_swhid.object_id),
            qualifiers=old_swhid.metadata,
        )
| @attr.s(frozen=True) | @attr.s(frozen=True) | ||||
| class SWHID: | class SWHID: | ||||
| """ | """ | ||||
| Named tuple holding the relevant info associated to a SoftWare Heritage | Deprecated alternative to QualifiedSWHID. | ||||
[Done] zack: should we log a deprecation message here? or is it too soon to do that?
[Done] vlorentz: oops, it got lost before I committed.
| persistent IDentifier (SWHID) | |||||
| Args: | Args: | ||||
| namespace (str): the namespace of the identifier, defaults to ``swh`` | namespace (str): the namespace of the identifier, defaults to ``swh`` | ||||
| scheme_version (int): the scheme version of the identifier, | scheme_version (int): the scheme version of the identifier, | ||||
| defaults to 1 | defaults to 1 | ||||
| object_type (str): the type of object the identifier points to, | object_type (str): the type of object the identifier points to, | ||||
| either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` | either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` | ||||
| object_id (str): object's identifier | object_id (str): object's identifier | ||||
| Show All 26 Lines | class SWHID: | ||||
| namespace = attr.ib(type=str, default=SWHID_NAMESPACE) | namespace = attr.ib(type=str, default=SWHID_NAMESPACE) | ||||
| scheme_version = attr.ib(type=int, default=SWHID_VERSION) | scheme_version = attr.ib(type=int, default=SWHID_VERSION) | ||||
| object_type = attr.ib(type=str, default="") | object_type = attr.ib(type=str, default="") | ||||
| object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore | object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore | ||||
| metadata = attr.ib( | metadata = attr.ib( | ||||
| type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() | type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() | ||||
| ) | ) | ||||
def __attrs_post_init__(self):
    """Emit a DeprecationWarning every time a SWHID is instantiated.

    ``stacklevel=3`` attributes the warning to the code that instantiates
    the class rather than to this method: level 1 is this method, level 2
    is the attrs-generated ``__init__`` that calls it, and level 3 is the
    caller of the constructor.
    """
    warnings.warn(
        "swh.model.identifiers.SWHID is deprecated; "
        "use swh.model.identifiers.QualifiedSWHID instead.",
        DeprecationWarning,
        stacklevel=3,
    )
@namespace.validator
def check_namespace(self, attribute, value):
    """Accept only ``SWHID_NAMESPACE`` as namespace.

    Raises:
        ValidationError: if ``value`` differs from ``SWHID_NAMESPACE``
    """
    # Guard clause: the expected namespace needs no further handling.
    if value == SWHID_NAMESPACE:
        return
    error_params = {"namespace": value}
    raise ValidationError(
        "Invalid SWHID: invalid namespace: %(namespace)s", params=error_params
    )
| ▲ Show 20 Lines • Show All 124 Lines • Show Last 20 Lines | |||||
Thanks gawd yes.