Changeset View
Changeset View
Standalone View
Standalone View
swh/model/swhid.py
- This file was added.
# Copyright (C) 2015-2020 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
from enum import Enum | |||||
import re | |||||
from typing import Any, Dict, Union | |||||
import attr | |||||
from attrs_strict import type_validator | |||||
from .collections import ImmutableDict | |||||
from .exceptions import ValidationError | |||||
from .fields.hashes import validate_sha1 | |||||
from .hashutil import hash_to_hex | |||||
# Long names of the object types.
ORIGIN = "origin"
SNAPSHOT = "snapshot"
REVISION = "revision"
RELEASE = "release"
DIRECTORY = "directory"
CONTENT = "content"

# Building blocks of the textual form: swh:1:<type>:<object id>[;<qualifiers>]
SWHID_NAMESPACE = "swh"
SWHID_VERSION = 1
SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
SWHID_SEP = ":"
SWHID_CTXT_SEP = ";"
SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}

# The four mandatory fields are joined by SWHID_SEP; the optional qualifiers
# part starts at the first SWHID_CTXT_SEP and is captured as a whole.
SWHID_RE_RAW = SWHID_SEP.join(
    [
        "(?P<scheme>%s)" % SWHID_NAMESPACE,
        "(?P<version>%s)" % SWHID_VERSION,
        "(?P<object_type>%s)" % "|".join(SWHID_TYPES),
        "(?P<object_id>[0-9a-f]+)",
    ]
) + "(%s(?P<qualifiers>\\S+))?" % SWHID_CTXT_SEP

SWHID_RE = re.compile(SWHID_RE_RAW)
class SWHIDObjectType(Enum):
    """Types of objects a SWHID can point to.

    The value of each member is the three-letter tag that stands for the type
    in the textual form of a SWHID (e.g. ``cnt`` in ``swh:1:cnt:...``).
    """

    CONTENT = "cnt"
    DIRECTORY = "dir"
    REVISION = "rev"
    RELEASE = "rel"
    SNAPSHOT = "snp"
    ORIGIN = "ori"
# Object types in the order used to populate the lookup tables below.
_object_types_in_order = (
    SWHIDObjectType.ORIGIN,
    SWHIDObjectType.SNAPSHOT,
    SWHIDObjectType.RELEASE,
    SWHIDObjectType.REVISION,
    SWHIDObjectType.DIRECTORY,
    SWHIDObjectType.CONTENT,
)

# For each object type, the key under which the identifier is looked up when
# an object is given as a dict: contents use ``sha1_git``, all others ``id``.
_object_type_id_map = {
    kind: ("sha1_git" if kind is SWHIDObjectType.CONTENT else "id")
    for kind in _object_types_in_order
}

# Accepted spellings of an object type: first the three-letter SWHID tags
# (``cnt``), then the lowercase long names (``content``).
_swhid_type_map = {
    **{kind.value: kind for kind in _object_types_in_order},
    **{kind.name.lower(): kind for kind in _object_types_in_order},
}
@attr.s(frozen=True)
class SWHID:
    """
    Immutable object holding the relevant info associated to a SoftWare Heritage
    persistent IDentifier (SWHID)

    Args:
        object_type (SWHIDObjectType): the type of object the identifier
            points to; plain strings are rejected by the type validator
        object_id: object's identifier; it is passed through ``hash_to_hex``
            before being validated and stored
        namespace (str): the namespace of the identifier, defaults to ``swh``
            (any other value is rejected)
        scheme_version (int): the scheme version of the identifier,
            defaults to 1 (any other value is rejected)
        metadata (dict): optional dict of qualifiers related to the pointed
            object; keys must be one of ``SWHID_QUALIFIERS``

    Raises:
        swh.model.exceptions.ValidationError: In case of invalid namespace,
            scheme version, object id or qualifier

    Once created, it contains the following attributes:

    Attributes:
        namespace (str): the namespace of the identifier
        scheme_version (int): the scheme version of the identifier
        object_type (SWHIDObjectType): the type of object the identifier
            points to
        object_id (str): hexadecimal representation of the object hash
        metadata (ImmutableDict): qualifiers related to the pointed object

    To get the raw SWHID string from an instance of this class,
    use the :func:`str` function::

        swhid = SWHID(
            object_type=SWHIDObjectType.CONTENT,
            object_id='8ff44f081d43176474b267de5451f2c2e88089d0'
        )
        swhid_str = str(swhid)
        # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
    """

    object_type = attr.ib(type=SWHIDObjectType, validator=type_validator())
    object_id = attr.ib(type=str, converter=hash_to_hex)  # type: ignore
    namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
    scheme_version = attr.ib(type=int, default=SWHID_VERSION)
    metadata = attr.ib(
        type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
    )

    @namespace.validator
    def check_namespace(self, attribute, value):
        """Reject any namespace other than ``SWHID_NAMESPACE``."""
        if value != SWHID_NAMESPACE:
            raise ValidationError(
                "Invalid SWHID: invalid namespace: %(namespace)s",
                params={"namespace": value},
            )

    @scheme_version.validator
    def check_scheme_version(self, attribute, value):
        """Reject any scheme version other than ``SWHID_VERSION``."""
        if value != SWHID_VERSION:
            raise ValidationError(
                "Invalid SWHID: invalid version: %(version)s", params={"version": value}
            )

    @object_id.validator
    def check_object_id(self, attribute, value):
        """Check that the object id is a valid sha1, except for origins."""
        # Origin identifiers are deliberately exempt from the sha1 check.
        if self.object_type == SWHIDObjectType.ORIGIN:
            return
        try:
            validate_sha1(value)  # can raise if invalid hash
        except ValidationError:
            # Replace the generic hash error with a SWHID-specific message.
            raise ValidationError(
                "Invalid SWHID: invalid checksum: %(object_id)s",
                params={"object_id": value},
            ) from None

    @metadata.validator
    def check_qualifiers(self, attribute, value):
        """Reject qualifier keys that are not listed in ``SWHID_QUALIFIERS``."""
        for k in value:
            if k not in SWHID_QUALIFIERS:
                raise ValidationError(
                    "Invalid SWHID: unknown qualifier: %(qualifier)s",
                    params={"qualifier": k},
                )

    def to_dict(self) -> Dict[str, Any]:
        """Return the attributes of this SWHID as built by ``attr.asdict``."""
        return attr.asdict(self)

    def __str__(self) -> str:
        """Return the textual form: the four core fields joined by
        ``SWHID_SEP``, followed by one ``;key=value`` item per qualifier."""
        core = SWHID_SEP.join(
            [
                self.namespace,
                str(self.scheme_version),
                self.object_type.value,
                self.object_id,
            ]
        )
        # Empty metadata yields an empty suffix, so no separate guard is needed.
        qualifiers = "".join(
            "%s%s=%s" % (SWHID_CTXT_SEP, k, v) for k, v in self.metadata.items()
        )
        return core + qualifiers
def swhid(
    object_type: Union[SWHIDObjectType, str],
    object_id: Union[str, Dict[str, Any]],
    scheme_version: int = 1,
    metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(),
) -> str:
    """Compute :ref:`persistent-identifiers`

    Args:
        object_type: object's type, either a SWHIDObjectType or one of the
            strings known to ``_swhid_type_map`` (e.g. ``cnt`` or ``content``)
        object_id: object's identifier, or a dict from which the identifier
            is read using the key registered for the object type
        scheme_version: SWHID scheme version, defaults to 1
        metadata: metadata related to the pointed object

    Raises:
        swh.model.exceptions.ValidationError: In case of invalid object type or id

    Returns:
        the SWHID of the object

    """
    if isinstance(object_type, str):
        try:
            object_type = _swhid_type_map[object_type]
        except KeyError:
            # Honour the documented contract: an unknown type string is a
            # validation failure, not a bare KeyError.
            raise ValidationError(
                "Invalid SWHID: invalid type: %(object_type)s",
                params={"object_type": object_type},
            ) from None
    if isinstance(object_id, dict):
        object_id = object_id[_object_type_id_map[object_type]]
    identifier = SWHID(
        scheme_version=scheme_version,
        object_type=object_type,
        object_id=object_id,
        metadata=metadata,  # type: ignore  # mypy can't properly unify types
    )
    return str(identifier)
def parse_swhid(swhid: str) -> SWHID:
    """Parse a Software Heritage identifier (SWHID) from string (see:
    :ref:`persistent-identifiers`.)

    Args:
        swhid (str): A persistent identifier

    Returns:
        a :class:`SWHID` instance holding the parsing result

    Raises:
        swh.model.exceptions.ValidationError: if passed string is not a valid SWHID

    """
    m = SWHID_RE.fullmatch(swhid)
    if not m:
        raise ValidationError(
            "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid}
        )
    parts = m.groupdict()

    qualifiers: Dict[str, str] = {}
    qualifiers_raw = parts["qualifiers"]
    if qualifiers_raw:
        for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP):
            # Split on the first "=" only: qualifier values (e.g. an origin
            # URL with a query string) may themselves contain "=".
            key, sep, value = qualifier.partition("=")
            if not sep:
                raise ValidationError(
                    "Invalid SWHID: invalid qualifier: %(qualifier)s",
                    params={"qualifier": qualifier},
                )
            qualifiers[key] = value

    return SWHID(
        namespace=parts["scheme"],
        scheme_version=int(parts["version"]),
        object_type=_swhid_type_map[parts["object_type"]],
        object_id=parts["object_id"],
        metadata=qualifiers,  # type: ignore  # mypy can't properly unify types
    )