diff --git a/swh/model/cli.py b/swh/model/cli.py --- a/swh/model/cli.py +++ b/swh/model/cli.py @@ -12,7 +12,7 @@ import click from swh.core.cli import swh as swh_cli_group -from swh.model.identifiers import SWHID +from swh.model.swhid import SWHID, SWHIDObjectType, swhid CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -44,18 +44,16 @@ def swhid_of_file(path): from swh.model.from_disk import Content - from swh.model.identifiers import CONTENT, swhid object = Content.from_file(path=path).get_data() - return swhid(CONTENT, object) + return swhid(SWHIDObjectType.CONTENT, object) def swhid_of_file_content(data): from swh.model.from_disk import Content - from swh.model.identifiers import CONTENT, swhid object = Content.from_bytes(mode=644, data=data).get_data() - return swhid(CONTENT, object) + return swhid(SWHIDObjectType.CONTENT, object) def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> str: @@ -64,7 +62,6 @@ accept_all_directories, ignore_directories_patterns, ) - from swh.model.identifiers import DIRECTORY, swhid dir_filter = ( ignore_directories_patterns(path, exclude_patterns) @@ -73,20 +70,21 @@ ) object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data() - return swhid(DIRECTORY, object) + return swhid(SWHIDObjectType.DIRECTORY, object) def swhid_of_origin(url): - from swh.model.identifiers import SWHID, origin_identifier + from swh.model.identifiers import origin_identifier - return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url}))) + return str(SWHID(object_type=SWHIDObjectType.ORIGIN, + object_id=origin_identifier({"url": url}))) def swhid_of_git_repo(path): import dulwich.repo from swh.model import hashutil - from swh.model.identifiers import SWHID, snapshot_identifier + from swh.model.identifiers import snapshot_identifier repo = dulwich.repo.Repo(path) @@ -109,7 +107,8 @@ snapshot = {"branches": branches} - return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot))) + return str(SWHID(object_type=SWHIDObjectType.SNAPSHOT, + object_id=snapshot_identifier(snapshot))) def identify_object(obj_type, follow_symlinks, exclude_patterns, obj): diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py --- a/swh/model/identifiers.py +++ b/swh/model/identifiers.py @@ -7,38 +7,12 @@ import datetime from functools import lru_cache import hashlib -import re -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union - -import attr - -from .collections import ImmutableDict -from .exceptions import ValidationError -from .fields.hashes import validate_sha1 -from .hashutil import MultiHash, hash_git_data, hash_to_hex - -ORIGIN = "origin" -SNAPSHOT = "snapshot" -REVISION = "revision" -RELEASE = "release" -DIRECTORY = "directory" -CONTENT = "content" - -SWHID_NAMESPACE = "swh" -SWHID_VERSION = 1 -SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"] -SWHID_SEP = ":" -SWHID_CTXT_SEP = ";" -SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"} - -SWHID_RE_RAW = ( - f"(?P{SWHID_NAMESPACE})" - f"{SWHID_SEP}(?P{SWHID_VERSION})" - f"{SWHID_SEP}(?P{'|'.join(SWHID_TYPES)})" - f"{SWHID_SEP}(?P[0-9a-f]{{40}})" - f"({SWHID_CTXT_SEP}(?P\\S+))?" -) -SWHID_RE = re.compile(SWHID_RE_RAW) +from typing import Iterable, List, Optional, Tuple + +# for bw compat +from swh.model.swhid import SWHID, SWHID_RE, SWHID_TYPES, parse_swhid, swhid # noqa + +from .hashutil import MultiHash, hash_git_data @lru_cache() @@ -676,203 +650,3 @@ """ return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest() - - -_object_type_map = { - ORIGIN: {"short_name": "ori", "key_id": "id"}, - SNAPSHOT: {"short_name": "snp", "key_id": "id"}, - RELEASE: {"short_name": "rel", "key_id": "id"}, - REVISION: {"short_name": "rev", "key_id": "id"}, - DIRECTORY: {"short_name": "dir", "key_id": "id"}, - CONTENT: {"short_name": "cnt", "key_id": "sha1_git"}, -} - -_swhid_type_map = { - "ori": ORIGIN, - "snp": SNAPSHOT, - "rel": RELEASE, - "rev": REVISION, - "dir": DIRECTORY, - "cnt": CONTENT, -} - - -@attr.s(frozen=True) -class SWHID: - """ - Named tuple holding the relevant info associated to a SoftWare Heritage - persistent IDentifier (SWHID) - - Args: - namespace (str): the namespace of the identifier, defaults to ``swh`` - scheme_version (int): the scheme version of the identifier, - defaults to 1 - object_type (str): the type of object the identifier points to, - either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` - object_id (str): object's identifier - metadata (dict): optional dict filled with metadata related to - pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Once created, it contains the following attributes: - - Attributes: - namespace (str): the namespace of the identifier - scheme_version (int): the scheme version of the identifier - object_type (str): the type of object the identifier points to - object_id (str): hexadecimal representation of the object hash - metadata (dict): metadata related to the pointed object - - To get the raw SWHID string from an instance of this named tuple, - use the :func:`str` function:: - - swhid = SWHID( - object_type='content', - object_id='8ff44f081d43176474b267de5451f2c2e88089d0' - ) - swhid_str = str(swhid) - # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' - """ - - namespace = attr.ib(type=str, default=SWHID_NAMESPACE) - scheme_version = attr.ib(type=int, default=SWHID_VERSION) - object_type = attr.ib(type=str, default="") - object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore - metadata = attr.ib( - type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() - ) - - @namespace.validator - def check_namespace(self, attribute, value): - if value != SWHID_NAMESPACE: - raise ValidationError( - "Invalid SWHID: invalid namespace: %(namespace)s", - params={"namespace": value}, - ) - - @scheme_version.validator - def check_scheme_version(self, attribute, value): - if value != SWHID_VERSION: - raise ValidationError( - "Invalid SWHID: invalid version: %(version)s", params={"version": value} - ) - - @object_type.validator - def check_object_type(self, attribute, value): - if value not in _object_type_map: - raise ValidationError( - "Invalid SWHID: invalid type: %(object_type)s)", - params={"object_type": value}, - ) - - @object_id.validator - def check_object_id(self, attribute, value): - try: - validate_sha1(value) # can raise if invalid hash - except ValidationError: - raise ValidationError( - "Invalid SWHID: invalid checksum: %(object_id)s", - params={"object_id": value}, - ) from None - - @metadata.validator - def check_qualifiers(self, attribute, value): - for k in value: - if k not in SWHID_QUALIFIERS: - raise ValidationError( - "Invalid SWHID: unknown qualifier: %(qualifier)s", - params={"qualifier": k}, - ) - - def to_dict(self) -> Dict[str, Any]: - return attr.asdict(self) - - def __str__(self) -> str: - o = _object_type_map.get(self.object_type) - assert o - swhid = SWHID_SEP.join( - [self.namespace, str(self.scheme_version), o["short_name"], self.object_id] - ) - if self.metadata: - for k, v in self.metadata.items(): - swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) - return swhid - - -def swhid( - object_type: str, - object_id: Union[str, Dict[str, Any]], - scheme_version: int = 1, - metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), -) -> str: - """Compute :ref:`persistent-identifiers` - - Args: - object_type: object's type, either ``content``, ``directory``, - ``release``, ``revision`` or ``snapshot`` - object_id: object's identifier - scheme_version: SWHID scheme version, defaults to 1 - metadata: metadata related to the pointed object - - Raises: - swh.model.exceptions.ValidationError: In case of invalid object type or id - - Returns: - the SWHID of the object - - """ - if isinstance(object_id, dict): - o = _object_type_map[object_type] - object_id = object_id[o["key_id"]] - swhid = SWHID( - scheme_version=scheme_version, - object_type=object_type, - object_id=object_id, - metadata=metadata, # type: ignore # mypy can't properly unify types - ) - return str(swhid) - - -def parse_swhid(swhid: str) -> SWHID: - """Parse a Software Heritage identifier (SWHID) from string (see: - :ref:`persistent-identifiers`.) - - Args: - swhid (str): A persistent identifier - - Returns: - a named tuple holding the parsing result - - Raises: - swh.model.exceptions.ValidationError: if passed string is not a valid SWHID - - """ - m = SWHID_RE.fullmatch(swhid) - if not m: - raise ValidationError( - "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid} - ) - parts = m.groupdict() - - _qualifiers = {} - qualifiers_raw = parts["qualifiers"] - if qualifiers_raw: - for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP): - try: - k, v = qualifier.split("=") - except ValueError: - raise ValidationError( - "Invalid SWHID: invalid qualifier: %(qualifier)s", - params={"qualifier": qualifier}, - ) - _qualifiers[k] = v - - return SWHID( - parts["scheme"], - int(parts["version"]), - _swhid_type_map[parts["object_type"]], - parts["object_id"], - _qualifiers, # type: ignore # mypy can't properly unify types - ) diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -18,7 +18,6 @@ from .collections import ImmutableDict from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes from .identifiers import ( - SWHID, directory_identifier, normalize_timestamp, parse_swhid, @@ -26,6 +25,7 @@ revision_identifier, snapshot_identifier, ) +from .swhid import SWHID, SWHIDObjectType class MissingData(Exception): @@ -274,6 +274,10 @@ def unique_key(self) -> KeyType: return {"url": self.url} + @property + def id(self) -> bytes: + return self.url.encode() + @attr.s(frozen=True, slots=True) class OriginVisit(BaseModel): @@ -706,6 +710,10 @@ def unique_key(self) -> KeyType: return self.sha1 # TODO: use a dict of hashes + @property + def id(self): + return self.sha1_git + @attr.s(frozen=True, slots=True) class SkippedContent(BaseContent): @@ -850,25 +858,12 @@ return {"name": self.name, "version": self.version} -class MetadataTargetType(Enum): - """The type of object extrinsic metadata refer to.""" - - CONTENT = "content" - DIRECTORY = "directory" - REVISION = "revision" - RELEASE = "release" - SNAPSHOT = "snapshot" - ORIGIN = "origin" - - @attr.s(frozen=True, slots=True) class RawExtrinsicMetadata(BaseModel): object_type: Final = "raw_extrinsic_metadata" # target object - type = attr.ib(type=MetadataTargetType, validator=type_validator()) - target = attr.ib(type=Union[str, SWHID], validator=type_validator()) - """URL if type=MetadataTargetType.ORIGIN, else core SWHID""" + target = attr.ib(type=SWHID, validator=[type_validator()]) # source discovery_date = attr.ib(type=datetime.datetime, validator=type_validator()) @@ -880,23 +875,23 @@ metadata = attr.ib(type=bytes, validator=type_validator()) # context - origin = attr.ib(type=Optional[str], default=None, validator=type_validator()) - visit = attr.ib(type=Optional[int], default=None, validator=type_validator()) - snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - path = attr.ib(type=Optional[bytes], default=None, validator=type_validator()) - directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) + origin = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()]) + visit = attr.ib(type=Optional[int], default=None, validator=[type_validator()]) + snapshot = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()]) + release = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()]) + revision = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()]) + path = attr.ib(type=Optional[bytes], default=None, validator=[type_validator()]) + directory = attr.ib( + type=Optional[SWHID], default=None, validator=[type_validator()] + ) + + @property + def type(self): + return self.target.object_type @target.validator def check_target(self, attribute, value): - if self.type == MetadataTargetType.ORIGIN: - if isinstance(value, SWHID) or value.startswith("swh:"): - raise ValueError( - "Got SWHID as target for origin metadata (expected an URL)." - ) - else: - self._check_swhid(self.type.value, value) + self._check_swhid(self.type, value) @discovery_date.validator def check_discovery_date(self, attribute, value): @@ -909,24 +904,11 @@ if value is None: return - if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, - ): - raise ValueError( - f"Unexpected 'origin' context for {self.type.value} object: {value}" - ) + if self.type == SWHIDObjectType.ORIGIN: + raise ValueError(f"Unexpected 'origin' context for object: {value}") - if value.startswith("swh:"): - # Technically this is valid; but: - # 1. SWHIDs are URIs, not URLs - # 2. if a SWHID gets here, it's very likely to be a mistake - # (and we can remove this check if it turns out there is a - # legitimate use for it). - raise ValueError(f"SWHID used as context origin URL: {value}") + if value.object_type != SWHIDObjectType.ORIGIN: + raise ValueError(f"Non origin SWHID used as context origin: {value}") @visit.validator def check_visit(self, attribute, value): @@ -934,15 +916,13 @@ return if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + SWHIDObjectType.SNAPSHOT, + SWHIDObjectType.RELEASE, + SWHIDObjectType.REVISION, + SWHIDObjectType.DIRECTORY, + SWHIDObjectType.CONTENT, ): - raise ValueError( - f"Unexpected 'visit' context for {self.type.value} object: {value}" - ) + raise ValueError(f"Unexpected 'visit' context for object: {value}") if self.origin is None: raise ValueError("'origin' context must be set if 'visit' is.") @@ -956,16 +936,14 @@ return if self.type not in ( - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + SWHIDObjectType.RELEASE, + SWHIDObjectType.REVISION, + SWHIDObjectType.DIRECTORY, + SWHIDObjectType.CONTENT, ): - raise ValueError( - f"Unexpected 'snapshot' context for {self.type.value} object: {value}" - ) + raise ValueError(f"Unexpected 'snapshot' context for object: {value}") - self._check_swhid("snapshot", value) + self._check_swhid(SWHIDObjectType.SNAPSHOT, value) @release.validator def check_release(self, attribute, value): @@ -973,58 +951,51 @@ return if self.type not in ( - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + SWHIDObjectType.REVISION, + SWHIDObjectType.DIRECTORY, + SWHIDObjectType.CONTENT, ): - raise ValueError( - f"Unexpected 'release' context for {self.type.value} object: {value}" - ) + raise ValueError(f"Unexpected 'release' context for object: {value}") - self._check_swhid("release", value) + self._check_swhid(SWHIDObjectType.RELEASE, value) @revision.validator def check_revision(self, attribute, value): if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): - raise ValueError( - f"Unexpected 'revision' context for {self.type.value} object: {value}" - ) + if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,): + raise ValueError(f"Unexpected 'revision' context for object: {value}") - self._check_swhid("revision", value) + self._check_swhid(SWHIDObjectType.REVISION, value) @path.validator def check_path(self, attribute, value): if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): - raise ValueError( - f"Unexpected 'path' context for {self.type.value} object: {value}" - ) + if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,): + raise ValueError(f"Unexpected 'path' context for object: {value}") @directory.validator def check_directory(self, attribute, value): if value is None: return - if self.type not in (MetadataTargetType.CONTENT,): - raise ValueError( - f"Unexpected 'directory' context for {self.type.value} object: {value}" - ) + if self.type not in (SWHIDObjectType.CONTENT,): + raise ValueError(f"Unexpected 'directory' context for object: {value}") - self._check_swhid("directory", value) + self._check_swhid(SWHIDObjectType.DIRECTORY, value) def _check_swhid(self, expected_object_type, swhid): + if isinstance(swhid, str): raise ValueError(f"Expected SWHID, got a string: {swhid}") if swhid.object_type != expected_object_type: raise ValueError( - f"Expected SWHID type '{expected_object_type}', " - f"got '{swhid.object_type}' in {swhid}" + f"Expected SWHID type '{expected_object_type.value}', " + f"got '{swhid.object_type.value}' in {swhid}" ) if swhid.metadata: @@ -1049,26 +1020,20 @@ @classmethod def from_dict(cls, d): + d = { **d, - "type": MetadataTargetType(d["type"]), "authority": MetadataAuthority.from_dict(d["authority"]), "fetcher": MetadataFetcher.from_dict(d["fetcher"]), } - - if d["type"] != MetadataTargetType.ORIGIN: - d["target"] = parse_swhid(d["target"]) - - swhid_keys = ("snapshot", "release", "revision", "directory") - for swhid_key in swhid_keys: - if d.get(swhid_key): - d[swhid_key] = parse_swhid(d[swhid_key]) - + swhid_keys = ("target", "snapshot", "release", "revision", "directory") + for k in swhid_keys: + if k in d and isinstance(d[k], str): + d[k] = parse_swhid(d[k]) return super().from_dict(d) def unique_key(self) -> KeyType: return { - "type": self.type.value, "target": str(self.target), "authority_type": self.authority.type.value, "authority_url": self.authority.url, diff --git a/swh/model/swhid.py b/swh/model/swhid.py new file mode 100644 --- /dev/null +++ b/swh/model/swhid.py @@ -0,0 +1,254 @@ +# Copyright (C) 2015-2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from enum import Enum +import re +from typing import Any, Dict, Union + +import attr +from attrs_strict import type_validator + +from .collections import ImmutableDict +from .exceptions import ValidationError +from .fields.hashes import validate_sha1 +from .hashutil import hash_to_hex + +ORIGIN = "origin" +SNAPSHOT = "snapshot" +REVISION = "revision" +RELEASE = "release" +DIRECTORY = "directory" +CONTENT = "content" + +SWHID_NAMESPACE = "swh" +SWHID_VERSION = 1 +SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"] +SWHID_SEP = ":" +SWHID_CTXT_SEP = ";" +SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"} + +SWHID_RE_RAW = ( + f"(?P{SWHID_NAMESPACE})" + f"{SWHID_SEP}(?P{SWHID_VERSION})" + f"{SWHID_SEP}(?P{'|'.join(SWHID_TYPES)})" + f"{SWHID_SEP}(?P[0-9a-f]+)" + f"({SWHID_CTXT_SEP}(?P\\S+))?" +) +SWHID_RE = re.compile(SWHID_RE_RAW) + + +class SWHIDObjectType(Enum): + """The type of object extrinsic metadata refer to.""" + + CONTENT = "cnt" + DIRECTORY = "dir" + REVISION = "rev" + RELEASE = "rel" + SNAPSHOT = "snp" + ORIGIN = "ori" + + +_object_type_id_map = { + SWHIDObjectType.ORIGIN: "id", + SWHIDObjectType.SNAPSHOT: "id", + SWHIDObjectType.RELEASE: "id", + SWHIDObjectType.REVISION: "id", + SWHIDObjectType.DIRECTORY: "id", + SWHIDObjectType.CONTENT: "sha1_git", +} + + +_swhid_type_map = { + "ori": SWHIDObjectType.ORIGIN, + "snp": SWHIDObjectType.SNAPSHOT, + "rel": SWHIDObjectType.RELEASE, + "rev": SWHIDObjectType.REVISION, + "dir": SWHIDObjectType.DIRECTORY, + "cnt": SWHIDObjectType.CONTENT, + "origin": SWHIDObjectType.ORIGIN, + "snapshot": SWHIDObjectType.SNAPSHOT, + "release": SWHIDObjectType.RELEASE, + "revision": SWHIDObjectType.REVISION, + "directory": SWHIDObjectType.DIRECTORY, + "content": SWHIDObjectType.CONTENT, +} + + +@attr.s(frozen=True) +class SWHID: + """ + Named tuple holding the relevant info associated to a SoftWare Heritage + persistent IDentifier (SWHID) + + Args: + namespace (str): the namespace of the identifier, defaults to ``swh`` + scheme_version (int): the scheme version of the identifier, + defaults to 1 + object_type (str): the type of object the identifier points to, + either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot`` + object_id (str): object's identifier + metadata (dict): optional dict filled with metadata related to + pointed object + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + Once created, it contains the following attributes: + + Attributes: + namespace (str): the namespace of the identifier + scheme_version (int): the scheme version of the identifier + object_type (str): the type of object the identifier points to + object_id (str): hexadecimal representation of the object hash + metadata (dict): metadata related to the pointed object + + To get the raw SWHID string from an instance of this named tuple, + use the :func:`str` function:: + + swhid = SWHID( + object_type='content', + object_id='8ff44f081d43176474b267de5451f2c2e88089d0' + ) + swhid_str = str(swhid) + # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0' + """ + + object_type = attr.ib(type=SWHIDObjectType, validator=type_validator()) + object_id = attr.ib(type=str, converter=hash_to_hex) # type: ignore + namespace = attr.ib(type=str, default=SWHID_NAMESPACE) + scheme_version = attr.ib(type=int, default=SWHID_VERSION) + metadata = attr.ib( + type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict() + ) + + @namespace.validator + def check_namespace(self, attribute, value): + if value != SWHID_NAMESPACE: + raise ValidationError( + "Invalid SWHID: invalid namespace: %(namespace)s", + params={"namespace": value}, + ) + + @scheme_version.validator + def check_scheme_version(self, attribute, value): + if value != SWHID_VERSION: + raise ValidationError( + "Invalid SWHID: invalid version: %(version)s", params={"version": value} + ) + + @object_id.validator + def check_object_id(self, attribute, value): + if self.object_type == SWHIDObjectType.ORIGIN: + return + try: + validate_sha1(value) # can raise if invalid hash + except ValidationError: + raise ValidationError( + "Invalid SWHID: invalid checksum: %(object_id)s", + params={"object_id": value}, + ) from None + + @metadata.validator + def check_qualifiers(self, attribute, value): + for k in value: + if k not in SWHID_QUALIFIERS: + raise ValidationError( + "Invalid SWHID: unknown qualifier: %(qualifier)s", + params={"qualifier": k}, + ) + + def to_dict(self) -> Dict[str, Any]: + return attr.asdict(self) + + def __str__(self) -> str: + swhid = SWHID_SEP.join( + [ + self.namespace, + str(self.scheme_version), + self.object_type.value, + self.object_id, + ] + ) + if self.metadata: + for k, v in self.metadata.items(): + swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v) + return swhid + + +def swhid( + object_type: Union[SWHIDObjectType, str], + object_id: Union[str, Dict[str, Any]], + scheme_version: int = 1, + metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(), +) -> str: + """Compute :ref:`persistent-identifiers` + + Args: + object_type: object's type (a SWHIDObjectType), + object_id: object's identifier + scheme_version: SWHID scheme version, defaults to 1 + metadata: metadata related to the pointed object + + Raises: + swh.model.exceptions.ValidationError: In case of invalid object type or id + + Returns: + the SWHID of the object + + """ + if isinstance(object_type, str): + object_type = _swhid_type_map[object_type] + if isinstance(object_id, dict): + object_id = object_id[_object_type_id_map[object_type]] + swhid = SWHID( + scheme_version=scheme_version, + object_type=object_type, + object_id=object_id, + metadata=metadata, # type: ignore # mypy can't properly unify types + ) + return str(swhid) + + +def parse_swhid(swhid: str) -> SWHID: + """Parse a Software Heritage identifier (SWHID) from string (see: + :ref:`persistent-identifiers`.) + + Args: + swhid (str): A persistent identifier + + Returns: + a named tuple holding the parsing result + + Raises: + swh.model.exceptions.ValidationError: if passed string is not a valid SWHID + + """ + m = SWHID_RE.fullmatch(swhid) + if not m: + raise ValidationError( + "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid} + ) + parts = m.groupdict() + + _qualifiers = {} + qualifiers_raw = parts["qualifiers"] + if qualifiers_raw: + for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP): + try: + k, v = qualifier.split("=") + except ValueError: + raise ValidationError( + "Invalid SWHID: invalid qualifier: %(qualifier)s", + params={"qualifier": qualifier}, + ) + _qualifiers[k] = v + + return SWHID( + namespace=parts["scheme"], + scheme_version=int(parts["version"]), + object_type=_swhid_type_map[parts["object_type"]], + object_id=parts["object_id"], + metadata=_qualifiers, # type: ignore # mypy can't properly unify types + ) diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -9,7 +9,7 @@ import attr from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.swhid import SWHID, SWHIDObjectType from swh.model.model import ( BaseModel, Content, @@ -18,7 +18,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Origin, OriginVisit, @@ -310,8 +309,8 @@ RAW_EXTRINSIC_METADATA = [ RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="http://example.org/foo.git", + target=SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=b"http://example.org/foo.git"), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None), @@ -319,9 +318,8 @@ metadata=b'{"foo": "bar"}', ), RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=SWHID( - object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git) + object_type=SWHIDObjectType.CONTENT, object_id=hash_to_hex(CONTENTS[0].sha1_git) ), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py --- a/swh/model/tests/test_identifiers.py +++ b/swh/model/tests/test_identifiers.py @@ -13,15 +13,8 @@ from swh.model import hashutil, identifiers from swh.model.exceptions import ValidationError from swh.model.hashutil import hash_to_bytes as _x -from swh.model.identifiers import ( - CONTENT, - DIRECTORY, - RELEASE, - REVISION, - SNAPSHOT, - SWHID, - normalize_timestamp, -) +from swh.model.swhid import SWHID, SWHIDObjectType +from swh.model.identifiers import normalize_timestamp def remove_id(d: Dict) -> Dict: @@ -897,77 +890,77 @@ for full_type, _hash, expected_swhid, version, _meta in [ ( - SNAPSHOT, + SWHIDObjectType.SNAPSHOT, _snapshot_id, "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", None, {}, ), ( - RELEASE, + SWHIDObjectType.RELEASE, _release_id, "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", 1, {}, ), ( - REVISION, + SWHIDObjectType.REVISION, _revision_id, "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", None, {}, ), ( - DIRECTORY, + SWHIDObjectType.DIRECTORY, _directory_id, "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", None, {}, ), ( - CONTENT, + SWHIDObjectType.CONTENT, _content_id, "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", 1, {}, ), ( - SNAPSHOT, + SWHIDObjectType.SNAPSHOT, _snapshot, "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", None, {}, ), ( - RELEASE, + SWHIDObjectType.RELEASE, _release, "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", 1, {}, ), ( - REVISION, + SWHIDObjectType.REVISION, _revision, "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", None, {}, ), ( - DIRECTORY, + SWHIDObjectType.DIRECTORY, _directory, "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", None, {}, ), ( - CONTENT, + SWHIDObjectType.CONTENT, _content, "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", 1, {}, ), ( - CONTENT, + SWHIDObjectType.CONTENT, _content, "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1", 1, @@ -988,9 +981,10 @@ _snapshot = {"id": _snapshot_id} for _type, _hash in [ - (SNAPSHOT, _snapshot_id), - (SNAPSHOT, _snapshot), - ("lines", "42"), + (SWHIDObjectType.SNAPSHOT, _snapshot_id), + (SWHIDObjectType.SNAPSHOT, _snapshot), + ("snapshot", "42"), + ("snp", "42"), ]: with self.assertRaises(ValidationError): identifiers.swhid(_type, _hash) @@ -999,31 +993,31 @@ for swhid, _type, _version, _hash in [ ( "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - CONTENT, + SWHIDObjectType.CONTENT, 1, "94a9ed024d3859793618152ea559a168bbcbb5e2", ), ( "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505", - DIRECTORY, + SWHIDObjectType.DIRECTORY, 1, "d198bc9d7a6bcf6db04f476d29314f157507d505", ), ( "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d", - REVISION, + SWHIDObjectType.REVISION, 1, "309cf2674ee7a0749978cf8265ab91a60aea0f7d", ), ( "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f", - RELEASE, + SWHIDObjectType.RELEASE, 1, "22ece559cc7cc2364edc5e5593d63ae8bd229f9f", ), ( "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453", - SNAPSHOT, + SWHIDObjectType.SNAPSHOT, 1, "c7c108084bc0bf3d81436bf980b46e98bd338453", ), @@ -1041,14 +1035,14 @@ for swhid, _type, _version, _hash, _metadata in [ ( "swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa - CONTENT, + SWHIDObjectType.CONTENT, 1, "9c95815d9e9d91b8dae8e05d8bbc696fe19f796b", {"lines": "1-18", "origin": "https://github.com/python/cpython"}, ), ( "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa - DIRECTORY, + SWHIDObjectType.DIRECTORY, 1, "0b6959356d30f1a4e9b7f6bca59b9a336464c03d", {"origin": "deb://Debian/packages/linuxdoc-tools"}, @@ -1125,10 +1119,9 @@ @pytest.mark.parametrize( "ns,version,type,id", [ - ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, "foo", "fed8bc9d7a6bcf6db04f476d29314f157507d505",), - ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",), + ("foo", 1, SWHIDObjectType.CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",), + ("swh", 2, SWHIDObjectType.DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",), + ("swh", 1, SWHIDObjectType.SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",), ], ) def test_SWHID_class_validation_error(ns, version, type, id): @@ -1141,27 +1134,27 @@ def test_swhid_hash(): object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - assert hash(SWHID(object_type="directory", object_id=object_id)) == hash( - SWHID(object_type="directory", object_id=object_id) + assert hash(SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id)) == hash( + SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id) ) assert hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) + SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers,) ) == hash( - SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) + SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers,) ) # Different order of the dictionary, so the underlying order of the tuple in # ImmutableDict is different. assert hash( SWHID( - object_type="directory", + object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata={"origin": "https://example.com", "lines": "42"}, ) ) == hash( SWHID( - object_type="directory", + object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata={"lines": "42", "origin": "https://example.com"}, ) @@ -1171,14 +1164,14 @@ def test_swhid_eq(): object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2" - assert SWHID(object_type="directory", object_id=object_id) == SWHID( - object_type="directory", object_id=object_id + assert SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id) == SWHID( + object_type=SWHIDObjectType.DIRECTORY.DIRECTORY, object_id=object_id ) assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) + object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers, + ) == SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers,) assert SWHID( - object_type="directory", object_id=object_id, metadata=dummy_qualifiers, - ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,) + object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers, + ) == SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id, metadata=dummy_qualifiers,) diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -15,9 +15,7 @@ from swh.model.hashutil import MultiHash, hash_to_bytes import swh.model.hypothesis_strategies as strategies from swh.model.identifiers import ( - SWHID, directory_identifier, - parse_swhid, release_identifier, revision_identifier, snapshot_identifier, @@ -29,7 +27,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, MissingData, Origin, OriginVisit, @@ -43,6 +40,7 @@ Timestamp, TimestampWithTimezone, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.model.tests.test_identifiers import ( directory_example, release_example, @@ -781,7 +779,8 @@ ) _metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",) _content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2") -_origin_url = "https://forge.softwareheritage.org/source/swh-model.git" +_origin_url = b"https://forge.softwareheritage.org/source/swh-model.git" +_origin_swhid = SWHID(object_type=SWHIDObjectType.ORIGIN, object_id=_origin_url) _dummy_qualifiers = {"origin": "https://example.com", "lines": "42"} _common_metadata_fields = dict( discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), @@ -796,15 +795,11 @@ """Checks valid RawExtrinsicMetadata objects don't raise an error.""" # Simplest case - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields - ) + RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields) # Object with an SWHID RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - **_common_metadata_fields, + target=_content_swhid, **_common_metadata_fields, ) @@ -819,23 +814,18 @@ "metadata": b'{"origin": "https://example.com", "lines": "42"}', } - m = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields, - ) + m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,) assert m.to_dict() == { - "type": "origin", - "target": _origin_url, + "target": ( + "swh:1:ori:68747470733a2f2f666f7267652e736f667477617265686572" + "69746167652e6f72672f736f757263652f7377682d6d6f64656c2e676974" + ), **common_fields, } assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m - m = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - **_common_metadata_fields, - ) + m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,) assert m.to_dict() == { - "type": "content", "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **common_fields, } @@ -845,54 +835,18 @@ def test_metadata_invalid_target(): """Checks various invalid values for the 'target' field.""" - # SWHID for an origin - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_content_swhid, - **_common_metadata_fields, - ) - - # SWHID for an origin (even when passed as string) - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - **_common_metadata_fields, - ) - - # URL for a non-origin - with pytest.raises(ValueError, match="Expected SWHID, got a string"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_origin_url, - **_common_metadata_fields, - ) - # SWHID passed as string instead of SWHID - with pytest.raises(ValueError, match="Expected SWHID, got a string"): + with pytest.raises(AttributeTypeError): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **_common_metadata_fields, ) - # Object type does not match the SWHID - with pytest.raises( - ValueError, match="Expected SWHID type 'revision', got 'content'" - ): - RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - target=_content_swhid, - **_common_metadata_fields, - ) - # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=SWHID( - object_type="content", + object_type=SWHIDObjectType.CONTENT, object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", metadata=_dummy_qualifiers, ), @@ -903,8 +857,7 @@ def test_metadata_naive_datetime(): with pytest.raises(ValueError, match="must be a timezone-aware datetime"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, **{**_common_metadata_fields, "discovery_date": datetime.datetime.now()}, ) @@ -913,30 +866,21 @@ """Checks validation of RawExtrinsicMetadata.origin.""" # Origins can't have an 'origin' context - with pytest.raises( - ValueError, match="Unexpected 'origin' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'origin' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - origin=_origin_url, - **_common_metadata_fields, + target=_origin_swhid, origin=_origin_swhid, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - origin=_origin_url, - **_common_metadata_fields, + target=_content_swhid, origin=_origin_swhid, **_common_metadata_fields, ) - # SWHIDs aren't valid origin URLs - with pytest.raises(ValueError, match="SWHID used as context origin URL"): + # non origin SWHIDs aren't valid origin context + with pytest.raises(ValueError, match="Non origin SWHID used as context origin:"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", + origin=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) @@ -945,21 +889,15 @@ """Checks validation of RawExtrinsicMetadata.visit.""" # Origins can't have a 'visit' context - with pytest.raises( - ValueError, match="Unexpected 'visit' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'visit' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - visit=42, - **_common_metadata_fields, + target=_origin_swhid, visit=42, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - origin=_origin_url, + origin=_origin_swhid, visit=42, **_common_metadata_fields, ) @@ -967,18 +905,14 @@ # Missing 'origin' with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - visit=42, - **_common_metadata_fields, + target=_content_swhid, visit=42, **_common_metadata_fields, ) # visit id must be positive with pytest.raises(ValueError, match="Nonpositive visit id"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - origin=_origin_url, + origin=_origin_swhid, visit=-42, **_common_metadata_fields, ) @@ -988,36 +922,26 @@ """Checks validation of RawExtrinsicMetadata.snapshot.""" # Origins can't have a 'snapshot' context - with pytest.raises( - ValueError, match="Unexpected 'snapshot' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'snapshot' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - snapshot=SWHID( - object_type="snapshot", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + target=_origin_swhid, + snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - snapshot=SWHID( - object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, snapshot=SWHID( - object_type="snapshot", + object_type=SWHIDObjectType.SNAPSHOT, object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", metadata=_dummy_qualifiers, ), @@ -1025,16 +949,10 @@ ) # SWHID type doesn't match the expected type of this context key - with pytest.raises( - ValueError, match="Expected SWHID type 'snapshot', got 'content'" - ): + with pytest.raises(ValueError, match="Expected SWHID type 'snp', got 'cnt'"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - snapshot=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + snapshot=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) @@ -1043,36 +961,26 @@ """Checks validation of RawExtrinsicMetadata.release.""" # Origins can't have a 'release' context - with pytest.raises( - ValueError, match="Unexpected 'release' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'release' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - release=SWHID( - object_type="release", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + target=_origin_swhid, + release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - release=SWHID( - object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, release=SWHID( - object_type="release", + object_type=SWHIDObjectType.RELEASE, object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", metadata=_dummy_qualifiers, ), @@ -1080,16 +988,10 @@ ) # SWHID type doesn't match the expected type of this context key - with pytest.raises( - ValueError, match="Expected SWHID type 'release', got 'content'" - ): + with pytest.raises(ValueError, match="Expected SWHID type 'rel', got 'cnt'"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - release=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + release=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) @@ -1098,36 +1000,26 @@ """Checks validation of RawExtrinsicMetadata.revision.""" # Origins can't have a 'revision' context - with pytest.raises( - ValueError, match="Unexpected 'revision' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'revision' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - revision=SWHID( - object_type="revision", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + target=_origin_swhid, + revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - revision=SWHID( - object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" - ), + revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, revision=SWHID( - object_type="revision", + object_type=SWHIDObjectType.REVISION, object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", metadata=_dummy_qualifiers, ), @@ -1135,16 +1027,10 @@ ) # SWHID type doesn't match the expected type of this context key - with pytest.raises( - ValueError, match="Expected SWHID type 'revision', got 'content'" - ): + with pytest.raises(ValueError, match="Expected SWHID type 'rev', got 'cnt'"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - revision=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + revision=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) @@ -1153,20 +1039,14 @@ """Checks validation of RawExtrinsicMetadata.path.""" # Origins can't have a 'path' context - with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"): + with pytest.raises(ValueError, match="Unexpected 'path' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - path=b"/foo/bar", - **_common_metadata_fields, + target=_origin_swhid, path=b"/foo/bar", **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - path=b"/foo/bar", - **_common_metadata_fields, + target=_content_swhid, path=b"/foo/bar", **_common_metadata_fields, ) @@ -1174,37 +1054,26 @@ """Checks validation of RawExtrinsicMetadata.directory.""" # Origins can't have a 'directory' context - with pytest.raises( - ValueError, match="Unexpected 'directory' context for origin object" - ): + with pytest.raises(ValueError, match="Unexpected 'directory' context for object:"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - directory=SWHID( - object_type="directory", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + target=_origin_swhid, + directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - directory=SWHID( - object_type="directory", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, ) # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, directory=SWHID( - object_type="directory", + object_type=SWHIDObjectType.DIRECTORY, object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", metadata=_dummy_qualifiers, ), @@ -1212,15 +1081,9 @@ ) # SWHID type doesn't match the expected type of this context key - with pytest.raises( - ValueError, match="Expected SWHID type 'directory', got 'content'" - ): + with pytest.raises(ValueError, match="Expected SWHID type 'dir', got 'cnt'"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, - directory=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - ), + directory=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"), **_common_metadata_fields, )