Page Menu | Home | Software Heritage

D4985.id17891.diff
No One | Temporary

D4985.id17891.diff

diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -12,7 +12,7 @@
import click
from swh.core.cli import swh as swh_cli_group
-from swh.model.identifiers import SWHID
+from swh.model.swhid import SWHID, SWHIDObjectType, swhid
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -44,18 +44,16 @@
def swhid_of_file(path):
from swh.model.from_disk import Content
- from swh.model.identifiers import CONTENT, swhid
object = Content.from_file(path=path).get_data()
- return swhid(CONTENT, object)
+ return swhid(SWHIDObjectType.CONTENT, object)
def swhid_of_file_content(data):
from swh.model.from_disk import Content
- from swh.model.identifiers import CONTENT, swhid
object = Content.from_bytes(mode=644, data=data).get_data()
- return swhid(CONTENT, object)
+ return swhid(SWHIDObjectType.CONTENT, object)
def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> str:
@@ -64,7 +62,6 @@
accept_all_directories,
ignore_directories_patterns,
)
- from swh.model.identifiers import DIRECTORY, swhid
dir_filter = (
ignore_directories_patterns(path, exclude_patterns)
@@ -73,20 +70,25 @@
)
object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
- return swhid(DIRECTORY, object)
+ return swhid(SWHIDObjectType.DIRECTORY, object)
def swhid_of_origin(url):
- from swh.model.identifiers import SWHID, origin_identifier
+ from swh.model.identifiers import origin_identifier
- return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url})))
+ return str(
+ SWHID(
+ object_type=SWHIDObjectType.ORIGIN,
+ object_id=origin_identifier({"url": url}),
+ )
+ )
def swhid_of_git_repo(path):
import dulwich.repo
from swh.model import hashutil
- from swh.model.identifiers import SWHID, snapshot_identifier
+ from swh.model.identifiers import snapshot_identifier
repo = dulwich.repo.Repo(path)
@@ -109,7 +111,12 @@
snapshot = {"branches": branches}
- return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)))
+ return str(
+ SWHID(
+ object_type=SWHIDObjectType.SNAPSHOT,
+ object_id=snapshot_identifier(snapshot),
+ )
+ )
def identify_object(obj_type, follow_symlinks, exclude_patterns, obj):
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -7,38 +7,12 @@
import datetime
from functools import lru_cache
import hashlib
-import re
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
-
-import attr
-
-from .collections import ImmutableDict
-from .exceptions import ValidationError
-from .fields.hashes import validate_sha1
-from .hashutil import MultiHash, hash_git_data, hash_to_hex
-
-ORIGIN = "origin"
-SNAPSHOT = "snapshot"
-REVISION = "revision"
-RELEASE = "release"
-DIRECTORY = "directory"
-CONTENT = "content"
-
-SWHID_NAMESPACE = "swh"
-SWHID_VERSION = 1
-SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
-SWHID_SEP = ":"
-SWHID_CTXT_SEP = ";"
-SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
-
-SWHID_RE_RAW = (
- f"(?P<scheme>{SWHID_NAMESPACE})"
- f"{SWHID_SEP}(?P<version>{SWHID_VERSION})"
- f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
- f"{SWHID_SEP}(?P<object_id>[0-9a-f]{{40}})"
- f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
-)
-SWHID_RE = re.compile(SWHID_RE_RAW)
+from typing import Iterable, List, Optional, Tuple
+
+# for bw compat
+from swh.model.swhid import SWHID, SWHID_RE, SWHID_TYPES, parse_swhid, swhid # noqa
+
+from .hashutil import MultiHash, hash_git_data
@lru_cache()
@@ -676,203 +650,3 @@
"""
return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest()
-
-
-_object_type_map = {
- ORIGIN: {"short_name": "ori", "key_id": "id"},
- SNAPSHOT: {"short_name": "snp", "key_id": "id"},
- RELEASE: {"short_name": "rel", "key_id": "id"},
- REVISION: {"short_name": "rev", "key_id": "id"},
- DIRECTORY: {"short_name": "dir", "key_id": "id"},
- CONTENT: {"short_name": "cnt", "key_id": "sha1_git"},
-}
-
-_swhid_type_map = {
- "ori": ORIGIN,
- "snp": SNAPSHOT,
- "rel": RELEASE,
- "rev": REVISION,
- "dir": DIRECTORY,
- "cnt": CONTENT,
-}
-
-
-@attr.s(frozen=True)
-class SWHID:
- """
- Named tuple holding the relevant info associated to a SoftWare Heritage
- persistent IDentifier (SWHID)
-
- Args:
- namespace (str): the namespace of the identifier, defaults to ``swh``
- scheme_version (int): the scheme version of the identifier,
- defaults to 1
- object_type (str): the type of object the identifier points to,
- either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot``
- object_id (str): object's identifier
- metadata (dict): optional dict filled with metadata related to
- pointed object
-
- Raises:
- swh.model.exceptions.ValidationError: In case of invalid object type or id
-
- Once created, it contains the following attributes:
-
- Attributes:
- namespace (str): the namespace of the identifier
- scheme_version (int): the scheme version of the identifier
- object_type (str): the type of object the identifier points to
- object_id (str): hexadecimal representation of the object hash
- metadata (dict): metadata related to the pointed object
-
- To get the raw SWHID string from an instance of this named tuple,
- use the :func:`str` function::
-
- swhid = SWHID(
- object_type='content',
- object_id='8ff44f081d43176474b267de5451f2c2e88089d0'
- )
- swhid_str = str(swhid)
- # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
- """
-
- namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
- scheme_version = attr.ib(type=int, default=SWHID_VERSION)
- object_type = attr.ib(type=str, default="")
- object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore
- metadata = attr.ib(
- type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
- )
-
- @namespace.validator
- def check_namespace(self, attribute, value):
- if value != SWHID_NAMESPACE:
- raise ValidationError(
- "Invalid SWHID: invalid namespace: %(namespace)s",
- params={"namespace": value},
- )
-
- @scheme_version.validator
- def check_scheme_version(self, attribute, value):
- if value != SWHID_VERSION:
- raise ValidationError(
- "Invalid SWHID: invalid version: %(version)s", params={"version": value}
- )
-
- @object_type.validator
- def check_object_type(self, attribute, value):
- if value not in _object_type_map:
- raise ValidationError(
- "Invalid SWHID: invalid type: %(object_type)s)",
- params={"object_type": value},
- )
-
- @object_id.validator
- def check_object_id(self, attribute, value):
- try:
- validate_sha1(value) # can raise if invalid hash
- except ValidationError:
- raise ValidationError(
- "Invalid SWHID: invalid checksum: %(object_id)s",
- params={"object_id": value},
- ) from None
-
- @metadata.validator
- def check_qualifiers(self, attribute, value):
- for k in value:
- if k not in SWHID_QUALIFIERS:
- raise ValidationError(
- "Invalid SWHID: unknown qualifier: %(qualifier)s",
- params={"qualifier": k},
- )
-
- def to_dict(self) -> Dict[str, Any]:
- return attr.asdict(self)
-
- def __str__(self) -> str:
- o = _object_type_map.get(self.object_type)
- assert o
- swhid = SWHID_SEP.join(
- [self.namespace, str(self.scheme_version), o["short_name"], self.object_id]
- )
- if self.metadata:
- for k, v in self.metadata.items():
- swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v)
- return swhid
-
-
-def swhid(
- object_type: str,
- object_id: Union[str, Dict[str, Any]],
- scheme_version: int = 1,
- metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(),
-) -> str:
- """Compute :ref:`persistent-identifiers`
-
- Args:
- object_type: object's type, either ``content``, ``directory``,
- ``release``, ``revision`` or ``snapshot``
- object_id: object's identifier
- scheme_version: SWHID scheme version, defaults to 1
- metadata: metadata related to the pointed object
-
- Raises:
- swh.model.exceptions.ValidationError: In case of invalid object type or id
-
- Returns:
- the SWHID of the object
-
- """
- if isinstance(object_id, dict):
- o = _object_type_map[object_type]
- object_id = object_id[o["key_id"]]
- swhid = SWHID(
- scheme_version=scheme_version,
- object_type=object_type,
- object_id=object_id,
- metadata=metadata, # type: ignore # mypy can't properly unify types
- )
- return str(swhid)
-
-
-def parse_swhid(swhid: str) -> SWHID:
- """Parse a Software Heritage identifier (SWHID) from string (see:
- :ref:`persistent-identifiers`.)
-
- Args:
- swhid (str): A persistent identifier
-
- Returns:
- a named tuple holding the parsing result
-
- Raises:
- swh.model.exceptions.ValidationError: if passed string is not a valid SWHID
-
- """
- m = SWHID_RE.fullmatch(swhid)
- if not m:
- raise ValidationError(
- "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid}
- )
- parts = m.groupdict()
-
- _qualifiers = {}
- qualifiers_raw = parts["qualifiers"]
- if qualifiers_raw:
- for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP):
- try:
- k, v = qualifier.split("=")
- except ValueError:
- raise ValidationError(
- "Invalid SWHID: invalid qualifier: %(qualifier)s",
- params={"qualifier": qualifier},
- )
- _qualifiers[k] = v
-
- return SWHID(
- parts["scheme"],
- int(parts["version"]),
- _swhid_type_map[parts["object_type"]],
- parts["object_id"],
- _qualifiers, # type: ignore # mypy can't properly unify types
- )
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -18,7 +18,6 @@
from .collections import ImmutableDict
from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes
from .identifiers import (
- SWHID,
directory_identifier,
normalize_timestamp,
parse_swhid,
@@ -26,6 +25,7 @@
revision_identifier,
snapshot_identifier,
)
+from .swhid import SWHID, SWHIDObjectType
class MissingData(Exception):
@@ -274,6 +274,10 @@
def unique_key(self) -> KeyType:
return {"url": self.url}
+ @property
+ def id(self) -> bytes:
+ return self.url.encode()
+
@attr.s(frozen=True, slots=True)
class OriginVisit(BaseModel):
@@ -706,6 +710,10 @@
def unique_key(self) -> KeyType:
return self.sha1 # TODO: use a dict of hashes
+ @property
+ def id(self):
+ return self.sha1_git
+
@attr.s(frozen=True, slots=True)
class SkippedContent(BaseContent):
@@ -850,25 +858,12 @@
return {"name": self.name, "version": self.version}
-class MetadataTargetType(Enum):
- """The type of object extrinsic metadata refer to."""
-
- CONTENT = "content"
- DIRECTORY = "directory"
- REVISION = "revision"
- RELEASE = "release"
- SNAPSHOT = "snapshot"
- ORIGIN = "origin"
-
-
@attr.s(frozen=True, slots=True)
class RawExtrinsicMetadata(BaseModel):
object_type: Final = "raw_extrinsic_metadata"
# target object
- type = attr.ib(type=MetadataTargetType, validator=type_validator())
- target = attr.ib(type=Union[str, SWHID], validator=type_validator())
- """URL if type=MetadataTargetType.ORIGIN, else core SWHID"""
+ target = attr.ib(type=SWHID, validator=[type_validator()])
# source
discovery_date = attr.ib(type=datetime.datetime, validator=type_validator())
@@ -880,23 +875,23 @@
metadata = attr.ib(type=bytes, validator=type_validator())
# context
- origin = attr.ib(type=Optional[str], default=None, validator=type_validator())
- visit = attr.ib(type=Optional[int], default=None, validator=type_validator())
- snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- path = attr.ib(type=Optional[bytes], default=None, validator=type_validator())
- directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
+ origin = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ visit = attr.ib(type=Optional[int], default=None, validator=[type_validator()])
+ snapshot = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ release = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ revision = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ path = attr.ib(type=Optional[bytes], default=None, validator=[type_validator()])
+ directory = attr.ib(
+ type=Optional[SWHID], default=None, validator=[type_validator()]
+ )
+
+ @property
+ def type(self):
+ return self.target.object_type
@target.validator
def check_target(self, attribute, value):
- if self.type == MetadataTargetType.ORIGIN:
- if isinstance(value, SWHID) or value.startswith("swh:"):
- raise ValueError(
- "Got SWHID as target for origin metadata (expected an URL)."
- )
- else:
- self._check_swhid(self.type.value, value)
+ self._check_swhid(self.type, value)
@discovery_date.validator
def check_discovery_date(self, attribute, value):
@@ -909,24 +904,11 @@
if value is None:
return
- if self.type not in (
- MetadataTargetType.SNAPSHOT,
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
- ):
- raise ValueError(
- f"Unexpected 'origin' context for {self.type.value} object: {value}"
- )
+ if self.type == SWHIDObjectType.ORIGIN:
+ raise ValueError(f"Unexpected 'origin' context for object: {value}")
- if value.startswith("swh:"):
- # Technically this is valid; but:
- # 1. SWHIDs are URIs, not URLs
- # 2. if a SWHID gets here, it's very likely to be a mistake
- # (and we can remove this check if it turns out there is a
- # legitimate use for it).
- raise ValueError(f"SWHID used as context origin URL: {value}")
+ if value.object_type != SWHIDObjectType.ORIGIN:
+ raise ValueError(f"Non origin SWHID used as context origin: {value}")
@visit.validator
def check_visit(self, attribute, value):
@@ -934,15 +916,13 @@
return
if self.type not in (
- MetadataTargetType.SNAPSHOT,
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.SNAPSHOT,
+ SWHIDObjectType.RELEASE,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'visit' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'visit' context for object: {value}")
if self.origin is None:
raise ValueError("'origin' context must be set if 'visit' is.")
@@ -956,16 +936,14 @@
return
if self.type not in (
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.RELEASE,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'snapshot' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'snapshot' context for object: {value}")
- self._check_swhid("snapshot", value)
+ self._check_swhid(SWHIDObjectType.SNAPSHOT, value)
@release.validator
def check_release(self, attribute, value):
@@ -973,58 +951,51 @@
return
if self.type not in (
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'release' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'release' context for object: {value}")
- self._check_swhid("release", value)
+ self._check_swhid(SWHIDObjectType.RELEASE, value)
@revision.validator
def check_revision(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'revision' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'revision' context for object: {value}")
- self._check_swhid("revision", value)
+ self._check_swhid(SWHIDObjectType.REVISION, value)
@path.validator
def check_path(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'path' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'path' context for object: {value}")
@directory.validator
def check_directory(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'directory' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'directory' context for object: {value}")
- self._check_swhid("directory", value)
+ self._check_swhid(SWHIDObjectType.DIRECTORY, value)
def _check_swhid(self, expected_object_type, swhid):
+
if isinstance(swhid, str):
raise ValueError(f"Expected SWHID, got a string: {swhid}")
if swhid.object_type != expected_object_type:
raise ValueError(
- f"Expected SWHID type '{expected_object_type}', "
- f"got '{swhid.object_type}' in {swhid}"
+ f"Expected SWHID type '{expected_object_type.value}', "
+ f"got '{swhid.object_type.value}' in {swhid}"
)
if swhid.metadata:
@@ -1049,26 +1020,20 @@
@classmethod
def from_dict(cls, d):
+
d = {
**d,
- "type": MetadataTargetType(d["type"]),
"authority": MetadataAuthority.from_dict(d["authority"]),
"fetcher": MetadataFetcher.from_dict(d["fetcher"]),
}
-
- if d["type"] != MetadataTargetType.ORIGIN:
- d["target"] = parse_swhid(d["target"])
-
- swhid_keys = ("snapshot", "release", "revision", "directory")
- for swhid_key in swhid_keys:
- if d.get(swhid_key):
- d[swhid_key] = parse_swhid(d[swhid_key])
-
+ swhid_keys = ("target", "snapshot", "release", "revision", "directory")
+ for k in swhid_keys:
+ if k in d and isinstance(d[k], str):
+ d[k] = parse_swhid(d[k])
return super().from_dict(d)
def unique_key(self) -> KeyType:
return {
- "type": self.type.value,
"target": str(self.target),
"authority_type": self.authority.type.value,
"authority_url": self.authority.url,
diff --git a/swh/model/swhid.py b/swh/model/swhid.py
new file mode 100644
--- /dev/null
+++ b/swh/model/swhid.py
@@ -0,0 +1,254 @@
+# Copyright (C) 2015-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from enum import Enum
+import re
+from typing import Any, Dict, Union
+
+import attr
+from attrs_strict import type_validator
+
+from .collections import ImmutableDict
+from .exceptions import ValidationError
+from .fields.hashes import validate_sha1
+from .hashutil import hash_to_hex
+
+# Long-form names of the object types; the same strings are accepted as keys
+# of _swhid_type_map below.
+ORIGIN = "origin"
+SNAPSHOT = "snapshot"
+REVISION = "revision"
+RELEASE = "release"
+DIRECTORY = "directory"
+CONTENT = "content"
+
+# The only namespace and scheme version the SWHID validators accept.
+SWHID_NAMESPACE = "swh"
+SWHID_VERSION = 1
+# Short type codes allowed in the <object_type> field of SWHID_RE_RAW.
+SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
+# SWHID_SEP joins the core fields; SWHID_CTXT_SEP precedes each qualifier.
+SWHID_SEP = ":"
+SWHID_CTXT_SEP = ";"
+# Qualifier keys accepted by SWHID.check_qualifiers.
+SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
+
+# Pattern for the textual form: <scheme>:<version>:<object_type>:<object_id>
+# optionally followed by ";" and a run of non-whitespace qualifiers.
+# The object id is any non-empty run of lowercase hex digits here (no fixed
+# length); SWHID.check_object_id applies the sha1 check for non-origin types.
+SWHID_RE_RAW = (
+    f"(?P<scheme>{SWHID_NAMESPACE})"
+    f"{SWHID_SEP}(?P<version>{SWHID_VERSION})"
+    f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
+    f"{SWHID_SEP}(?P<object_id>[0-9a-f]+)"
+    f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
+)
+SWHID_RE = re.compile(SWHID_RE_RAW)
+
+
+class SWHIDObjectType(Enum):
+    """The type of object a SWHID points to.
+
+    Each member's value is the three-letter code that ``SWHID.__str__``
+    writes in the object-type field of the textual identifier.
+    """
+
+    CONTENT = "cnt"
+    DIRECTORY = "dir"
+    REVISION = "rev"
+    RELEASE = "rel"
+    SNAPSHOT = "snp"
+    ORIGIN = "ori"
+
+
+# For each object type, the dict key under which swhid() looks up the
+# identifier when it is given a dict instead of a plain object id.
+_object_type_id_map = {
+    SWHIDObjectType.ORIGIN: "id",
+    SWHIDObjectType.SNAPSHOT: "id",
+    SWHIDObjectType.RELEASE: "id",
+    SWHIDObjectType.REVISION: "id",
+    SWHIDObjectType.DIRECTORY: "id",
+    SWHIDObjectType.CONTENT: "sha1_git",
+}
+
+
+# Maps a textual object type to its enum member. Both the three-letter codes
+# (as found in a SWHID string) and the long names are accepted.
+_swhid_type_map = {
+    "ori": SWHIDObjectType.ORIGIN,
+    "snp": SWHIDObjectType.SNAPSHOT,
+    "rel": SWHIDObjectType.RELEASE,
+    "rev": SWHIDObjectType.REVISION,
+    "dir": SWHIDObjectType.DIRECTORY,
+    "cnt": SWHIDObjectType.CONTENT,
+    "origin": SWHIDObjectType.ORIGIN,
+    "snapshot": SWHIDObjectType.SNAPSHOT,
+    "release": SWHIDObjectType.RELEASE,
+    "revision": SWHIDObjectType.REVISION,
+    "directory": SWHIDObjectType.DIRECTORY,
+    "content": SWHIDObjectType.CONTENT,
+}
+
+
+@attr.s(frozen=True)
+class SWHID:
+    """
+    Frozen ``attrs`` class holding the relevant info associated to a SoftWare
+    Heritage persistent IDentifier (SWHID)
+
+    Args:
+        object_type (SWHIDObjectType): the type of object the identifier
+            points to; plain strings such as ``'content'`` are rejected by the
+            type validator
+        object_id (str): object's identifier, passed through
+            :func:`hash_to_hex` on construction
+        namespace (str): the namespace of the identifier, defaults to ``swh``
+        scheme_version (int): the scheme version of the identifier,
+            defaults to 1
+        metadata (dict): optional dict filled with metadata related to
+            pointed object; every key must be in ``SWHID_QUALIFIERS``
+
+    Raises:
+        swh.model.exceptions.ValidationError: In case of invalid namespace,
+            scheme version, object id or qualifier
+
+    Once created, it contains the following attributes:
+
+    Attributes:
+        object_type (SWHIDObjectType): the type of object the identifier
+            points to
+        object_id (str): hexadecimal representation of the object hash
+        namespace (str): the namespace of the identifier
+        scheme_version (int): the scheme version of the identifier
+        metadata (dict): metadata related to the pointed object
+
+    To get the raw SWHID string from an instance of this class,
+    use the :func:`str` function::
+
+        swhid = SWHID(
+            object_type=SWHIDObjectType.CONTENT,
+            object_id='8ff44f081d43176474b267de5451f2c2e88089d0'
+        )
+        swhid_str = str(swhid)
+        # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
+    """
+
+    object_type = attr.ib(type=SWHIDObjectType, validator=type_validator())
+    object_id = attr.ib(type=str, converter=hash_to_hex)  # type: ignore
+    namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
+    scheme_version = attr.ib(type=int, default=SWHID_VERSION)
+    metadata = attr.ib(
+        type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
+    )
+
+    @namespace.validator
+    def check_namespace(self, attribute, value):
+        """Reject any namespace other than ``SWHID_NAMESPACE``."""
+        if value != SWHID_NAMESPACE:
+            raise ValidationError(
+                "Invalid SWHID: invalid namespace: %(namespace)s",
+                params={"namespace": value},
+            )
+
+    @scheme_version.validator
+    def check_scheme_version(self, attribute, value):
+        """Reject any scheme version other than ``SWHID_VERSION``."""
+        if value != SWHID_VERSION:
+            raise ValidationError(
+                "Invalid SWHID: invalid version: %(version)s", params={"version": value}
+            )
+
+    @object_id.validator
+    def check_object_id(self, attribute, value):
+        """Check that the object id is a valid sha1, except for origins."""
+        # Origin identifiers are exempt from the sha1 check.
+        if self.object_type == SWHIDObjectType.ORIGIN:
+            return
+        try:
+            validate_sha1(value)  # can raise if invalid hash
+        except ValidationError:
+            raise ValidationError(
+                "Invalid SWHID: invalid checksum: %(object_id)s",
+                params={"object_id": value},
+            ) from None
+
+    @metadata.validator
+    def check_qualifiers(self, attribute, value):
+        """Reject qualifier keys that are not in ``SWHID_QUALIFIERS``."""
+        for k in value:
+            if k not in SWHID_QUALIFIERS:
+                raise ValidationError(
+                    "Invalid SWHID: unknown qualifier: %(qualifier)s",
+                    params={"qualifier": k},
+                )
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the attributes of this SWHID as produced by ``attr.asdict``."""
+        return attr.asdict(self)
+
+    def __str__(self) -> str:
+        """Return the textual SWHID: core fields, then ``;key=value`` qualifiers."""
+        core = SWHID_SEP.join(
+            [
+                self.namespace,
+                str(self.scheme_version),
+                self.object_type.value,
+                self.object_id,
+            ]
+        )
+        # Qualifiers keep the iteration order of the metadata mapping.
+        qualifiers = ["%s=%s" % (k, v) for k, v in self.metadata.items()]
+        return SWHID_CTXT_SEP.join([core, *qualifiers])
+
+
+def swhid(
+    object_type: Union[SWHIDObjectType, str],
+    object_id: Union[str, Dict[str, Any]],
+    scheme_version: int = 1,
+    metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(),
+) -> str:
+    """Compute :ref:`persistent-identifiers`
+
+    Args:
+        object_type: object's type, either a SWHIDObjectType or one of the
+            strings known to ``_swhid_type_map`` (``cnt``/``content``, ...)
+        object_id: object's identifier, or a dict from which the identifier
+            is read (key ``sha1_git`` for contents, ``id`` otherwise)
+        scheme_version: SWHID scheme version, defaults to 1
+        metadata: metadata related to the pointed object
+
+    Raises:
+        swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+    Returns:
+        the SWHID of the object
+
+    """
+    if isinstance(object_type, str):
+        try:
+            object_type = _swhid_type_map[object_type]
+        except KeyError:
+            # An unknown type name must surface as the documented
+            # ValidationError, not as a bare KeyError from the lookup table.
+            raise ValidationError(
+                "Invalid SWHID: invalid type: %(object_type)s",
+                params={"object_type": object_type},
+            ) from None
+    if isinstance(object_id, dict):
+        object_id = object_id[_object_type_id_map[object_type]]
+    return str(
+        SWHID(
+            scheme_version=scheme_version,
+            object_type=object_type,
+            object_id=object_id,
+            metadata=metadata,  # type: ignore  # mypy can't properly unify types
+        )
+    )
+
+
+def parse_swhid(swhid: str) -> SWHID:
+    """Parse a Software Heritage identifier (SWHID) from string (see:
+    :ref:`persistent-identifiers`.)
+
+    Args:
+        swhid (str): A persistent identifier
+
+    Returns:
+        a :class:`SWHID` instance holding the parsing result
+
+    Raises:
+        swh.model.exceptions.ValidationError: if passed string is not a valid SWHID
+
+    """
+    m = SWHID_RE.fullmatch(swhid)
+    if not m:
+        raise ValidationError(
+            "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid}
+        )
+    parts = m.groupdict()
+
+    _qualifiers = {}
+    qualifiers_raw = parts["qualifiers"]
+    if qualifiers_raw:
+        for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP):
+            try:
+                # Split on the first "=" only: qualifier values (e.g. an
+                # origin URL with a query string) may themselves contain "=".
+                k, v = qualifier.split("=", 1)
+            except ValueError:
+                raise ValidationError(
+                    "Invalid SWHID: invalid qualifier: %(qualifier)s",
+                    params={"qualifier": qualifier},
+                ) from None
+            _qualifiers[k] = v
+
+    return SWHID(
+        namespace=parts["scheme"],
+        scheme_version=int(parts["version"]),
+        object_type=_swhid_type_map[parts["object_type"]],
+        object_id=parts["object_id"],
+        metadata=_qualifiers,  # type: ignore  # mypy can't properly unify types
+    )
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -9,7 +9,6 @@
import attr
from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
BaseModel,
Content,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -35,6 +33,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
UTC = datetime.timezone.utc
@@ -310,8 +309,9 @@
RAW_EXTRINSIC_METADATA = [
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target="http://example.org/foo.git",
+ target=SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=b"http://example.org/foo.git"
+ ),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
@@ -319,9 +319,9 @@
metadata=b'{"foo": "bar"}',
),
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=SWHID(
- object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git)
+ object_type=SWHIDObjectType.CONTENT,
+ object_id=hash_to_hex(CONTENTS[0].sha1_git),
),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -13,15 +13,8 @@
from swh.model import hashutil, identifiers
from swh.model.exceptions import ValidationError
from swh.model.hashutil import hash_to_bytes as _x
-from swh.model.identifiers import (
- CONTENT,
- DIRECTORY,
- RELEASE,
- REVISION,
- SNAPSHOT,
- SWHID,
- normalize_timestamp,
-)
+from swh.model.identifiers import normalize_timestamp
+from swh.model.swhid import SWHID, SWHIDObjectType
def remove_id(d: Dict) -> Dict:
@@ -897,77 +890,77 @@
for full_type, _hash, expected_swhid, version, _meta in [
(
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
_snapshot_id,
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
None,
{},
),
(
- RELEASE,
+ SWHIDObjectType.RELEASE,
_release_id,
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
1,
{},
),
(
- REVISION,
+ SWHIDObjectType.REVISION,
_revision_id,
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
None,
{},
),
(
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
_directory_id,
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
None,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content_id,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
1,
{},
),
(
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
_snapshot,
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
None,
{},
),
(
- RELEASE,
+ SWHIDObjectType.RELEASE,
_release,
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
1,
{},
),
(
- REVISION,
+ SWHIDObjectType.REVISION,
_revision,
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
None,
{},
),
(
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
_directory,
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
None,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
1,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1",
1,
@@ -988,9 +981,10 @@
_snapshot = {"id": _snapshot_id}
for _type, _hash in [
- (SNAPSHOT, _snapshot_id),
- (SNAPSHOT, _snapshot),
- ("lines", "42"),
+ (SWHIDObjectType.SNAPSHOT, _snapshot_id),
+ (SWHIDObjectType.SNAPSHOT, _snapshot),
+ ("snapshot", "42"),
+ ("snp", "42"),
]:
with self.assertRaises(ValidationError):
identifiers.swhid(_type, _hash)
@@ -999,31 +993,31 @@
for swhid, _type, _version, _hash in [
(
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- CONTENT,
+ SWHIDObjectType.CONTENT,
1,
"94a9ed024d3859793618152ea559a168bbcbb5e2",
),
(
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
1,
"d198bc9d7a6bcf6db04f476d29314f157507d505",
),
(
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
- REVISION,
+ SWHIDObjectType.REVISION,
1,
"309cf2674ee7a0749978cf8265ab91a60aea0f7d",
),
(
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
- RELEASE,
+ SWHIDObjectType.RELEASE,
1,
"22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
),
(
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
1,
"c7c108084bc0bf3d81436bf980b46e98bd338453",
),
@@ -1041,14 +1035,14 @@
for swhid, _type, _version, _hash, _metadata in [
(
"swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa
- CONTENT,
+ SWHIDObjectType.CONTENT,
1,
"9c95815d9e9d91b8dae8e05d8bbc696fe19f796b",
{"lines": "1-18", "origin": "https://github.com/python/cpython"},
),
(
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
1,
"0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
{"origin": "deb://Debian/packages/linuxdoc-tools"},
@@ -1125,10 +1119,24 @@
@pytest.mark.parametrize(
"ns,version,type,id",
[
- ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 1, "foo", "fed8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",),
+ (
+ "foo",
+ 1,
+ SWHIDObjectType.CONTENT,
+ "abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 2,
+ SWHIDObjectType.DIRECTORY,
+ "def8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 1,
+ SWHIDObjectType.SNAPSHOT,
+ "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ ),
],
)
def test_SWHID_class_validation_error(ns, version, type, id):
@@ -1141,27 +1149,35 @@
def test_swhid_hash():
object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2"
- assert hash(SWHID(object_type="directory", object_id=object_id)) == hash(
- SWHID(object_type="directory", object_id=object_id)
- )
+ assert hash(
+ SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id)
+ ) == hash(SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id))
assert hash(
- SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
) == hash(
- SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
)
# Different order of the dictionary, so the underlying order of the tuple in
# ImmutableDict is different.
assert hash(
SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id=object_id,
metadata={"origin": "https://example.com", "lines": "42"},
)
) == hash(
SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id=object_id,
metadata={"lines": "42", "origin": "https://example.com"},
)
@@ -1171,14 +1187,26 @@
def test_swhid_eq():
object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2"
- assert SWHID(object_type="directory", object_id=object_id) == SWHID(
- object_type="directory", object_id=object_id
+ assert SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY, object_id=object_id
)
assert SWHID(
- object_type="directory", object_id=object_id, metadata=dummy_qualifiers,
- ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ ) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
assert SWHID(
- object_type="directory", object_id=object_id, metadata=dummy_qualifiers,
- ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ ) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -15,9 +15,7 @@
from swh.model.hashutil import MultiHash, hash_to_bytes
import swh.model.hypothesis_strategies as strategies
from swh.model.identifiers import (
- SWHID,
directory_identifier,
- parse_swhid,
release_identifier,
revision_identifier,
snapshot_identifier,
@@ -29,7 +27,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
MissingData,
Origin,
OriginVisit,
@@ -43,6 +40,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.model.tests.test_identifiers import (
directory_example,
release_example,
@@ -781,7 +779,8 @@
)
_metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",)
_content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2")
-_origin_url = "https://forge.softwareheritage.org/source/swh-model.git"
+_origin_url = b"https://forge.softwareheritage.org/source/swh-model.git"
+_origin_swhid = SWHID(object_type=SWHIDObjectType.ORIGIN, object_id=_origin_url)
_dummy_qualifiers = {"origin": "https://example.com", "lines": "42"}
_common_metadata_fields = dict(
discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
@@ -796,15 +795,11 @@
"""Checks valid RawExtrinsicMetadata objects don't raise an error."""
# Simplest case
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields
- )
+ RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields)
# Object with an SWHID
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- **_common_metadata_fields,
+ target=_content_swhid, **_common_metadata_fields,
)
@@ -819,23 +814,18 @@
"metadata": b'{"origin": "https://example.com", "lines": "42"}',
}
- m = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields,
- )
+ m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
- "type": "origin",
- "target": _origin_url,
+ "target": (
+ "swh:1:ori:68747470733a2f2f666f7267652e736f667477617265686572"
+ "69746167652e6f72672f736f757263652f7377682d6d6f64656c2e676974"
+ ),
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
- m = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- **_common_metadata_fields,
- )
+ m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
- "type": "content",
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**common_fields,
}
@@ -845,54 +835,18 @@
def test_metadata_invalid_target():
"""Checks various invalid values for the 'target' field."""
- # SWHID for an origin
- with pytest.raises(ValueError, match="expected an URL"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_content_swhid,
- **_common_metadata_fields,
- )
-
- # SWHID for an origin (even when passed as string)
- with pytest.raises(ValueError, match="expected an URL"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- **_common_metadata_fields,
- )
-
- # URL for a non-origin
- with pytest.raises(ValueError, match="Expected SWHID, got a string"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_origin_url,
- **_common_metadata_fields,
- )
-
# SWHID passed as string instead of SWHID
- with pytest.raises(ValueError, match="Expected SWHID, got a string"):
+ with pytest.raises(AttributeTypeError):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**_common_metadata_fields,
)
- # Object type does not match the SWHID
- with pytest.raises(
- ValueError, match="Expected SWHID type 'revision', got 'content'"
- ):
- RawExtrinsicMetadata(
- type=MetadataTargetType.REVISION,
- target=_content_swhid,
- **_common_metadata_fields,
- )
-
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=SWHID(
- object_type="content",
+ object_type=SWHIDObjectType.CONTENT,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -903,8 +857,7 @@
def test_metadata_naive_datetime():
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
+ target=_origin_swhid,
**{**_common_metadata_fields, "discovery_date": datetime.datetime.now()},
)
@@ -913,30 +866,21 @@
"""Checks validation of RawExtrinsicMetadata.origin."""
# Origins can't have an 'origin' context
- with pytest.raises(
- ValueError, match="Unexpected 'origin' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'origin' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- origin=_origin_url,
- **_common_metadata_fields,
+ target=_origin_swhid, origin=_origin_swhid, **_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- origin=_origin_url,
- **_common_metadata_fields,
+ target=_content_swhid, origin=_origin_swhid, **_common_metadata_fields,
)
- # SWHIDs aren't valid origin URLs
- with pytest.raises(ValueError, match="SWHID used as context origin URL"):
+ # non-origin SWHIDs aren't valid origin contexts
+ with pytest.raises(ValueError, match="Non origin SWHID used as context origin:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ origin=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -945,21 +889,15 @@
"""Checks validation of RawExtrinsicMetadata.visit."""
# Origins can't have a 'visit' context
- with pytest.raises(
- ValueError, match="Unexpected 'visit' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'visit' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- visit=42,
- **_common_metadata_fields,
+ target=_origin_swhid, visit=42, **_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin=_origin_url,
+ origin=_origin_swhid,
visit=42,
**_common_metadata_fields,
)
@@ -967,18 +905,14 @@
# Missing 'origin'
with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- visit=42,
- **_common_metadata_fields,
+ target=_content_swhid, visit=42, **_common_metadata_fields,
)
# visit id must be positive
with pytest.raises(ValueError, match="Nonpositive visit id"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin=_origin_url,
+ origin=_origin_swhid,
visit=-42,
**_common_metadata_fields,
)
@@ -988,36 +922,26 @@
"""Checks validation of RawExtrinsicMetadata.snapshot."""
# Origins can't have a 'snapshot' context
- with pytest.raises(
- ValueError, match="Unexpected 'snapshot' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'snapshot' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- snapshot=SWHID(
- object_type="snapshot",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- snapshot=SWHID(
- object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
snapshot=SWHID(
- object_type="snapshot",
+ object_type=SWHIDObjectType.SNAPSHOT,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1025,16 +949,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'snapshot', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'snp', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- snapshot=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ snapshot=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1043,36 +961,26 @@
"""Checks validation of RawExtrinsicMetadata.release."""
# Origins can't have a 'release' context
- with pytest.raises(
- ValueError, match="Unexpected 'release' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'release' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- release=SWHID(
- object_type="release",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- release=SWHID(
- object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
release=SWHID(
- object_type="release",
+ object_type=SWHIDObjectType.RELEASE,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1080,16 +988,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'release', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'rel', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- release=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ release=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1098,36 +1000,26 @@
"""Checks validation of RawExtrinsicMetadata.revision."""
# Origins can't have a 'revision' context
- with pytest.raises(
- ValueError, match="Unexpected 'revision' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'revision' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- revision=SWHID(
- object_type="revision",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- revision=SWHID(
- object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
revision=SWHID(
- object_type="revision",
+ object_type=SWHIDObjectType.REVISION,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1135,16 +1027,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'revision', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'rev', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- revision=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ revision=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1153,20 +1039,14 @@
"""Checks validation of RawExtrinsicMetadata.path."""
# Origins can't have a 'path' context
- with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"):
+ with pytest.raises(ValueError, match="Unexpected 'path' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- path=b"/foo/bar",
- **_common_metadata_fields,
+ target=_origin_swhid, path=b"/foo/bar", **_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- path=b"/foo/bar",
- **_common_metadata_fields,
+ target=_content_swhid, path=b"/foo/bar", **_common_metadata_fields,
)
@@ -1174,37 +1054,26 @@
"""Checks validation of RawExtrinsicMetadata.directory."""
# Origins can't have a 'directory' context
- with pytest.raises(
- ValueError, match="Unexpected 'directory' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'directory' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- directory=SWHID(
- object_type="directory",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- directory=SWHID(
- object_type="directory",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
directory=SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1212,15 +1081,9 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'directory', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'dir', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- directory=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ directory=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 7:05 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219373

Event Timeline