Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124323
D4985.id17891.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
58 KB
Subscribers
None
D4985.id17891.diff
View Options
diff --git a/swh/model/cli.py b/swh/model/cli.py
--- a/swh/model/cli.py
+++ b/swh/model/cli.py
@@ -12,7 +12,7 @@
import click
from swh.core.cli import swh as swh_cli_group
-from swh.model.identifiers import SWHID
+from swh.model.swhid import SWHID, SWHIDObjectType, swhid
CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
@@ -44,18 +44,16 @@
def swhid_of_file(path):
from swh.model.from_disk import Content
- from swh.model.identifiers import CONTENT, swhid
object = Content.from_file(path=path).get_data()
- return swhid(CONTENT, object)
+ return swhid(SWHIDObjectType.CONTENT, object)
def swhid_of_file_content(data):
from swh.model.from_disk import Content
- from swh.model.identifiers import CONTENT, swhid
object = Content.from_bytes(mode=644, data=data).get_data()
- return swhid(CONTENT, object)
+ return swhid(SWHIDObjectType.CONTENT, object)
def swhid_of_dir(path: bytes, exclude_patterns: List[bytes] = None) -> str:
@@ -64,7 +62,6 @@
accept_all_directories,
ignore_directories_patterns,
)
- from swh.model.identifiers import DIRECTORY, swhid
dir_filter = (
ignore_directories_patterns(path, exclude_patterns)
@@ -73,20 +70,25 @@
)
object = Directory.from_disk(path=path, dir_filter=dir_filter).get_data()
- return swhid(DIRECTORY, object)
+ return swhid(SWHIDObjectType.DIRECTORY, object)
def swhid_of_origin(url):
- from swh.model.identifiers import SWHID, origin_identifier
+ from swh.model.identifiers import origin_identifier
- return str(SWHID(object_type="origin", object_id=origin_identifier({"url": url})))
+ return str(
+ SWHID(
+ object_type=SWHIDObjectType.ORIGIN,
+ object_id=origin_identifier({"url": url}),
+ )
+ )
def swhid_of_git_repo(path):
import dulwich.repo
from swh.model import hashutil
- from swh.model.identifiers import SWHID, snapshot_identifier
+ from swh.model.identifiers import snapshot_identifier
repo = dulwich.repo.Repo(path)
@@ -109,7 +111,12 @@
snapshot = {"branches": branches}
- return str(SWHID(object_type="snapshot", object_id=snapshot_identifier(snapshot)))
+ return str(
+ SWHID(
+ object_type=SWHIDObjectType.SNAPSHOT,
+ object_id=snapshot_identifier(snapshot),
+ )
+ )
def identify_object(obj_type, follow_symlinks, exclude_patterns, obj):
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -7,38 +7,12 @@
import datetime
from functools import lru_cache
import hashlib
-import re
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
-
-import attr
-
-from .collections import ImmutableDict
-from .exceptions import ValidationError
-from .fields.hashes import validate_sha1
-from .hashutil import MultiHash, hash_git_data, hash_to_hex
-
-ORIGIN = "origin"
-SNAPSHOT = "snapshot"
-REVISION = "revision"
-RELEASE = "release"
-DIRECTORY = "directory"
-CONTENT = "content"
-
-SWHID_NAMESPACE = "swh"
-SWHID_VERSION = 1
-SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
-SWHID_SEP = ":"
-SWHID_CTXT_SEP = ";"
-SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
-
-SWHID_RE_RAW = (
- f"(?P<scheme>{SWHID_NAMESPACE})"
- f"{SWHID_SEP}(?P<version>{SWHID_VERSION})"
- f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
- f"{SWHID_SEP}(?P<object_id>[0-9a-f]{{40}})"
- f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
-)
-SWHID_RE = re.compile(SWHID_RE_RAW)
+from typing import Iterable, List, Optional, Tuple
+
+# re-exported for backward compatibility
+from swh.model.swhid import SWHID, SWHID_RE, SWHID_TYPES, parse_swhid, swhid # noqa
+
+from .hashutil import MultiHash, hash_git_data
@lru_cache()
@@ -676,203 +650,3 @@
"""
return hashlib.sha1(origin["url"].encode("utf-8")).hexdigest()
-
-
-_object_type_map = {
- ORIGIN: {"short_name": "ori", "key_id": "id"},
- SNAPSHOT: {"short_name": "snp", "key_id": "id"},
- RELEASE: {"short_name": "rel", "key_id": "id"},
- REVISION: {"short_name": "rev", "key_id": "id"},
- DIRECTORY: {"short_name": "dir", "key_id": "id"},
- CONTENT: {"short_name": "cnt", "key_id": "sha1_git"},
-}
-
-_swhid_type_map = {
- "ori": ORIGIN,
- "snp": SNAPSHOT,
- "rel": RELEASE,
- "rev": REVISION,
- "dir": DIRECTORY,
- "cnt": CONTENT,
-}
-
-
-@attr.s(frozen=True)
-class SWHID:
- """
- Named tuple holding the relevant info associated to a SoftWare Heritage
- persistent IDentifier (SWHID)
-
- Args:
- namespace (str): the namespace of the identifier, defaults to ``swh``
- scheme_version (int): the scheme version of the identifier,
- defaults to 1
- object_type (str): the type of object the identifier points to,
- either ``content``, ``directory``, ``release``, ``revision`` or ``snapshot``
- object_id (str): object's identifier
- metadata (dict): optional dict filled with metadata related to
- pointed object
-
- Raises:
- swh.model.exceptions.ValidationError: In case of invalid object type or id
-
- Once created, it contains the following attributes:
-
- Attributes:
- namespace (str): the namespace of the identifier
- scheme_version (int): the scheme version of the identifier
- object_type (str): the type of object the identifier points to
- object_id (str): hexadecimal representation of the object hash
- metadata (dict): metadata related to the pointed object
-
- To get the raw SWHID string from an instance of this named tuple,
- use the :func:`str` function::
-
- swhid = SWHID(
- object_type='content',
- object_id='8ff44f081d43176474b267de5451f2c2e88089d0'
- )
- swhid_str = str(swhid)
- # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
- """
-
- namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
- scheme_version = attr.ib(type=int, default=SWHID_VERSION)
- object_type = attr.ib(type=str, default="")
- object_id = attr.ib(type=str, converter=hash_to_hex, default="") # type: ignore
- metadata = attr.ib(
- type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
- )
-
- @namespace.validator
- def check_namespace(self, attribute, value):
- if value != SWHID_NAMESPACE:
- raise ValidationError(
- "Invalid SWHID: invalid namespace: %(namespace)s",
- params={"namespace": value},
- )
-
- @scheme_version.validator
- def check_scheme_version(self, attribute, value):
- if value != SWHID_VERSION:
- raise ValidationError(
- "Invalid SWHID: invalid version: %(version)s", params={"version": value}
- )
-
- @object_type.validator
- def check_object_type(self, attribute, value):
- if value not in _object_type_map:
- raise ValidationError(
- "Invalid SWHID: invalid type: %(object_type)s)",
- params={"object_type": value},
- )
-
- @object_id.validator
- def check_object_id(self, attribute, value):
- try:
- validate_sha1(value) # can raise if invalid hash
- except ValidationError:
- raise ValidationError(
- "Invalid SWHID: invalid checksum: %(object_id)s",
- params={"object_id": value},
- ) from None
-
- @metadata.validator
- def check_qualifiers(self, attribute, value):
- for k in value:
- if k not in SWHID_QUALIFIERS:
- raise ValidationError(
- "Invalid SWHID: unknown qualifier: %(qualifier)s",
- params={"qualifier": k},
- )
-
- def to_dict(self) -> Dict[str, Any]:
- return attr.asdict(self)
-
- def __str__(self) -> str:
- o = _object_type_map.get(self.object_type)
- assert o
- swhid = SWHID_SEP.join(
- [self.namespace, str(self.scheme_version), o["short_name"], self.object_id]
- )
- if self.metadata:
- for k, v in self.metadata.items():
- swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v)
- return swhid
-
-
-def swhid(
- object_type: str,
- object_id: Union[str, Dict[str, Any]],
- scheme_version: int = 1,
- metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(),
-) -> str:
- """Compute :ref:`persistent-identifiers`
-
- Args:
- object_type: object's type, either ``content``, ``directory``,
- ``release``, ``revision`` or ``snapshot``
- object_id: object's identifier
- scheme_version: SWHID scheme version, defaults to 1
- metadata: metadata related to the pointed object
-
- Raises:
- swh.model.exceptions.ValidationError: In case of invalid object type or id
-
- Returns:
- the SWHID of the object
-
- """
- if isinstance(object_id, dict):
- o = _object_type_map[object_type]
- object_id = object_id[o["key_id"]]
- swhid = SWHID(
- scheme_version=scheme_version,
- object_type=object_type,
- object_id=object_id,
- metadata=metadata, # type: ignore # mypy can't properly unify types
- )
- return str(swhid)
-
-
-def parse_swhid(swhid: str) -> SWHID:
- """Parse a Software Heritage identifier (SWHID) from string (see:
- :ref:`persistent-identifiers`.)
-
- Args:
- swhid (str): A persistent identifier
-
- Returns:
- a named tuple holding the parsing result
-
- Raises:
- swh.model.exceptions.ValidationError: if passed string is not a valid SWHID
-
- """
- m = SWHID_RE.fullmatch(swhid)
- if not m:
- raise ValidationError(
- "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid}
- )
- parts = m.groupdict()
-
- _qualifiers = {}
- qualifiers_raw = parts["qualifiers"]
- if qualifiers_raw:
- for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP):
- try:
- k, v = qualifier.split("=")
- except ValueError:
- raise ValidationError(
- "Invalid SWHID: invalid qualifier: %(qualifier)s",
- params={"qualifier": qualifier},
- )
- _qualifiers[k] = v
-
- return SWHID(
- parts["scheme"],
- int(parts["version"]),
- _swhid_type_map[parts["object_type"]],
- parts["object_id"],
- _qualifiers, # type: ignore # mypy can't properly unify types
- )
diff --git a/swh/model/model.py b/swh/model/model.py
--- a/swh/model/model.py
+++ b/swh/model/model.py
@@ -18,7 +18,6 @@
from .collections import ImmutableDict
from .hashutil import DEFAULT_ALGORITHMS, MultiHash, hash_to_bytes
from .identifiers import (
- SWHID,
directory_identifier,
normalize_timestamp,
parse_swhid,
@@ -26,6 +25,7 @@
revision_identifier,
snapshot_identifier,
)
+from .swhid import SWHID, SWHIDObjectType
class MissingData(Exception):
@@ -274,6 +274,10 @@
def unique_key(self) -> KeyType:
return {"url": self.url}
+ @property
+ def id(self) -> bytes:
+ return self.url.encode()
+
@attr.s(frozen=True, slots=True)
class OriginVisit(BaseModel):
@@ -706,6 +710,10 @@
def unique_key(self) -> KeyType:
return self.sha1 # TODO: use a dict of hashes
+ @property
+ def id(self):
+ return self.sha1_git
+
@attr.s(frozen=True, slots=True)
class SkippedContent(BaseContent):
@@ -850,25 +858,12 @@
return {"name": self.name, "version": self.version}
-class MetadataTargetType(Enum):
- """The type of object extrinsic metadata refer to."""
-
- CONTENT = "content"
- DIRECTORY = "directory"
- REVISION = "revision"
- RELEASE = "release"
- SNAPSHOT = "snapshot"
- ORIGIN = "origin"
-
-
@attr.s(frozen=True, slots=True)
class RawExtrinsicMetadata(BaseModel):
object_type: Final = "raw_extrinsic_metadata"
# target object
- type = attr.ib(type=MetadataTargetType, validator=type_validator())
- target = attr.ib(type=Union[str, SWHID], validator=type_validator())
- """URL if type=MetadataTargetType.ORIGIN, else core SWHID"""
+ target = attr.ib(type=SWHID, validator=[type_validator()])
# source
discovery_date = attr.ib(type=datetime.datetime, validator=type_validator())
@@ -880,23 +875,23 @@
metadata = attr.ib(type=bytes, validator=type_validator())
# context
- origin = attr.ib(type=Optional[str], default=None, validator=type_validator())
- visit = attr.ib(type=Optional[int], default=None, validator=type_validator())
- snapshot = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- release = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- revision = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
- path = attr.ib(type=Optional[bytes], default=None, validator=type_validator())
- directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator())
+ origin = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ visit = attr.ib(type=Optional[int], default=None, validator=[type_validator()])
+ snapshot = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ release = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ revision = attr.ib(type=Optional[SWHID], default=None, validator=[type_validator()])
+ path = attr.ib(type=Optional[bytes], default=None, validator=[type_validator()])
+ directory = attr.ib(
+ type=Optional[SWHID], default=None, validator=[type_validator()]
+ )
+
+ @property
+ def type(self):
+ return self.target.object_type
@target.validator
def check_target(self, attribute, value):
- if self.type == MetadataTargetType.ORIGIN:
- if isinstance(value, SWHID) or value.startswith("swh:"):
- raise ValueError(
- "Got SWHID as target for origin metadata (expected an URL)."
- )
- else:
- self._check_swhid(self.type.value, value)
+ self._check_swhid(self.type, value)
@discovery_date.validator
def check_discovery_date(self, attribute, value):
@@ -909,24 +904,11 @@
if value is None:
return
- if self.type not in (
- MetadataTargetType.SNAPSHOT,
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
- ):
- raise ValueError(
- f"Unexpected 'origin' context for {self.type.value} object: {value}"
- )
+ if self.type == SWHIDObjectType.ORIGIN:
+ raise ValueError(f"Unexpected 'origin' context for object: {value}")
- if value.startswith("swh:"):
- # Technically this is valid; but:
- # 1. SWHIDs are URIs, not URLs
- # 2. if a SWHID gets here, it's very likely to be a mistake
- # (and we can remove this check if it turns out there is a
- # legitimate use for it).
- raise ValueError(f"SWHID used as context origin URL: {value}")
+ if value.object_type != SWHIDObjectType.ORIGIN:
+ raise ValueError(f"Non origin SWHID used as context origin: {value}")
@visit.validator
def check_visit(self, attribute, value):
@@ -934,15 +916,13 @@
return
if self.type not in (
- MetadataTargetType.SNAPSHOT,
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.SNAPSHOT,
+ SWHIDObjectType.RELEASE,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'visit' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'visit' context for object: {value}")
if self.origin is None:
raise ValueError("'origin' context must be set if 'visit' is.")
@@ -956,16 +936,14 @@
return
if self.type not in (
- MetadataTargetType.RELEASE,
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.RELEASE,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'snapshot' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'snapshot' context for object: {value}")
- self._check_swhid("snapshot", value)
+ self._check_swhid(SWHIDObjectType.SNAPSHOT, value)
@release.validator
def check_release(self, attribute, value):
@@ -973,58 +951,51 @@
return
if self.type not in (
- MetadataTargetType.REVISION,
- MetadataTargetType.DIRECTORY,
- MetadataTargetType.CONTENT,
+ SWHIDObjectType.REVISION,
+ SWHIDObjectType.DIRECTORY,
+ SWHIDObjectType.CONTENT,
):
- raise ValueError(
- f"Unexpected 'release' context for {self.type.value} object: {value}"
- )
+ raise ValueError(f"Unexpected 'release' context for object: {value}")
- self._check_swhid("release", value)
+ self._check_swhid(SWHIDObjectType.RELEASE, value)
@revision.validator
def check_revision(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'revision' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'revision' context for object: {value}")
- self._check_swhid("revision", value)
+ self._check_swhid(SWHIDObjectType.REVISION, value)
@path.validator
def check_path(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'path' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.DIRECTORY, SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'path' context for object: {value}")
@directory.validator
def check_directory(self, attribute, value):
if value is None:
return
- if self.type not in (MetadataTargetType.CONTENT,):
- raise ValueError(
- f"Unexpected 'directory' context for {self.type.value} object: {value}"
- )
+ if self.type not in (SWHIDObjectType.CONTENT,):
+ raise ValueError(f"Unexpected 'directory' context for object: {value}")
- self._check_swhid("directory", value)
+ self._check_swhid(SWHIDObjectType.DIRECTORY, value)
def _check_swhid(self, expected_object_type, swhid):
+
if isinstance(swhid, str):
raise ValueError(f"Expected SWHID, got a string: {swhid}")
if swhid.object_type != expected_object_type:
raise ValueError(
- f"Expected SWHID type '{expected_object_type}', "
- f"got '{swhid.object_type}' in {swhid}"
+ f"Expected SWHID type '{expected_object_type.value}', "
+ f"got '{swhid.object_type.value}' in {swhid}"
)
if swhid.metadata:
@@ -1049,26 +1020,20 @@
@classmethod
def from_dict(cls, d):
+
d = {
**d,
- "type": MetadataTargetType(d["type"]),
"authority": MetadataAuthority.from_dict(d["authority"]),
"fetcher": MetadataFetcher.from_dict(d["fetcher"]),
}
-
- if d["type"] != MetadataTargetType.ORIGIN:
- d["target"] = parse_swhid(d["target"])
-
- swhid_keys = ("snapshot", "release", "revision", "directory")
- for swhid_key in swhid_keys:
- if d.get(swhid_key):
- d[swhid_key] = parse_swhid(d[swhid_key])
-
+ swhid_keys = ("target", "snapshot", "release", "revision", "directory")
+ for k in swhid_keys:
+ if k in d and isinstance(d[k], str):
+ d[k] = parse_swhid(d[k])
return super().from_dict(d)
def unique_key(self) -> KeyType:
return {
- "type": self.type.value,
"target": str(self.target),
"authority_type": self.authority.type.value,
"authority_url": self.authority.url,
diff --git a/swh/model/swhid.py b/swh/model/swhid.py
new file mode 100644
--- /dev/null
+++ b/swh/model/swhid.py
@@ -0,0 +1,254 @@
+# Copyright (C) 2015-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from enum import Enum
+import re
+from typing import Any, Dict, Union
+
+import attr
+from attrs_strict import type_validator
+
+from .collections import ImmutableDict
+from .exceptions import ValidationError
+from .fields.hashes import validate_sha1
+from .hashutil import hash_to_hex
+
+ORIGIN = "origin"
+SNAPSHOT = "snapshot"
+REVISION = "revision"
+RELEASE = "release"
+DIRECTORY = "directory"
+CONTENT = "content"
+
+SWHID_NAMESPACE = "swh"
+SWHID_VERSION = 1
+SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
+SWHID_SEP = ":"
+SWHID_CTXT_SEP = ";"
+SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
+
+SWHID_RE_RAW = (
+ f"(?P<scheme>{SWHID_NAMESPACE})"
+ f"{SWHID_SEP}(?P<version>{SWHID_VERSION})"
+ f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
+ f"{SWHID_SEP}(?P<object_id>[0-9a-f]+)"
+ f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
+)
+SWHID_RE = re.compile(SWHID_RE_RAW)
+
+
+class SWHIDObjectType(Enum):
+ """The type of object a SWHID points to (values are the SWHID type tags)."""
+
+ CONTENT = "cnt"
+ DIRECTORY = "dir"
+ REVISION = "rev"
+ RELEASE = "rel"
+ SNAPSHOT = "snp"
+ ORIGIN = "ori"
+
+
+_object_type_id_map = {
+ SWHIDObjectType.ORIGIN: "id",
+ SWHIDObjectType.SNAPSHOT: "id",
+ SWHIDObjectType.RELEASE: "id",
+ SWHIDObjectType.REVISION: "id",
+ SWHIDObjectType.DIRECTORY: "id",
+ SWHIDObjectType.CONTENT: "sha1_git",
+}
+
+
+_swhid_type_map = {
+ "ori": SWHIDObjectType.ORIGIN,
+ "snp": SWHIDObjectType.SNAPSHOT,
+ "rel": SWHIDObjectType.RELEASE,
+ "rev": SWHIDObjectType.REVISION,
+ "dir": SWHIDObjectType.DIRECTORY,
+ "cnt": SWHIDObjectType.CONTENT,
+ "origin": SWHIDObjectType.ORIGIN,
+ "snapshot": SWHIDObjectType.SNAPSHOT,
+ "release": SWHIDObjectType.RELEASE,
+ "revision": SWHIDObjectType.REVISION,
+ "directory": SWHIDObjectType.DIRECTORY,
+ "content": SWHIDObjectType.CONTENT,
+}
+
+
+@attr.s(frozen=True)
+class SWHID:
+ """
+ Immutable attrs class holding the relevant info associated to a SoftWare
+ Heritage persistent IDentifier (SWHID)
+
+ Args:
+ namespace (str): the namespace of the identifier, defaults to ``swh``
+ scheme_version (int): the scheme version of the identifier,
+ defaults to 1
+ object_type (SWHIDObjectType): the type of object the identifier points to,
+ given as a :class:`SWHIDObjectType` member such as ``SWHIDObjectType.CONTENT``
+ object_id (str): object's identifier
+ metadata (dict): optional dict filled with metadata related to
+ pointed object
+
+ Raises:
+ swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+ Once created, it contains the following attributes:
+
+ Attributes:
+ namespace (str): the namespace of the identifier
+ scheme_version (int): the scheme version of the identifier
+ object_type (SWHIDObjectType): the type of object the identifier points to
+ object_id (str): hexadecimal representation of the object hash
+ metadata (dict): metadata related to the pointed object
+
+ To get the raw SWHID string from an instance of this class,
+ use the :func:`str` function::
+
+ swhid = SWHID(
+ object_type=SWHIDObjectType.CONTENT,
+ object_id='8ff44f081d43176474b267de5451f2c2e88089d0'
+ )
+ swhid_str = str(swhid)
+ # 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
+ """
+
+ object_type = attr.ib(type=SWHIDObjectType, validator=type_validator())
+ object_id = attr.ib(type=str, converter=hash_to_hex) # type: ignore
+ namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
+ scheme_version = attr.ib(type=int, default=SWHID_VERSION)
+ metadata = attr.ib(
+ type=ImmutableDict[str, Any], converter=ImmutableDict, default=ImmutableDict()
+ )
+
+ @namespace.validator
+ def check_namespace(self, attribute, value):
+ if value != SWHID_NAMESPACE:
+ raise ValidationError(
+ "Invalid SWHID: invalid namespace: %(namespace)s",
+ params={"namespace": value},
+ )
+
+ @scheme_version.validator
+ def check_scheme_version(self, attribute, value):
+ if value != SWHID_VERSION:
+ raise ValidationError(
+ "Invalid SWHID: invalid version: %(version)s", params={"version": value}
+ )
+
+ @object_id.validator
+ def check_object_id(self, attribute, value):
+ if self.object_type == SWHIDObjectType.ORIGIN:
+ return
+ try:
+ validate_sha1(value) # can raise if invalid hash
+ except ValidationError:
+ raise ValidationError(
+ "Invalid SWHID: invalid checksum: %(object_id)s",
+ params={"object_id": value},
+ ) from None
+
+ @metadata.validator
+ def check_qualifiers(self, attribute, value):
+ for k in value:
+ if k not in SWHID_QUALIFIERS:
+ raise ValidationError(
+ "Invalid SWHID: unknown qualifier: %(qualifier)s",
+ params={"qualifier": k},
+ )
+
+ def to_dict(self) -> Dict[str, Any]:
+ return attr.asdict(self)
+
+ def __str__(self) -> str:
+ swhid = SWHID_SEP.join(
+ [
+ self.namespace,
+ str(self.scheme_version),
+ self.object_type.value,
+ self.object_id,
+ ]
+ )
+ if self.metadata:
+ for k, v in self.metadata.items():
+ swhid += "%s%s=%s" % (SWHID_CTXT_SEP, k, v)
+ return swhid
+
+
+def swhid(
+ object_type: Union[SWHIDObjectType, str],
+ object_id: Union[str, Dict[str, Any]],
+ scheme_version: int = 1,
+ metadata: Union[ImmutableDict[str, Any], Dict[str, Any]] = ImmutableDict(),
+) -> str:
+ """Compute :ref:`persistent-identifiers`
+
+ Args:
+ object_type: object's type (a SWHIDObjectType, or its short or long name as a str)
+ object_id: object's identifier
+ scheme_version: SWHID scheme version, defaults to 1
+ metadata: metadata related to the pointed object
+
+ Raises:
+ swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+ Returns:
+ the SWHID of the object
+
+ """
+ if isinstance(object_type, str):
+ object_type = _swhid_type_map[object_type]
+ if isinstance(object_id, dict):
+ object_id = object_id[_object_type_id_map[object_type]]
+ swhid = SWHID(
+ scheme_version=scheme_version,
+ object_type=object_type,
+ object_id=object_id,
+ metadata=metadata, # type: ignore # mypy can't properly unify types
+ )
+ return str(swhid)
+
+
+def parse_swhid(swhid: str) -> SWHID:
+ """Parse a Software Heritage identifier (SWHID) from string (see:
+ :ref:`persistent-identifiers`.)
+
+ Args:
+ swhid (str): A persistent identifier
+
+ Returns:
+ a :class:`SWHID` instance holding the parsing result
+
+ Raises:
+ swh.model.exceptions.ValidationError: if passed string is not a valid SWHID
+
+ """
+ m = SWHID_RE.fullmatch(swhid)
+ if not m:
+ raise ValidationError(
+ "Invalid SWHID: invalid syntax: %(swhid)s", params={"swhid": swhid}
+ )
+ parts = m.groupdict()
+
+ _qualifiers = {}
+ qualifiers_raw = parts["qualifiers"]
+ if qualifiers_raw:
+ for qualifier in qualifiers_raw.split(SWHID_CTXT_SEP):
+ try:
+ k, v = qualifier.split("=")
+ except ValueError:
+ raise ValidationError(
+ "Invalid SWHID: invalid qualifier: %(qualifier)s",
+ params={"qualifier": qualifier},
+ )
+ _qualifiers[k] = v
+
+ return SWHID(
+ namespace=parts["scheme"],
+ scheme_version=int(parts["version"]),
+ object_type=_swhid_type_map[parts["object_type"]],
+ object_id=parts["object_id"],
+ metadata=_qualifiers, # type: ignore # mypy can't properly unify types
+ )
diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py
--- a/swh/model/tests/swh_model_data.py
+++ b/swh/model/tests/swh_model_data.py
@@ -9,7 +9,6 @@
import attr
from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
from swh.model.model import (
BaseModel,
Content,
@@ -18,7 +17,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
ObjectType,
Origin,
OriginVisit,
@@ -35,6 +33,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType
UTC = datetime.timezone.utc
@@ -310,8 +309,9 @@
RAW_EXTRINSIC_METADATA = [
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target="http://example.org/foo.git",
+ target=SWHID(
+ object_type=SWHIDObjectType.ORIGIN, object_id=b"http://example.org/foo.git"
+ ),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None),
@@ -319,9 +319,9 @@
metadata=b'{"foo": "bar"}',
),
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=SWHID(
- object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git)
+ object_type=SWHIDObjectType.CONTENT,
+ object_id=hash_to_hex(CONTENTS[0].sha1_git),
),
discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC),
authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None),
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -13,15 +13,8 @@
from swh.model import hashutil, identifiers
from swh.model.exceptions import ValidationError
from swh.model.hashutil import hash_to_bytes as _x
-from swh.model.identifiers import (
- CONTENT,
- DIRECTORY,
- RELEASE,
- REVISION,
- SNAPSHOT,
- SWHID,
- normalize_timestamp,
-)
+from swh.model.identifiers import normalize_timestamp
+from swh.model.swhid import SWHID, SWHIDObjectType
def remove_id(d: Dict) -> Dict:
@@ -897,77 +890,77 @@
for full_type, _hash, expected_swhid, version, _meta in [
(
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
_snapshot_id,
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
None,
{},
),
(
- RELEASE,
+ SWHIDObjectType.RELEASE,
_release_id,
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
1,
{},
),
(
- REVISION,
+ SWHIDObjectType.REVISION,
_revision_id,
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
None,
{},
),
(
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
_directory_id,
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
None,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content_id,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
1,
{},
),
(
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
_snapshot,
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
None,
{},
),
(
- RELEASE,
+ SWHIDObjectType.RELEASE,
_release,
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
1,
{},
),
(
- REVISION,
+ SWHIDObjectType.REVISION,
_revision,
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
None,
{},
),
(
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
_directory,
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
None,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
1,
{},
),
(
- CONTENT,
+ SWHIDObjectType.CONTENT,
_content,
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2;origin=1",
1,
@@ -988,9 +981,10 @@
_snapshot = {"id": _snapshot_id}
for _type, _hash in [
- (SNAPSHOT, _snapshot_id),
- (SNAPSHOT, _snapshot),
- ("lines", "42"),
+ (SWHIDObjectType.SNAPSHOT, _snapshot_id),
+ (SWHIDObjectType.SNAPSHOT, _snapshot),
+ ("snapshot", "42"),
+ ("snp", "42"),
]:
with self.assertRaises(ValidationError):
identifiers.swhid(_type, _hash)
@@ -999,31 +993,31 @@
for swhid, _type, _version, _hash in [
(
"swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- CONTENT,
+ SWHIDObjectType.CONTENT,
1,
"94a9ed024d3859793618152ea559a168bbcbb5e2",
),
(
"swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
1,
"d198bc9d7a6bcf6db04f476d29314f157507d505",
),
(
"swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
- REVISION,
+ SWHIDObjectType.REVISION,
1,
"309cf2674ee7a0749978cf8265ab91a60aea0f7d",
),
(
"swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
- RELEASE,
+ SWHIDObjectType.RELEASE,
1,
"22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
),
(
"swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
- SNAPSHOT,
+ SWHIDObjectType.SNAPSHOT,
1,
"c7c108084bc0bf3d81436bf980b46e98bd338453",
),
@@ -1041,14 +1035,14 @@
for swhid, _type, _version, _hash, _metadata in [
(
"swh:1:cnt:9c95815d9e9d91b8dae8e05d8bbc696fe19f796b;lines=1-18;origin=https://github.com/python/cpython", # noqa
- CONTENT,
+ SWHIDObjectType.CONTENT,
1,
"9c95815d9e9d91b8dae8e05d8bbc696fe19f796b",
{"lines": "1-18", "origin": "https://github.com/python/cpython"},
),
(
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;origin=deb://Debian/packages/linuxdoc-tools", # noqa
- DIRECTORY,
+ SWHIDObjectType.DIRECTORY,
1,
"0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
{"origin": "deb://Debian/packages/linuxdoc-tools"},
@@ -1125,10 +1119,24 @@
@pytest.mark.parametrize(
"ns,version,type,id",
[
- ("foo", 1, CONTENT, "abc8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 2, DIRECTORY, "def8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 1, "foo", "fed8bc9d7a6bcf6db04f476d29314f157507d505",),
- ("swh", 1, SNAPSHOT, "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",),
+ (
+ "foo",
+ 1,
+ SWHIDObjectType.CONTENT,
+ "abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 2,
+ SWHIDObjectType.DIRECTORY,
+ "def8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 1,
+ SWHIDObjectType.SNAPSHOT,
+ "gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ ),
],
)
def test_SWHID_class_validation_error(ns, version, type, id):
@@ -1141,27 +1149,35 @@
def test_swhid_hash():
object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2"
- assert hash(SWHID(object_type="directory", object_id=object_id)) == hash(
- SWHID(object_type="directory", object_id=object_id)
- )
+ assert hash(
+ SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id)
+ ) == hash(SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id))
assert hash(
- SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
) == hash(
- SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
)
# Different order of the dictionary, so the underlying order of the tuple in
# ImmutableDict is different.
assert hash(
SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id=object_id,
metadata={"origin": "https://example.com", "lines": "42"},
)
) == hash(
SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id=object_id,
metadata={"lines": "42", "origin": "https://example.com"},
)
@@ -1171,14 +1187,26 @@
def test_swhid_eq():
object_id = "94a9ed024d3859793618152ea559a168bbcbb5e2"
- assert SWHID(object_type="directory", object_id=object_id) == SWHID(
- object_type="directory", object_id=object_id
+ assert SWHID(object_type=SWHIDObjectType.DIRECTORY, object_id=object_id) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY, object_id=object_id
)
assert SWHID(
- object_type="directory", object_id=object_id, metadata=dummy_qualifiers,
- ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ ) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
assert SWHID(
- object_type="directory", object_id=object_id, metadata=dummy_qualifiers,
- ) == SWHID(object_type="directory", object_id=object_id, metadata=dummy_qualifiers,)
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ ) == SWHID(
+ object_type=SWHIDObjectType.DIRECTORY,
+ object_id=object_id,
+ metadata=dummy_qualifiers,
+ )
diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py
--- a/swh/model/tests/test_model.py
+++ b/swh/model/tests/test_model.py
@@ -15,9 +15,7 @@
from swh.model.hashutil import MultiHash, hash_to_bytes
import swh.model.hypothesis_strategies as strategies
from swh.model.identifiers import (
- SWHID,
directory_identifier,
- parse_swhid,
release_identifier,
revision_identifier,
snapshot_identifier,
@@ -29,7 +27,6 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
MissingData,
Origin,
OriginVisit,
@@ -43,6 +40,7 @@
Timestamp,
TimestampWithTimezone,
)
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
from swh.model.tests.test_identifiers import (
directory_example,
release_example,
@@ -781,7 +779,8 @@
)
_metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",)
_content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2")
-_origin_url = "https://forge.softwareheritage.org/source/swh-model.git"
+_origin_url = b"https://forge.softwareheritage.org/source/swh-model.git"
+_origin_swhid = SWHID(object_type=SWHIDObjectType.ORIGIN, object_id=_origin_url)
_dummy_qualifiers = {"origin": "https://example.com", "lines": "42"}
_common_metadata_fields = dict(
discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
@@ -796,15 +795,11 @@
"""Checks valid RawExtrinsicMetadata objects don't raise an error."""
# Simplest case
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields
- )
+ RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields)
# Object with an SWHID
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- **_common_metadata_fields,
+ target=_content_swhid, **_common_metadata_fields,
)
@@ -819,23 +814,18 @@
"metadata": b'{"origin": "https://example.com", "lines": "42"}',
}
- m = RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields,
- )
+ m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
- "type": "origin",
- "target": _origin_url,
+ "target": (
+ "swh:1:ori:68747470733a2f2f666f7267652e736f667477617265686572"
+ "69746167652e6f72672f736f757263652f7377682d6d6f64656c2e676974"
+ ),
**common_fields,
}
assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m
- m = RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- **_common_metadata_fields,
- )
+ m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,)
assert m.to_dict() == {
- "type": "content",
"target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**common_fields,
}
@@ -845,54 +835,18 @@
def test_metadata_invalid_target():
"""Checks various invalid values for the 'target' field."""
- # SWHID for an origin
- with pytest.raises(ValueError, match="expected an URL"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_content_swhid,
- **_common_metadata_fields,
- )
-
- # SWHID for an origin (even when passed as string)
- with pytest.raises(ValueError, match="expected an URL"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
- **_common_metadata_fields,
- )
-
- # URL for a non-origin
- with pytest.raises(ValueError, match="Expected SWHID, got a string"):
- RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_origin_url,
- **_common_metadata_fields,
- )
-
# SWHID passed as string instead of SWHID
- with pytest.raises(ValueError, match="Expected SWHID, got a string"):
+ with pytest.raises(AttributeTypeError):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
**_common_metadata_fields,
)
- # Object type does not match the SWHID
- with pytest.raises(
- ValueError, match="Expected SWHID type 'revision', got 'content'"
- ):
- RawExtrinsicMetadata(
- type=MetadataTargetType.REVISION,
- target=_content_swhid,
- **_common_metadata_fields,
- )
-
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=SWHID(
- object_type="content",
+ object_type=SWHIDObjectType.CONTENT,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -903,8 +857,7 @@
def test_metadata_naive_datetime():
with pytest.raises(ValueError, match="must be a timezone-aware datetime"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
+ target=_origin_swhid,
**{**_common_metadata_fields, "discovery_date": datetime.datetime.now()},
)
@@ -913,30 +866,21 @@
"""Checks validation of RawExtrinsicMetadata.origin."""
# Origins can't have an 'origin' context
- with pytest.raises(
- ValueError, match="Unexpected 'origin' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'origin' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- origin=_origin_url,
- **_common_metadata_fields,
+ target=_origin_swhid, origin=_origin_swhid, **_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- origin=_origin_url,
- **_common_metadata_fields,
+ target=_content_swhid, origin=_origin_swhid, **_common_metadata_fields,
)
- # SWHIDs aren't valid origin URLs
- with pytest.raises(ValueError, match="SWHID used as context origin URL"):
+ # non origin SWHIDs aren't valid origin context
+ with pytest.raises(ValueError, match="Non origin SWHID used as context origin:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ origin=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -945,21 +889,15 @@
"""Checks validation of RawExtrinsicMetadata.visit."""
# Origins can't have a 'visit' context
- with pytest.raises(
- ValueError, match="Unexpected 'visit' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'visit' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- visit=42,
- **_common_metadata_fields,
+ target=_origin_swhid, visit=42, **_common_metadata_fields,
)
# but all other types can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin=_origin_url,
+ origin=_origin_swhid,
visit=42,
**_common_metadata_fields,
)
@@ -967,18 +905,14 @@
# Missing 'origin'
with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- visit=42,
- **_common_metadata_fields,
+ target=_content_swhid, visit=42, **_common_metadata_fields,
)
# visit id must be positive
with pytest.raises(ValueError, match="Nonpositive visit id"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- origin=_origin_url,
+ origin=_origin_swhid,
visit=-42,
**_common_metadata_fields,
)
@@ -988,36 +922,26 @@
"""Checks validation of RawExtrinsicMetadata.snapshot."""
# Origins can't have a 'snapshot' context
- with pytest.raises(
- ValueError, match="Unexpected 'snapshot' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'snapshot' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- snapshot=SWHID(
- object_type="snapshot",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- snapshot=SWHID(
- object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ snapshot=parse_swhid("swh:1:snp:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
snapshot=SWHID(
- object_type="snapshot",
+ object_type=SWHIDObjectType.SNAPSHOT,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1025,16 +949,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'snapshot', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'snp', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- snapshot=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ snapshot=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1043,36 +961,26 @@
"""Checks validation of RawExtrinsicMetadata.release."""
# Origins can't have a 'release' context
- with pytest.raises(
- ValueError, match="Unexpected 'release' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'release' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- release=SWHID(
- object_type="release",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- release=SWHID(
- object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ release=parse_swhid("swh:1:rel:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
release=SWHID(
- object_type="release",
+ object_type=SWHIDObjectType.RELEASE,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1080,16 +988,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'release', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'rel', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- release=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ release=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1098,36 +1000,26 @@
"""Checks validation of RawExtrinsicMetadata.revision."""
# Origins can't have a 'revision' context
- with pytest.raises(
- ValueError, match="Unexpected 'revision' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'revision' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- revision=SWHID(
- object_type="revision",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- revision=SWHID(
- object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2"
- ),
+ revision=parse_swhid("swh:1:rev:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
revision=SWHID(
- object_type="revision",
+ object_type=SWHIDObjectType.REVISION,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1135,16 +1027,10 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'revision', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'rev', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- revision=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ revision=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
@@ -1153,20 +1039,14 @@
"""Checks validation of RawExtrinsicMetadata.path."""
# Origins can't have a 'path' context
- with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"):
+ with pytest.raises(ValueError, match="Unexpected 'path' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- path=b"/foo/bar",
- **_common_metadata_fields,
+ target=_origin_swhid, path=b"/foo/bar", **_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
- target=_content_swhid,
- path=b"/foo/bar",
- **_common_metadata_fields,
+ target=_content_swhid, path=b"/foo/bar", **_common_metadata_fields,
)
@@ -1174,37 +1054,26 @@
"""Checks validation of RawExtrinsicMetadata.directory."""
# Origins can't have a 'directory' context
- with pytest.raises(
- ValueError, match="Unexpected 'directory' context for origin object"
- ):
+ with pytest.raises(ValueError, match="Unexpected 'directory' context for object:"):
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=_origin_url,
- directory=SWHID(
- object_type="directory",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ target=_origin_swhid,
+ directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# but content can
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- directory=SWHID(
- object_type="directory",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ directory=parse_swhid("swh:1:dir:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
# Non-core SWHID
with pytest.raises(ValueError, match="Expected core SWHID"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
directory=SWHID(
- object_type="directory",
+ object_type=SWHIDObjectType.DIRECTORY,
object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
metadata=_dummy_qualifiers,
),
@@ -1212,15 +1081,9 @@
)
# SWHID type doesn't match the expected type of this context key
- with pytest.raises(
- ValueError, match="Expected SWHID type 'directory', got 'content'"
- ):
+ with pytest.raises(ValueError, match="Expected SWHID type 'dir', got 'cnt'"):
RawExtrinsicMetadata(
- type=MetadataTargetType.CONTENT,
target=_content_swhid,
- directory=SWHID(
- object_type="content",
- object_id="94a9ed024d3859793618152ea559a168bbcbb5e2",
- ),
+ directory=parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2"),
**_common_metadata_fields,
)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 7:05 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219373
Attached To
D4985: [WIP] Add support for SWHID on Origin
Event Timeline
Log In to Comment