Page MenuHomeSoftware Heritage

D5118.diff
No OneTemporary

D5118.diff

diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -24,11 +24,10 @@
class ObjectType(enum.Enum):
- """Possible object types of a QualifiedSWHID.
+ """Possible object types of a QualifiedSWHID or CoreSWHID.
The values of each variant is what is used in the SWHID's string representation."""
- ORIGIN = "ori"
SNAPSHOT = "snp"
REVISION = "rev"
RELEASE = "rel"
@@ -36,6 +35,20 @@
CONTENT = "cnt"
+class ExtendedObjectType(enum.Enum):
+ """Possible object types of an ExtendedSWHID.
+
+ The variants are a superset of :class:`ObjectType`'s"""
+
+ SNAPSHOT = "snp"
+ REVISION = "rev"
+ RELEASE = "rel"
+ DIRECTORY = "dir"
+ CONTENT = "cnt"
+ ORIGIN = "ori"
+ RAW_EXTRINSIC_METADATA = "emd"
+
+
# The following are deprecated aliases of the variants defined in ObjectType
# while transitioning from SWHID to QualifiedSWHID
ORIGIN = "origin"
@@ -44,10 +57,12 @@
RELEASE = "release"
DIRECTORY = "directory"
CONTENT = "content"
+RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata"
SWHID_NAMESPACE = "swh"
SWHID_VERSION = 1
-SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
+SWHID_TYPES = ["snp", "rel", "rev", "dir", "cnt"]
+EXTENDED_SWHID_TYPES = SWHID_TYPES + ["ori", "emd"]
SWHID_SEP = ":"
SWHID_CTXT_SEP = ";"
SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
@@ -55,7 +70,7 @@
SWHID_RE_RAW = (
f"(?P<namespace>{SWHID_NAMESPACE})"
f"{SWHID_SEP}(?P<scheme_version>{SWHID_VERSION})"
- f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
+ f"{SWHID_SEP}(?P<object_type>{'|'.join(EXTENDED_SWHID_TYPES)})"
f"{SWHID_SEP}(?P<object_id>[0-9a-f]{{40}})"
f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
)
@@ -706,6 +721,7 @@
REVISION: {"short_name": "rev", "key_id": "id"},
DIRECTORY: {"short_name": "dir", "key_id": "id"},
CONTENT: {"short_name": "cnt", "key_id": "sha1_git"},
+ RAW_EXTRINSIC_METADATA: {"short_name": "emd", "key_id": "id"},
}
_swhid_type_map = {
@@ -715,6 +731,7 @@
"rev": REVISION,
"dir": DIRECTORY,
"cnt": CONTENT,
+ "emd": RAW_EXTRINSIC_METADATA,
}
@@ -988,6 +1005,93 @@
raise ValidationError(*e.args) from None
+@attr.s(frozen=True, kw_only=True)
+class ExtendedSWHID:
+ """
+ Dataclass holding the relevant info associated to a SoftWare Heritage
+ persistent IDentifier (SWHID).
+
+ It extends `CoreSWHID` by allowing non-standard object types, and should
+ only be used internally to Software Heritage.
+
+ Raises:
+ swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+ To get the raw SWHID string from an instance of this class,
+ use the :func:`str` function:
+
+ >>> swhid = ExtendedSWHID(
+ ... object_type=ExtendedObjectType.CONTENT,
+ ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'),
+ ... )
+ >>> str(swhid)
+ 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
+
+ And vice-versa with :meth:`ExtendedSWHID.from_string`:
+
+ >>> swhid == ExtendedSWHID.from_string(
+ ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0"
+ ... )
+ True
+ """
+
+ namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
+ """the namespace of the identifier, defaults to ``swh``"""
+
+ scheme_version = attr.ib(type=int, default=SWHID_VERSION)
+ """the scheme version of the identifier, defaults to 1"""
+
+ object_type = attr.ib(
+ type=ExtendedObjectType,
+ validator=type_validator(),
+ converter=ExtendedObjectType,
+ )
+ """the type of object the identifier points to"""
+
+ object_id = attr.ib(type=bytes, validator=type_validator())
+ """object's identifier"""
+
+ @namespace.validator
+ def check_namespace(self, attribute, value):
+ if value != SWHID_NAMESPACE:
+ raise ValidationError(
+ "Invalid SWHID: invalid namespace: %(namespace)s",
+ params={"namespace": value},
+ )
+
+ @scheme_version.validator
+ def check_scheme_version(self, attribute, value):
+ if value != SWHID_VERSION:
+ raise ValidationError(
+ "Invalid SWHID: invalid version: %(version)s", params={"version": value}
+ )
+
+ @object_id.validator
+ def check_object_id(self, attribute, value):
+ if len(value) != 20:
+ raise ValidationError(
+ "Invalid SWHID: invalid checksum: %(object_id)s",
+ params={"object_id": hash_to_hex(value)},
+ )
+
+ def __str__(self) -> str:
+ return SWHID_SEP.join(
+ [
+ self.namespace,
+ str(self.scheme_version),
+ self.object_type.value,
+ hash_to_hex(self.object_id),
+ ]
+ )
+
+ @classmethod
+ def from_string(cls, s: str) -> ExtendedSWHID:
+ parts = _parse_swhid(s)
+ if parts.pop("qualifiers"):
+ raise ValidationError("ExtendedSWHID does not support qualifiers.")
+ return ExtendedSWHID(**parts)
+
+
@attr.s(frozen=True)
class SWHID:
"""
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -21,6 +21,8 @@
SNAPSHOT,
SWHID,
CoreSWHID,
+ ExtendedObjectType,
+ ExtendedSWHID,
ObjectType,
QualifiedSWHID,
normalize_timestamp,
@@ -1277,6 +1279,8 @@
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:ori:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:emd:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
"swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1:snp:foo",
@@ -1444,6 +1448,8 @@
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:ori:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:emd:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;visit=swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", # noqa
"swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1:snp:foo",
@@ -1503,3 +1509,152 @@
assert CoreSWHID(
object_type=ObjectType.DIRECTORY, object_id=object_id,
) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,)
+
+
+def test_parse_serialize_extended_swhid():
+ for swhid, _type, _version, _hash in [
+ (
+ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ ExtendedObjectType.CONTENT,
+ 1,
+ _x("94a9ed024d3859793618152ea559a168bbcbb5e2"),
+ ),
+ (
+ "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
+ ExtendedObjectType.DIRECTORY,
+ 1,
+ _x("d198bc9d7a6bcf6db04f476d29314f157507d505"),
+ ),
+ (
+ "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
+ ExtendedObjectType.REVISION,
+ 1,
+ _x("309cf2674ee7a0749978cf8265ab91a60aea0f7d"),
+ ),
+ (
+ "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
+ ExtendedObjectType.RELEASE,
+ 1,
+ _x("22ece559cc7cc2364edc5e5593d63ae8bd229f9f"),
+ ),
+ (
+ "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.SNAPSHOT,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ (
+ "swh:1:ori:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.ORIGIN,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ (
+ "swh:1:emd:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.RAW_EXTRINSIC_METADATA,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ ]:
+ expected_result = ExtendedSWHID(
+ namespace="swh",
+ scheme_version=_version,
+ object_type=_type,
+ object_id=_hash,
+ )
+ actual_result = ExtendedSWHID.from_string(swhid)
+ assert actual_result == expected_result
+ assert str(expected_result) == str(actual_result) == swhid
+
+
+@pytest.mark.parametrize(
+ "invalid_swhid",
+ [
+ "swh:1:cnt",
+ "swh:1:",
+ "swh:",
+ "swh:1:cnt:",
+ "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;visit=swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", # noqa
+ "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ "swh:1:snp:foo",
+ "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ ],
+)
+def test_parse_extended_swhid_parsing_error(invalid_swhid):
+ with pytest.raises(ValidationError):
+ ExtendedSWHID.from_string(invalid_swhid)
+
+
+@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning")
+@pytest.mark.parametrize(
+ "ns,version,type,id",
+ [
+ (
+ "foo",
+ 1,
+ ExtendedObjectType.CONTENT,
+ "abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 2,
+ ExtendedObjectType.CONTENT,
+ "def8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ ("swh", 1, ExtendedObjectType.DIRECTORY, "aaaa"),
+ ],
+)
+def test_ExtendedSWHID_validation_error(ns, version, type, id):
+ with pytest.raises(ValidationError):
+ ExtendedSWHID(
+ namespace=ns, scheme_version=version, object_type=type, object_id=_x(id),
+ )
+
+
+def test_ExtendedSWHID_hash():
+ object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+ )
+
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ )
+
+ # ExtendedSWHID has no qualifiers dictionary, so there is no alternative key
+ # ordering to exercise here; this repeats the same check as above.
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ )
+
+
+def test_ExtendedSWHID_eq():
+ object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+
+
+def test_object_types():
+ """Checks ExtendedObjectType is a superset of ObjectType"""
+ for member in ObjectType:
+ assert getattr(ExtendedObjectType, member.name).value == member.value

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 2:59 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217295

Event Timeline