Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124256
D5118.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Subscribers
None
D5118.diff
View Options
diff --git a/swh/model/identifiers.py b/swh/model/identifiers.py
--- a/swh/model/identifiers.py
+++ b/swh/model/identifiers.py
@@ -24,11 +24,10 @@
class ObjectType(enum.Enum):
- """Possible object types of a QualifiedSWHID.
+ """Possible object types of a QualifiedSWHID or CoreSWHID.
The values of each variant is what is used in the SWHID's string representation."""
- ORIGIN = "ori"
SNAPSHOT = "snp"
REVISION = "rev"
RELEASE = "rel"
@@ -36,6 +35,20 @@
CONTENT = "cnt"
+class ExtendedObjectType(enum.Enum):
+ """Possible object types of an ExtendedSWHID.
+
+ The variants are a superset of :class:`ObjectType`'s"""
+
+ SNAPSHOT = "snp"
+ REVISION = "rev"
+ RELEASE = "rel"
+ DIRECTORY = "dir"
+ CONTENT = "cnt"
+ ORIGIN = "ori"
+ RAW_EXTRINSIC_METADATA = "emd"
+
+
# The following are deprecated aliases of the variants defined in ObjectType
# while transitioning from SWHID to QualifiedSWHID
ORIGIN = "origin"
@@ -44,10 +57,12 @@
RELEASE = "release"
DIRECTORY = "directory"
CONTENT = "content"
+RAW_EXTRINSIC_METADATA = "raw_extrinsic_metadata"
SWHID_NAMESPACE = "swh"
SWHID_VERSION = 1
-SWHID_TYPES = ["ori", "snp", "rel", "rev", "dir", "cnt"]
+SWHID_TYPES = ["snp", "rel", "rev", "dir", "cnt"]
+EXTENDED_SWHID_TYPES = SWHID_TYPES + ["ori", "emd"]
SWHID_SEP = ":"
SWHID_CTXT_SEP = ";"
SWHID_QUALIFIERS = {"origin", "anchor", "visit", "path", "lines"}
@@ -55,7 +70,7 @@
SWHID_RE_RAW = (
f"(?P<namespace>{SWHID_NAMESPACE})"
f"{SWHID_SEP}(?P<scheme_version>{SWHID_VERSION})"
- f"{SWHID_SEP}(?P<object_type>{'|'.join(SWHID_TYPES)})"
+ f"{SWHID_SEP}(?P<object_type>{'|'.join(EXTENDED_SWHID_TYPES)})"
f"{SWHID_SEP}(?P<object_id>[0-9a-f]{{40}})"
f"({SWHID_CTXT_SEP}(?P<qualifiers>\\S+))?"
)
@@ -706,6 +721,7 @@
REVISION: {"short_name": "rev", "key_id": "id"},
DIRECTORY: {"short_name": "dir", "key_id": "id"},
CONTENT: {"short_name": "cnt", "key_id": "sha1_git"},
+ RAW_EXTRINSIC_METADATA: {"short_name": "emd", "key_id": "id"},
}
_swhid_type_map = {
@@ -715,6 +731,7 @@
"rev": REVISION,
"dir": DIRECTORY,
"cnt": CONTENT,
+ "emd": RAW_EXTRINSIC_METADATA,
}
@@ -988,6 +1005,93 @@
raise ValidationError(*e.args) from None
+@attr.s(frozen=True, kw_only=True)
+class ExtendedSWHID:
+ """
+ Dataclass holding the relevant info associated to a SoftWare Heritage
+ persistent IDentifier (SWHID).
+
+ It extends `CoreSWHID` by allowing non-standard object types, and should
+ only be used internally to Software Heritage.
+
+ Raises:
+ swh.model.exceptions.ValidationError: In case of invalid object type or id
+
+ To get the raw SWHID string from an instance of this class,
+ use the :func:`str` function:
+
+ >>> swhid = ExtendedSWHID(
+ ... object_type=ExtendedObjectType.CONTENT,
+ ... object_id=bytes.fromhex('8ff44f081d43176474b267de5451f2c2e88089d0'),
+ ... )
+ >>> str(swhid)
+ 'swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0'
+
+ And vice-versa with :meth:`ExtendedSWHID.from_string`:
+
+ >>> swhid == ExtendedSWHID.from_string(
+ ... "swh:1:cnt:8ff44f081d43176474b267de5451f2c2e88089d0"
+ ... )
+ True
+ """
+
+ namespace = attr.ib(type=str, default=SWHID_NAMESPACE)
+ """the namespace of the identifier, defaults to ``swh``"""
+
+ scheme_version = attr.ib(type=int, default=SWHID_VERSION)
+ """the scheme version of the identifier, defaults to 1"""
+
+ object_type = attr.ib(
+ type=ExtendedObjectType,
+ validator=type_validator(),
+ converter=ExtendedObjectType,
+ )
+ """the type of object the identifier points to"""
+
+ object_id = attr.ib(type=bytes, validator=type_validator())
+ """object's identifier"""
+
+ @namespace.validator
+ def check_namespace(self, attribute, value):
+ if value != SWHID_NAMESPACE:
+ raise ValidationError(
+ "Invalid SWHID: invalid namespace: %(namespace)s",
+ params={"namespace": value},
+ )
+
+ @scheme_version.validator
+ def check_scheme_version(self, attribute, value):
+ if value != SWHID_VERSION:
+ raise ValidationError(
+ "Invalid SWHID: invalid version: %(version)s", params={"version": value}
+ )
+
+ @object_id.validator
+ def check_object_id(self, attribute, value):
+ if len(value) != 20:
+ raise ValidationError(
+ "Invalid SWHID: invalid checksum: %(object_id)s",
+ params={"object_id": hash_to_hex(value)},
+ )
+
+ def __str__(self) -> str:
+ return SWHID_SEP.join(
+ [
+ self.namespace,
+ str(self.scheme_version),
+ self.object_type.value,
+ hash_to_hex(self.object_id),
+ ]
+ )
+
+ @classmethod
+ def from_string(cls, s: str) -> ExtendedSWHID:
+ parts = _parse_swhid(s)
+ if parts.pop("qualifiers"):
+ raise ValidationError("ExtendedSWHID does not support qualifiers.")
+ return ExtendedSWHID(**parts)
+
+
@attr.s(frozen=True)
class SWHID:
"""
diff --git a/swh/model/tests/test_identifiers.py b/swh/model/tests/test_identifiers.py
--- a/swh/model/tests/test_identifiers.py
+++ b/swh/model/tests/test_identifiers.py
@@ -21,6 +21,8 @@
SNAPSHOT,
SWHID,
CoreSWHID,
+ ExtendedObjectType,
+ ExtendedSWHID,
ObjectType,
QualifiedSWHID,
normalize_timestamp,
@@ -1277,6 +1279,8 @@
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:ori:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:emd:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;invalid;malformed",
"swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1:snp:foo",
@@ -1444,6 +1448,8 @@
"foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:ori:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:emd:fed8bc9d7a6bcf6db04f476d29314f157507d505",
"swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;visit=swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", # noqa
"swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
"swh:1:snp:foo",
@@ -1503,3 +1509,152 @@
assert CoreSWHID(
object_type=ObjectType.DIRECTORY, object_id=object_id,
) == CoreSWHID(object_type=ObjectType.DIRECTORY, object_id=object_id,)
+
+
+def test_parse_serialize_extended_swhid():
+ for swhid, _type, _version, _hash in [
+ (
+ "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2",
+ ExtendedObjectType.CONTENT,
+ 1,
+ _x("94a9ed024d3859793618152ea559a168bbcbb5e2"),
+ ),
+ (
+ "swh:1:dir:d198bc9d7a6bcf6db04f476d29314f157507d505",
+ ExtendedObjectType.DIRECTORY,
+ 1,
+ _x("d198bc9d7a6bcf6db04f476d29314f157507d505"),
+ ),
+ (
+ "swh:1:rev:309cf2674ee7a0749978cf8265ab91a60aea0f7d",
+ ExtendedObjectType.REVISION,
+ 1,
+ _x("309cf2674ee7a0749978cf8265ab91a60aea0f7d"),
+ ),
+ (
+ "swh:1:rel:22ece559cc7cc2364edc5e5593d63ae8bd229f9f",
+ ExtendedObjectType.RELEASE,
+ 1,
+ _x("22ece559cc7cc2364edc5e5593d63ae8bd229f9f"),
+ ),
+ (
+ "swh:1:snp:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.SNAPSHOT,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ (
+ "swh:1:ori:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.ORIGIN,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ (
+ "swh:1:emd:c7c108084bc0bf3d81436bf980b46e98bd338453",
+ ExtendedObjectType.RAW_EXTRINSIC_METADATA,
+ 1,
+ _x("c7c108084bc0bf3d81436bf980b46e98bd338453"),
+ ),
+ ]:
+ expected_result = ExtendedSWHID(
+ namespace="swh",
+ scheme_version=_version,
+ object_type=_type,
+ object_id=_hash,
+ )
+ actual_result = ExtendedSWHID.from_string(swhid)
+ assert actual_result == expected_result
+ assert str(expected_result) == str(actual_result) == swhid
+
+
+@pytest.mark.parametrize(
+ "invalid_swhid",
+ [
+ "swh:1:cnt",
+ "swh:1:",
+ "swh:",
+ "swh:1:cnt:",
+ "foo:1:cnt:abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:2:dir:def8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:foo:fed8bc9d7a6bcf6db04f476d29314f157507d505",
+ "swh:1:dir:0b6959356d30f1a4e9b7f6bca59b9a336464c03d;visit=swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d", # noqa
+ "swh:1:snp:gh6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ "swh:1:snp:foo",
+ "swh:1: dir: 0b6959356d30f1a4e9b7f6bca59b9a336464c03d",
+ ],
+)
+def test_parse_extended_swhid_parsing_error(invalid_swhid):
+ with pytest.raises(ValidationError):
+ ExtendedSWHID.from_string(invalid_swhid)
+
+
+@pytest.mark.filterwarnings("ignore:.*SWHID.*:DeprecationWarning")
+@pytest.mark.parametrize(
+ "ns,version,type,id",
+ [
+ (
+ "foo",
+ 1,
+ ExtendedObjectType.CONTENT,
+ "abc8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ (
+ "swh",
+ 2,
+ ExtendedObjectType.CONTENT,
+ "def8bc9d7a6bcf6db04f476d29314f157507d505",
+ ),
+ ("swh", 1, ExtendedObjectType.DIRECTORY, "aaaa"),
+ ],
+)
+def test_ExtendedSWHID_validation_error(ns, version, type, id):
+ with pytest.raises(ValidationError):
+ ExtendedSWHID(
+ namespace=ns, scheme_version=version, object_type=type, object_id=_x(id),
+ )
+
+
+def test_ExtendedSWHID_hash():
+ object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+ )
+
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ )
+
+ # NOTE: ExtendedSWHID has no qualifiers dictionary, so there is no ordering
+ # to vary here; this assertion repeats the previous one.
+ assert hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ ) == hash(
+ ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+ )
+
+
+def test_ExtendedSWHID_eq():
+ object_id = _x("94a9ed024d3859793618152ea559a168bbcbb5e2")
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id)
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+
+ assert ExtendedSWHID(
+ object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,
+ ) == ExtendedSWHID(object_type=ExtendedObjectType.DIRECTORY, object_id=object_id,)
+
+
+def test_object_types():
+ """Checks ExtendedObjectType is a superset of ObjectType"""
+ for member in ObjectType:
+ assert getattr(ExtendedObjectType, member.name).value == member.value
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 2:59 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3217295
Attached To
D5118: Add new class ExtendedSWHID as an alternative to SWHID/QualifiedSWHID
Event Timeline
Log In to Comment