diff --git a/swh/model/model.py b/swh/model/model.py --- a/swh/model/model.py +++ b/swh/model/model.py @@ -67,7 +67,7 @@ "Helper function used by BaseModel.to_dict()" if isinstance(value, BaseModel): return value.to_dict() - elif isinstance(value, SWHID): + elif isinstance(value, (SWHID, ExtendedSWHID)): return str(value) elif isinstance(value, Enum): return value.value @@ -881,25 +881,12 @@ return {"name": self.name, "version": self.version} -class MetadataTargetType(Enum): - """The type of object extrinsic metadata refer to.""" - - CONTENT = "content" - DIRECTORY = "directory" - REVISION = "revision" - RELEASE = "release" - SNAPSHOT = "snapshot" - ORIGIN = "origin" - - @attr.s(frozen=True, slots=True) class RawExtrinsicMetadata(BaseModel): object_type: Final = "raw_extrinsic_metadata" # target object - type = attr.ib(type=MetadataTargetType, validator=type_validator()) - target = attr.ib(type=Union[str, SWHID], validator=type_validator()) - """URL if type=MetadataTargetType.ORIGIN, else core SWHID""" + target = attr.ib(type=ExtendedSWHID, validator=type_validator()) # source discovery_date = attr.ib(type=datetime.datetime, validator=type_validator()) @@ -919,16 +906,6 @@ path = attr.ib(type=Optional[bytes], default=None, validator=type_validator()) directory = attr.ib(type=Optional[SWHID], default=None, validator=type_validator()) - @target.validator - def check_target(self, attribute, value): - if self.type == MetadataTargetType.ORIGIN: - if isinstance(value, SWHID) or value.startswith("swh:"): - raise ValueError( - "Got SWHID as target for origin metadata (expected an URL)." - ) - else: - self._check_swhid(self.type.value, value) - @discovery_date.validator def check_discovery_date(self, attribute, value): """Checks the discovery_date has a timezone.""" @@ -940,15 +917,16 @@ if value is None: return - if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.SNAPSHOT, + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'origin' context for {self.type.value} object: {value}" + f"Unexpected 'origin' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) if value.startswith("swh:"): @@ -964,15 +942,16 @@ if value is None: return - if self.type not in ( - MetadataTargetType.SNAPSHOT, - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.SNAPSHOT, + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'visit' context for {self.type.value} object: {value}" + f"Unexpected 'visit' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) if self.origin is None: @@ -986,14 +965,15 @@ if value is None: return - if self.type not in ( - MetadataTargetType.RELEASE, - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.RELEASE, + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'snapshot' context for {self.type.value} object: {value}" + f"Unexpected 'snapshot' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) self._check_swhid("snapshot", value) @@ -1003,13 +983,14 @@ if value is None: return - if self.type not in ( - MetadataTargetType.REVISION, - MetadataTargetType.DIRECTORY, - MetadataTargetType.CONTENT, + if self.target.object_type not in ( + SwhidExtendedObjectType.REVISION, + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, ): raise ValueError( - f"Unexpected 'release' context for {self.type.value} object: {value}" + f"Unexpected 'release' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) self._check_swhid("release", value) @@ -1019,9 +1000,13 @@ if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): + if self.target.object_type not in ( + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, + ): raise ValueError( - f"Unexpected 'revision' context for {self.type.value} object: {value}" + f"Unexpected 'revision' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) self._check_swhid("revision", value) @@ -1031,9 +1016,13 @@ if value is None: return - if self.type not in (MetadataTargetType.DIRECTORY, MetadataTargetType.CONTENT,): + if self.target.object_type not in ( + SwhidExtendedObjectType.DIRECTORY, + SwhidExtendedObjectType.CONTENT, + ): raise ValueError( - f"Unexpected 'path' context for {self.type.value} object: {value}" + f"Unexpected 'path' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) @directory.validator @@ -1041,9 +1030,10 @@ if value is None: return - if self.type not in (MetadataTargetType.CONTENT,): + if self.target.object_type not in (SwhidExtendedObjectType.CONTENT,): raise ValueError( - f"Unexpected 'directory' context for {self.type.value} object: {value}" + f"Unexpected 'directory' context for " + f"{self.target.object_type.name.lower()} object: {value}" ) self._check_swhid("directory", value) @@ -1082,14 +1072,11 @@ def from_dict(cls, d): d = { **d, - "type": MetadataTargetType(d["type"]), + "target": ExtendedSWHID.from_string(d["target"]), "authority": MetadataAuthority.from_dict(d["authority"]), "fetcher": MetadataFetcher.from_dict(d["fetcher"]), } - if d["type"] != MetadataTargetType.ORIGIN: - d["target"] = parse_swhid(d["target"]) - swhid_keys = ("snapshot", "release", "revision", "directory") for swhid_key in swhid_keys: if d.get(swhid_key): @@ -1099,7 +1086,6 @@ def unique_key(self) -> KeyType: return { - "type": self.type.value, "target": str(self.target), "authority_type": self.authority.type.value, "authority_url": self.authority.url, diff --git a/swh/model/tests/swh_model_data.py b/swh/model/tests/swh_model_data.py --- a/swh/model/tests/swh_model_data.py +++ b/swh/model/tests/swh_model_data.py @@ -8,8 +8,8 @@ import attr -from swh.model.hashutil import MultiHash, hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.hashutil import MultiHash, hash_to_bytes +from swh.model.identifiers import ExtendedSWHID from swh.model.model import ( BaseModel, Content, @@ -18,7 +18,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Origin, OriginVisit, @@ -310,8 +309,7 @@ RAW_EXTRINSIC_METADATA = [ RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="http://example.org/foo.git", + target=Origin("http://example.org/foo.git").swhid(), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None), @@ -319,10 +317,7 @@ metadata=b'{"foo": "bar"}', ), RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=SWHID( - object_type="content", object_id=hash_to_hex(CONTENTS[0].sha1_git) - ), + target=ExtendedSWHID.from_string(str(CONTENTS[0].swhid())), discovery_date=datetime.datetime(2020, 7, 30, 17, 8, 20, tzinfo=UTC), authority=attr.evolve(METADATA_AUTHORITIES[0], metadata=None), fetcher=attr.evolve(METADATA_FETCHERS[0], metadata=None), diff --git a/swh/model/tests/test_model.py b/swh/model/tests/test_model.py --- a/swh/model/tests/test_model.py +++ b/swh/model/tests/test_model.py @@ -16,10 +16,10 @@ import swh.model.hypothesis_strategies as strategies from swh.model.identifiers import ( SWHID, + ExtendedSWHID, content_identifier, directory_identifier, origin_identifier, - parse_swhid, release_identifier, revision_identifier, snapshot_identifier, @@ -31,7 +31,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, MissingData, Origin, OriginVisit, @@ -808,8 +807,13 @@ type=MetadataAuthorityType.FORGE, url="https://forge.softwareheritage.org", ) _metadata_fetcher = MetadataFetcher(name="test-fetcher", version="0.0.1",) -_content_swhid = parse_swhid("swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2") +_content_swhid = ExtendedSWHID.from_string( + "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2" +) _origin_url = "https://forge.softwareheritage.org/source/swh-model.git" +_origin_swhid = ExtendedSWHID.from_string( + "swh:1:ori:94a9ed024d3859793618152ea559a168bbcbb5e2" +) _dummy_qualifiers = {"origin": "https://example.com", "lines": "42"} _common_metadata_fields = dict( discovery_date=datetime.datetime.now(tz=datetime.timezone.utc), @@ -824,15 +828,11 @@ """Checks valid RawExtrinsicMetadata objects don't raise an error.""" # Simplest case - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields - ) + RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields) # Object with an SWHID RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - **_common_metadata_fields, + target=_content_swhid, **_common_metadata_fields, ) @@ -847,23 +847,15 @@ "metadata": b'{"origin": "https://example.com", "lines": "42"}', } - m = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, target=_origin_url, **_common_metadata_fields, - ) + m = RawExtrinsicMetadata(target=_origin_swhid, **_common_metadata_fields,) assert m.to_dict() == { - "type": "origin", - "target": _origin_url, + "target": str(_origin_swhid), **common_fields, } assert RawExtrinsicMetadata.from_dict(m.to_dict()) == m - m = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - **_common_metadata_fields, - ) + m = RawExtrinsicMetadata(target=_content_swhid, **_common_metadata_fields,) assert m.to_dict() == { - "type": "content", "target": "swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **common_fields, } @@ -872,67 +864,18 @@ def test_metadata_invalid_target(): """Checks various invalid values for the 'target' field.""" - - # SWHID for an origin - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_content_swhid, - **_common_metadata_fields, - ) - - # SWHID for an origin (even when passed as string) - with pytest.raises(ValueError, match="expected an URL"): - RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", - **_common_metadata_fields, - ) - - # URL for a non-origin - with pytest.raises(ValueError, match="Expected SWHID, got a string"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_origin_url, - **_common_metadata_fields, - ) - # SWHID passed as string instead of SWHID - with pytest.raises(ValueError, match="Expected SWHID, got a string"): + with pytest.raises(ValueError, match="target must be.*ExtendedSWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **_common_metadata_fields, ) - # Object type does not match the SWHID - with pytest.raises( - ValueError, match="Expected SWHID type 'revision', got 'content'" - ): - RawExtrinsicMetadata( - type=MetadataTargetType.REVISION, - target=_content_swhid, - **_common_metadata_fields, - ) - - # Non-core SWHID - with pytest.raises(ValueError, match="Expected core SWHID"): - RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=SWHID( - object_type="content", - object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", - metadata=_dummy_qualifiers, - ), - **_common_metadata_fields, - ) - def test_metadata_naive_datetime(): with pytest.raises(ValueError, match="must be a timezone-aware datetime"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, **{**_common_metadata_fields, "discovery_date": datetime.datetime.now()}, ) @@ -945,24 +888,17 @@ ValueError, match="Unexpected 'origin' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - origin=_origin_url, - **_common_metadata_fields, + target=_origin_swhid, origin=_origin_url, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - origin=_origin_url, - **_common_metadata_fields, + target=_content_swhid, origin=_origin_url, **_common_metadata_fields, ) # SWHIDs aren't valid origin URLs with pytest.raises(ValueError, match="SWHID used as context origin URL"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, origin="swh:1:cnt:94a9ed024d3859793618152ea559a168bbcbb5e2", **_common_metadata_fields, @@ -977,34 +913,23 @@ ValueError, match="Unexpected 'visit' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - visit=42, - **_common_metadata_fields, + target=_origin_swhid, visit=42, **_common_metadata_fields, ) # but all other types can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - origin=_origin_url, - visit=42, - **_common_metadata_fields, + target=_content_swhid, origin=_origin_url, visit=42, **_common_metadata_fields, ) # Missing 'origin' with pytest.raises(ValueError, match="'origin' context must be set if 'visit' is"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - visit=42, - **_common_metadata_fields, + target=_content_swhid, visit=42, **_common_metadata_fields, ) # visit id must be positive with pytest.raises(ValueError, match="Nonpositive visit id"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, origin=_origin_url, visit=-42, @@ -1020,8 +945,7 @@ ValueError, match="Unexpected 'snapshot' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, snapshot=SWHID( object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", @@ -1031,7 +955,6 @@ # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, snapshot=SWHID( object_type="snapshot", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" @@ -1042,7 +965,6 @@ # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, snapshot=SWHID( object_type="snapshot", @@ -1057,7 +979,6 @@ ValueError, match="Expected SWHID type 'snapshot', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, snapshot=SWHID( object_type="content", @@ -1075,8 +996,7 @@ ValueError, match="Unexpected 'release' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, release=SWHID( object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", @@ -1086,7 +1006,6 @@ # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, release=SWHID( object_type="release", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" @@ -1097,7 +1016,6 @@ # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, release=SWHID( object_type="release", @@ -1112,7 +1030,6 @@ ValueError, match="Expected SWHID type 'release', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, release=SWHID( object_type="content", @@ -1130,8 +1047,7 @@ ValueError, match="Unexpected 'revision' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, revision=SWHID( object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", @@ -1141,7 +1057,6 @@ # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, revision=SWHID( object_type="revision", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2" @@ -1152,7 +1067,6 @@ # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, revision=SWHID( object_type="revision", @@ -1167,7 +1081,6 @@ ValueError, match="Expected SWHID type 'revision', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, revision=SWHID( object_type="content", @@ -1183,18 +1096,12 @@ # Origins can't have a 'path' context with pytest.raises(ValueError, match="Unexpected 'path' context for origin object"): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, - path=b"/foo/bar", - **_common_metadata_fields, + target=_origin_swhid, path=b"/foo/bar", **_common_metadata_fields, ) # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=_content_swhid, - path=b"/foo/bar", - **_common_metadata_fields, + target=_content_swhid, path=b"/foo/bar", **_common_metadata_fields, ) @@ -1206,8 +1113,7 @@ ValueError, match="Unexpected 'directory' context for origin object" ): RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=_origin_url, + target=_origin_swhid, directory=SWHID( object_type="directory", object_id="94a9ed024d3859793618152ea559a168bbcbb5e2", @@ -1217,7 +1123,6 @@ # but content can RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, directory=SWHID( object_type="directory", @@ -1229,7 +1134,6 @@ # Non-core SWHID with pytest.raises(ValueError, match="Expected core SWHID"): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, directory=SWHID( object_type="directory", @@ -1244,7 +1148,6 @@ ValueError, match="Expected SWHID type 'directory', got 'content'" ): RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=_content_swhid, directory=SWHID( object_type="content",