diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,3 @@ swh.core[db,http] >= 0.5 -swh.model >= 0.10.0 +swh.model >= 1.0.0 swh.objstorage >= 0.2.2 diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py --- a/swh/storage/api/serializers.py +++ b/swh/storage/api/serializers.py @@ -1,4 +1,4 @@ -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,7 +7,13 @@ from typing import Callable, Dict, List, Tuple -from swh.model.identifiers import SWHID, parse_swhid +from swh.model.identifiers import ( + SWHID, + CoreSWHID, + ExtendedSWHID, + QualifiedSWHID, + parse_swhid, +) import swh.model.model as model from swh.storage import interface @@ -36,7 +42,9 @@ ENCODERS: List[Tuple[type, str, Callable]] = [ (model.BaseModel, "model", _encode_model_object), (SWHID, "swhid", str), - (model.MetadataTargetType, "model_enum", _encode_enum), + (CoreSWHID, "core_swhid", str), + (ExtendedSWHID, "extended_swhid", str), + (QualifiedSWHID, "qualified_swhid", str), (model.MetadataAuthorityType, "model_enum", _encode_enum), (interface.ListOrder, "storage_enum", _encode_enum), ] @@ -44,8 +52,10 @@ DECODERS: Dict[str, Callable] = { "swhid": parse_swhid, + "core_swhid": CoreSWHID.from_string, + "extended_swhid": ExtendedSWHID.from_string, + "qualified_swhid": QualifiedSWHID.from_string, "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d), "model_enum": _decode_model_enum, - "model_enum": _decode_model_enum, "storage_enum": _decode_storage_enum, } diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -27,7 +27,7 @@ from 
swh.core.api.classes import stream_results from swh.core.api.serializers import msgpack_dumps, msgpack_loads from swh.model.hashutil import DEFAULT_ALGORITHMS -from swh.model.identifiers import SWHID, parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedSWHID from swh.model.model import ( Content, Directory, @@ -35,7 +35,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -1172,7 +1171,7 @@ try: row = RawExtrinsicMetadataRow( - type=metadata_entry.type.value, + type=metadata_entry.target.object_type.name.lower(), target=str(metadata_entry.target), authority_type=metadata_entry.authority.type.value, authority_url=metadata_entry.authority.url, @@ -1195,26 +1194,12 @@ def raw_extrinsic_metadata_get( self, - type: MetadataTargetType, - target: Union[str, SWHID], + target: ExtendedSWHID, authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, ) -> PagedResult[RawExtrinsicMetadata]: - if type == MetadataTargetType.ORIGIN: - if isinstance(target, SWHID): - raise StorageArgumentException( - f"raw_extrinsic_metadata_get called with type='origin', " - f"but provided target is a SWHID: {target!r}" - ) - else: - if not isinstance(target, SWHID): - raise StorageArgumentException( - f"raw_extrinsic_metadata_get called with type!='origin', " - f"but provided target is not a SWHID: {target!r}" - ) - if page_token is not None: (after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads( base64.b64decode(page_token) @@ -1250,7 +1235,6 @@ assert str(target) == entry.target result = RawExtrinsicMetadata( - type=MetadataTargetType(entry.type), target=target, authority=MetadataAuthority( type=MetadataAuthorityType(entry.authority_type), @@ -1264,11 +1248,11 @@ metadata=entry.metadata, origin=entry.origin, visit=entry.visit, - snapshot=map_optional(parse_swhid, entry.snapshot), - release=map_optional(parse_swhid, 
entry.release), - revision=map_optional(parse_swhid, entry.revision), + snapshot=map_optional(CoreSWHID.from_string, entry.snapshot), + release=map_optional(CoreSWHID.from_string, entry.release), + revision=map_optional(CoreSWHID.from_string, entry.revision), path=entry.path, - directory=map_optional(parse_swhid, entry.directory), + directory=map_optional(CoreSWHID.from_string, entry.directory), ) results.append(result) diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -5,20 +5,19 @@ import datetime from enum import Enum -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union +from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar from typing_extensions import Protocol, TypedDict, runtime_checkable from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult -from swh.model.identifiers import SWHID +from swh.model.identifiers import ExtendedSWHID from swh.model.model import ( Content, Directory, MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -1099,8 +1098,7 @@ @remote_api_endpoint("raw_extrinsic_metadata/get") def raw_extrinsic_metadata_get( self, - type: MetadataTargetType, - target: Union[str, SWHID], + target: ExtendedSWHID, authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, @@ -1109,8 +1107,7 @@ """Retrieve list of all raw_extrinsic_metadata entries for the id Args: - type: one of the values of swh.model.model.MetadataTargetType - target: an URL if type is 'origin', else a core SWHID + target: the SWHID of the object to find metadata on authority: a dict containing keys `type` and `url`.
after: minimum discovery_date for a result to be returned page_token: opaque token, used to get the next page of results diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py --- a/swh/storage/migrate_extrinsic_metadata.py +++ b/swh/storage/migrate_extrinsic_metadata.py @@ -38,12 +38,17 @@ from swh.core.db import BaseDb from swh.model.hashutil import hash_to_hex -from swh.model.identifiers import SWHID, parse_swhid +from swh.model.identifiers import ( + CoreSWHID, + ExtendedObjectType, + ExtendedSWHID, + ObjectType, + QualifiedSWHID, +) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, RawExtrinsicMetadata, Sha1Git, ) @@ -412,12 +417,11 @@ dry_run: bool, ): """Does the actual loading to swh-storage.""" - directory_swhid = SWHID( - object_type="directory", object_id=hash_to_hex(directory_id) + directory_swhid = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id ) - revision_swhid = SWHID(object_type="revision", object_id=hash_to_hex(revision_id)) + revision_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id) obj = RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=directory_swhid, discovery_date=discovery_date, authority=authority, @@ -564,7 +568,7 @@ assert_origin_exists(storage, origin) # check the origin we computed matches the one in the deposit db - swhid_origin = parse_swhid(swhid).metadata["origin"] + swhid_origin = QualifiedSWHID.from_string(swhid).origin if origin is not None: # explicit list of mistakes that happened in the past, but shouldn't # happen again: diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py --- a/swh/storage/postgresql/converters.py +++ b/swh/storage/postgresql/converters.py @@ -7,12 +7,11 @@ from typing import Any, Dict, Optional from swh.core.utils import encode_with_unescape -from swh.model.identifiers import parse_swhid +from 
swh.model.identifiers import CoreSWHID, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Person, RawExtrinsicMetadata, @@ -295,13 +294,8 @@ def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata: - type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"]) - target = row["raw_extrinsic_metadata.target"] - if type_ != MetadataTargetType.ORIGIN: - target = parse_swhid(target) return RawExtrinsicMetadata( - type=type_, - target=target, + target=ExtendedSWHID.from_string(row["raw_extrinsic_metadata.target"]), authority=MetadataAuthority( type=MetadataAuthorityType(row["metadata_authority.type"]), url=row["metadata_authority.url"], @@ -314,9 +308,9 @@ metadata=row["raw_extrinsic_metadata.metadata"], origin=row["origin"], visit=row["visit"], - snapshot=map_optional(parse_swhid, row["snapshot"]), - release=map_optional(parse_swhid, row["release"]), - revision=map_optional(parse_swhid, row["revision"]), + snapshot=map_optional(CoreSWHID.from_string, row["snapshot"]), + release=map_optional(CoreSWHID.from_string, row["release"]), + revision=map_optional(CoreSWHID.from_string, row["revision"]), path=row["path"], - directory=map_optional(parse_swhid, row["directory"]), + directory=map_optional(CoreSWHID.from_string, row["directory"]), ) diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -9,7 +9,7 @@ from contextlib import contextmanager import datetime import itertools -from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple import attr import psycopg2 @@ -19,7 +19,7 @@ from swh.core.api.serializers import msgpack_dumps, msgpack_loads from swh.core.db.common import db_transaction, db_transaction_generator from swh.model.hashutil import 
DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID +from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID from swh.model.model import ( SHA1_SIZE, Content, @@ -27,7 +27,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -1232,13 +1231,13 @@ ) -> None: metadata = list(metadata) self.journal_writer.raw_extrinsic_metadata_add(metadata) - counter = Counter[MetadataTargetType]() + counter = Counter[ExtendedObjectType]() for metadata_entry in metadata: authority_id = self._get_authority_id(metadata_entry.authority, db, cur) fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur) db.raw_extrinsic_metadata_add( - type=metadata_entry.type.value, + type=metadata_entry.target.object_type.name.lower(), target=str(metadata_entry.target), discovery_date=metadata_entry.discovery_date, authority_id=authority_id, @@ -1254,20 +1253,19 @@ directory=map_optional(str, metadata_entry.directory), cur=cur, ) - counter[metadata_entry.type] += 1 + counter[metadata_entry.target.object_type] += 1 for (type, count) in counter.items(): send_metric( f"{type.value}_metadata:add", count=count, - method_name=f"{type.value}_metadata_add", + method_name=f"{type.name.lower()}_metadata_add", ) @db_transaction() def raw_extrinsic_metadata_get( self, - type: MetadataTargetType, - target: Union[str, SWHID], + target: ExtendedSWHID, authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, @@ -1275,19 +1273,6 @@ db=None, cur=None, ) -> PagedResult[RawExtrinsicMetadata]: - if type == MetadataTargetType.ORIGIN: - if isinstance(target, SWHID): - raise StorageArgumentException( - f"raw_extrinsic_metadata_get called with type='origin', " - f"but provided target is a SWHID: {target!r}" - ) - else: - if not isinstance(target, SWHID): - raise StorageArgumentException( - f"raw_extrinsic_metadata_get called with type!='origin', 
" - f"but provided target is not a SWHID: {target!r}" - ) - if page_token: (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token)) if after and after_time < after: diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py @@ -11,12 +11,11 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) @@ -32,7 +31,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def test_cran_package_from_url(): @@ -118,7 +119,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc, @@ -128,7 +128,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269" ), ), @@ -207,7 +207,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc, @@ -217,7 +216,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e" ), ), @@ -285,7 
+284,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc, @@ -295,7 +293,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py @@ -15,12 +15,11 @@ import attr import pytest -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -48,7 +47,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def now(): @@ -437,7 +438,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, @@ -447,7 +447,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" ), ), @@ -554,7 +554,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc @@ -564,7 +563,7 @@ 
format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py @@ -11,12 +11,11 @@ import json from unittest.mock import MagicMock, Mock, call -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) @@ -51,7 +50,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def get_mock_deposit_cur(row_dicts): @@ -186,7 +187,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc @@ -196,7 +196,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" ), ), @@ -205,7 +205,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc @@ -215,7 +214,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( 
"swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" ), ), @@ -352,7 +351,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc @@ -362,7 +360,7 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" ), ), @@ -371,7 +369,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc @@ -381,7 +378,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" ), ), @@ -517,7 +514,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc @@ -527,7 +523,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" ), ), @@ -536,7 +532,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc @@ -546,7 +541,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" ), ), 
@@ -681,7 +676,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc @@ -691,7 +685,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" ), ), @@ -700,7 +694,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc @@ -710,7 +703,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" ), ), @@ -856,7 +849,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc @@ -866,7 +858,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), ), @@ -875,7 +867,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc @@ -885,7 +876,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), ), @@ -894,7 +885,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( 
- type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc @@ -904,7 +894,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), ), @@ -1075,7 +1065,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc @@ -1085,7 +1074,7 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" ), ), @@ -1183,7 +1172,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc @@ -1193,7 +1181,7 @@ format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" ), ), @@ -1316,7 +1304,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc @@ -1326,7 +1313,7 @@ format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" ), ), @@ -1335,7 +1322,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - 
type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc @@ -1345,7 +1331,7 @@ format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py @@ -11,12 +11,11 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) @@ -32,7 +31,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def test_gnu(): @@ -92,7 +93,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc @@ -102,7 +102,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py @@ -11,12 +11,11 @@ import 
json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) @@ -37,7 +36,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def test_nixguix(): @@ -90,7 +91,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc @@ -100,7 +100,7 @@ format="nixguix-sources-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" ), ), @@ -109,7 +109,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc @@ -119,7 +118,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py @@ -11,12 +11,11 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, 
MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) @@ -38,7 +37,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def test_npm_package_from_source_url(): @@ -142,7 +143,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, @@ -152,7 +152,7 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" ), ), @@ -161,7 +161,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, @@ -171,7 +170,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" ), ), @@ -247,7 +246,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, @@ -257,7 +255,7 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" ), ), @@ -266,7 +264,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, @@ 
-276,7 +273,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" ), ), @@ -348,7 +345,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, @@ -358,7 +354,7 @@ format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" ), ), @@ -367,7 +363,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, @@ -377,7 +372,7 @@ format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), origin=origin_url, - revision=parse_swhid( + revision=CoreSWHID.from_string( "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" ), ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py @@ -13,12 +13,11 @@ import attr -from swh.model.identifiers import parse_swhid +from swh.model.identifiers import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -46,7 +45,9 @@ ) DIRECTORY_ID = b"a" * 20 -DIRECTORY_SWHID = parse_swhid("swh:1:dir:" + DIRECTORY_ID.hex()) +DIRECTORY_SWHID = ExtendedSWHID( + object_type=ExtendedObjectType.DIRECTORY, object_id=DIRECTORY_ID +) def now(): @@ -280,13 +281,14 @@ deposit_cur 
= None handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) - revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") + revision_swhid = CoreSWHID.from_string( + "swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517" + ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, @@ -302,11 +304,10 @@ next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, @@ -403,13 +404,14 @@ handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) - revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") + revision_swhid = CoreSWHID.from_string( + "swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca" + ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, @@ -425,11 +427,10 @@ next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, 
target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, @@ -509,17 +510,18 @@ deposit_cur = None handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) - revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") + revision_swhid = CoreSWHID.from_string( + "swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2" + ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, @@ -621,17 +623,18 @@ deposit_cur = None handle_row(copy.deepcopy(row), storage, deposit_cur, dry_run=False) - revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") + revision_swhid = CoreSWHID.from_string( + "swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2" + ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ 
b/swh/storage/tests/storage_data.py @@ -9,8 +9,8 @@ import attr from swh.model import from_disk -from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import parse_swhid +from swh.model.hashutil import hash_to_bytes +from swh.model.identifiers import ExtendedObjectType, ExtendedSWHID from swh.model.model import ( Content, Directory, @@ -18,7 +18,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Origin, OriginVisit, @@ -463,8 +462,9 @@ snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot) content_metadata1 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), + target=ExtendedSWHID( + object_type=ExtendedObjectType.CONTENT, object_id=content.sha1_git + ), origin=origin.url, discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc @@ -475,8 +475,9 @@ metadata=b'{"foo": "bar"}', ) content_metadata2 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), + target=ExtendedSWHID( + object_type=ExtendedObjectType.CONTENT, object_id=content.sha1_git + ), origin=origin2.url, discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc @@ -487,8 +488,9 @@ metadata=b"foo: bar", ) content_metadata3 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, - target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), + target=ExtendedSWHID( + object_type=ExtendedObjectType.CONTENT, object_id=content.sha1_git + ), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -498,10 +500,10 @@ metadata=b"foo: bar", origin=origin.url, visit=42, - snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"), - release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"), - revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision.id)}"), - 
directory=parse_swhid(f"swh:1:dir:{hash_to_hex(directory.id)}"), + snapshot=snapshot.swhid(), + release=release.swhid(), + revision=revision.swhid(), + directory=directory.swhid(), path=b"/foo/bar", ) @@ -512,8 +514,7 @@ ) origin_metadata1 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=Origin(origin.url).swhid(), discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), @@ -523,8 +524,7 @@ metadata=b'{"foo": "bar"}', ) origin_metadata2 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=Origin(origin.url).swhid(), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -534,8 +534,7 @@ metadata=b"foo: bar", ) origin_metadata3 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=Origin(origin.url).swhid(), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -19,11 +19,9 @@ from swh.model import from_disk from swh.model.hashutil import hash_to_bytes from swh.model.hypothesis_strategies import objects -from swh.model.identifiers import SWHID from swh.model.model import ( Content, Directory, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -3380,17 +3378,13 @@ authority = sample_data.metadata_authority content_metadata = sample_data.content_metadata[:2] - content_swhid = SWHID( - object_type="content", object_id=hash_to_bytes(content.sha1_git) - ) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) swh_storage.raw_extrinsic_metadata_add(content_metadata) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority + content.swhid().to_extended(), authority ) assert result.next_page_token is 
None assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list( @@ -3412,9 +3406,6 @@ fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID( - object_type="content", object_id=hash_to_bytes(content.sha1_git) - ) new_content_metadata2 = RawExtrinsicMetadata.from_dict( { @@ -3431,7 +3422,7 @@ swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority + content.swhid().to_extended(), authority ) assert result.next_page_token is None @@ -3453,12 +3444,10 @@ content1_metadata3, ) = sample_data.content_metadata[:3] - content1_swhid = SWHID(object_type="content", object_id=content.sha1_git) - content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git) content2_metadata = RawExtrinsicMetadata.from_dict( { **remove_keys(content1_metadata2.to_dict(), ("id",)), # recompute id - "target": str(content2_swhid), + "target": str(content2.swhid()), } ) @@ -3475,7 +3464,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content1_swhid, authority + content.swhid().to_extended(), authority ) assert result.next_page_token is None assert [content1_metadata1, content1_metadata2] == list( @@ -3483,7 +3472,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content1_swhid, authority2 + content.swhid().to_extended(), authority2 ) assert result.next_page_token is None assert [content1_metadata3] == list( @@ -3491,7 +3480,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content2_swhid, authority + content2.swhid().to_extended(), authority ) assert result.next_page_token is None assert [content2_metadata] == list(result.results,) @@ -3502,16 +3491,13 @@ authority = sample_data.metadata_authority content_metadata, 
content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, + content.swhid().to_extended(), authority, after=content_metadata.discovery_date - timedelta(seconds=1), ) @@ -3521,8 +3507,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, + content.swhid().to_extended(), authority, after=content_metadata.discovery_date, ) @@ -3530,8 +3515,7 @@ assert result.results == [content_metadata2] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, + content.swhid().to_extended(), authority, after=content_metadata2.discovery_date, ) @@ -3544,24 +3528,19 @@ authority = sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority - ) + swh_storage.raw_extrinsic_metadata_get(content.swhid().to_extended(), authority) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + content.swhid().to_extended(), authority, limit=1 ) assert result.next_page_token is not None assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, + content.swhid().to_extended(), authority, limit=1, page_token=result.next_page_token, @@ -3575,8 +3554,6 @@ authority = 
sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) swh_storage.metadata_authority_add([authority]) @@ -3593,14 +3570,13 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + content.swhid().to_extended(), authority, limit=1 ) assert result.next_page_token is not None assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, + content.swhid().to_extended(), authority, limit=1, page_token=result.next_page_token, @@ -3609,21 +3585,6 @@ assert result.results[0].to_dict() == new_content_metadata2.to_dict() assert result.results == [new_content_metadata2] - def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): - origin = sample_data.origin - fetcher = sample_data.metadata_fetcher - authority = sample_data.metadata_authority - content_metadata, content_metadata2 = sample_data.content_metadata[:2] - - swh_storage.metadata_fetcher_add([fetcher]) - swh_storage.metadata_authority_add([authority]) - swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) - - with pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, origin.url, authority - ) - def test_origin_metadata_add(self, swh_storage, sample_data): origin = sample_data.origin fetcher = sample_data.metadata_fetcher @@ -3638,7 +3599,7 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority + Origin(origin.url).swhid(), authority ) assert result.next_page_token is None assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [ @@ -3680,7 +3641,7 @@ 
swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority + Origin(origin.url).swhid(), authority ) assert result.next_page_token is None @@ -3708,7 +3669,7 @@ origin2_metadata = RawExtrinsicMetadata.from_dict( { **remove_keys(origin1_metadata2.to_dict(), ("id",)), # recompute id - "target": origin2.url, + "target": str(Origin(origin2.url).swhid()), } ) @@ -3720,7 +3681,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority + Origin(origin.url).swhid(), authority ) assert result.next_page_token is None assert [origin1_metadata1, origin1_metadata2] == list( @@ -3728,7 +3689,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority2 + Origin(origin.url).swhid(), authority2 ) assert result.next_page_token is None assert [origin1_metadata3] == list( @@ -3736,7 +3697,7 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin2.url, authority + Origin(origin2.url).swhid(), authority ) assert result.next_page_token is None assert [origin2_metadata] == list(result.results,) @@ -3754,8 +3715,7 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, + Origin(origin.url).swhid(), authority, after=origin_metadata.discovery_date - timedelta(seconds=1), ) @@ -3766,17 +3726,13 @@ ] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, - authority, - after=origin_metadata.discovery_date, + Origin(origin.url).swhid(), authority, after=origin_metadata.discovery_date, ) assert result.next_page_token is None assert result.results == [origin_metadata2] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, + Origin(origin.url).swhid(), authority, 
after=origin_metadata2.discovery_date, ) @@ -3795,19 +3751,16 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority - ) + swh_storage.raw_extrinsic_metadata_get(Origin(origin.url).swhid(), authority) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority, limit=1 + Origin(origin.url).swhid(), authority, limit=1 ) assert result.next_page_token is not None assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, + Origin(origin.url).swhid(), authority, limit=1, page_token=result.next_page_token, @@ -3836,14 +3789,13 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority, limit=1 + Origin(origin.url).swhid(), authority, limit=1 ) assert result.next_page_token is not None assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, + Origin(origin.url).swhid(), authority, limit=1, page_token=result.next_page_token, @@ -3873,24 +3825,6 @@ with pytest.raises(StorageArgumentException, match="fetcher"): swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data): - origin = sample_data.origin - authority = sample_data.metadata_authority - fetcher = sample_data.metadata_fetcher - origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] - content_metadata = sample_data.content_metadata[0] - assert swh_storage.origin_add([origin]) == {"origin:add": 1} - - swh_storage.metadata_fetcher_add([fetcher]) - swh_storage.metadata_authority_add([authority]) - - swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - - with 
pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, content_metadata.target, authority, - ) - class TestStorageGeneratedData: def test_generate_content_get_data(self, swh_storage, swh_contents):