diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py --- a/swh/storage/api/serializers.py +++ b/swh/storage/api/serializers.py @@ -7,6 +7,8 @@ from typing import Callable, Dict, List, Tuple +from swh.model.collections import ImmutableDict +from swh.model.identifiers import SWHID, parse_swhid import swh.model.model as model @@ -16,11 +18,24 @@ return d +def _encode_model_enum(obj): + return { + "value": obj.value, + "__type__": type(obj).__name__, + } + + ENCODERS: List[Tuple[type, str, Callable]] = [ (model.BaseModel, "model", _encode_model_object), + (SWHID, "swhid", str), + (model.MetadataTargetType, "model_enum", _encode_model_enum), + (model.MetadataAuthorityType, "model_enum", _encode_model_enum), ] DECODERS: Dict[str, Callable] = { - "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d) + "immutabledict": ImmutableDict, + "swhid": parse_swhid, + "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d), + "model_enum": lambda d: getattr(model, d.pop("__type__"))(d["value"]), } diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -33,6 +33,7 @@ from deprecated import deprecated from swh.core.api.serializers import msgpack_loads, msgpack_dumps +from swh.model.identifiers import SWHID from swh.model.model import ( BaseContent, Content, @@ -45,6 +46,11 @@ OriginVisitStatus, Origin, SHA1_SIZE, + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, + MetadataTargetType, + RawExtrinsicMetadata, ) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.storage.objstorage import ObjStorage @@ -52,7 +58,6 @@ from .converters import origin_url_to_sha1 from .exc import StorageArgumentException, HashCollision -from .extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS from .utils import get_partition_bounds_bytes from .writer import JournalWriter @@ -138,21 +143,33 @@ 
self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {} self._persons = {} - # {origin_url: {authority: [metadata]}} + # {object_type: {id: {authority: [metadata]}}} self._object_metadata: Dict[ - str, + MetadataTargetType, Dict[ - Hashable, - SortedList[Tuple[datetime.datetime, FetcherKey], Dict[str, Any]], + Union[str, SWHID], + Dict[ + Hashable, + SortedList[ + Tuple[datetime.datetime, FetcherKey], RawExtrinsicMetadata + ], + ], ], ] = defaultdict( lambda: defaultdict( - lambda: SortedList(key=lambda x: (x["discovery_date"], x["fetcher"])) + lambda: defaultdict( + lambda: SortedList( + key=lambda x: ( + x.discovery_date, + self._metadata_fetcher_key(x.fetcher), + ) + ) + ) ) ) # noqa - self._metadata_fetchers: Dict[FetcherKey, Dict[str, Any]] = {} - self._metadata_authorities: Dict[Hashable, Dict[str, Any]] = {} + self._metadata_fetchers: Dict[FetcherKey, MetadataFetcher] = {} + self._metadata_authorities: Dict[Hashable, MetadataAuthority] = {} self._objects = defaultdict(list) self._sorted_sha1s = SortedList[bytes, bytes]() @@ -1018,144 +1035,58 @@ def refresh_stat_counters(self): pass - def content_metadata_add( - self, - id: str, - context: Dict[str, Union[str, bytes, int]], - discovery_date: datetime.datetime, - authority: Dict[str, Any], - fetcher: Dict[str, Any], - format: str, - metadata: bytes, - ) -> None: - self._object_metadata_add( - "content", - id, - discovery_date, - authority, - fetcher, - format, - metadata, - context, - ) - - def content_metadata_get( - self, - id: str, - authority: Dict[str, str], - after: Optional[datetime.datetime] = None, - page_token: Optional[bytes] = None, - limit: int = 1000, - ) -> Dict[str, Any]: - return self._object_metadata_get( - "content", id, authority, after, page_token, limit - ) - - def origin_metadata_add( - self, - origin_url: str, - discovery_date: datetime.datetime, - authority: Dict[str, Any], - fetcher: Dict[str, Any], - format: str, - metadata: bytes, - ) -> None: - if not 
isinstance(origin_url, str): - raise StorageArgumentException( - "origin_url must be str, not %r" % (origin_url,) - ) + def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: + for metadata_entry in metadata: + authority_key = self._metadata_authority_key(metadata_entry.authority) + if authority_key not in self._metadata_authorities: + raise StorageArgumentException( + f"Unknown authority {metadata_entry.authority}" + ) + fetcher_key = self._metadata_fetcher_key(metadata_entry.fetcher) + if fetcher_key not in self._metadata_fetchers: + raise StorageArgumentException( + f"Unknown fetcher {metadata_entry.fetcher}" + ) - context: Dict[str, Union[str, bytes, int]] = {} # origins have no context + object_metadata_list = self._object_metadata[metadata_entry.type][ + metadata_entry.id + ][authority_key] - self._object_metadata_add( - "origin", - origin_url, - discovery_date, - authority, - fetcher, - format, - metadata, - context, - ) + for existing_object_metadata in object_metadata_list: + if ( + self._metadata_fetcher_key(existing_object_metadata.fetcher) + == fetcher_key + and existing_object_metadata.discovery_date + == metadata_entry.discovery_date + ): + # Duplicate of an existing one; ignore it. 
+ break + else: + object_metadata_list.add(metadata_entry) - def origin_metadata_get( + def object_metadata_get( self, - origin_url: str, - authority: Dict[str, str], + object_type: MetadataTargetType, + id: Union[str, SWHID], + authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - ) -> Dict[str, Any]: - if not isinstance(origin_url, str): - raise TypeError("origin_url must be str, not %r" % (origin_url,)) - - res = self._object_metadata_get( - "origin", origin_url, authority, after, page_token, limit - ) - res["results"] = copy.deepcopy(res["results"]) - for result in res["results"]: - result["origin_url"] = result.pop("id") - - return res - - def _object_metadata_add( - self, - object_type: str, - id: str, - discovery_date: datetime.datetime, - authority: Dict[str, Any], - fetcher: Dict[str, Any], - format: str, - metadata: bytes, - context: Dict[str, Union[str, bytes, int]], - ) -> None: - check_extrinsic_metadata_context(object_type, context) - if not isinstance(metadata, bytes): - raise StorageArgumentException( - "metadata must be bytes, not %r" % (metadata,) - ) + ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: authority_key = self._metadata_authority_key(authority) - if authority_key not in self._metadata_authorities: - raise StorageArgumentException(f"Unknown authority {authority}") - fetcher_key = self._metadata_fetcher_key(fetcher) - if fetcher_key not in self._metadata_fetchers: - raise StorageArgumentException(f"Unknown fetcher {fetcher}") - - object_metadata_list = self._object_metadata[id][authority_key] - - object_metadata: Dict[str, Any] = { - "id": id, - "discovery_date": discovery_date, - "authority": authority_key, - "fetcher": fetcher_key, - "format": format, - "metadata": metadata, - } - - if CONTEXT_KEYS[object_type]: - object_metadata["context"] = context - for existing_object_metadata in object_metadata_list: - if ( - 
existing_object_metadata["fetcher"] == fetcher_key - and existing_object_metadata["discovery_date"] == discovery_date - ): - # Duplicate of an existing one; replace it. - existing_object_metadata.update(object_metadata) - break + if object_type == MetadataTargetType.ORIGIN: + if isinstance(id, SWHID): + raise StorageArgumentException( + f"object_metadata_get called with object_type='origin', but " + f"provided id is an SWHID: {id!r}" + ) else: - object_metadata_list.add(object_metadata) - - def _object_metadata_get( - self, - object_type: str, - id: str, - authority: Dict[str, str], - after: Optional[datetime.datetime] = None, - page_token: Optional[bytes] = None, - limit: int = 1000, - ) -> Dict[str, Any]: - authority_key = self._metadata_authority_key(authority) + if not isinstance(id, SWHID): + raise StorageArgumentException( + f"object_metadata_get called with object_type!='origin', but " + f"provided id is not an SWHID: {id!r}" + ) if page_token is not None: (after_time, after_fetcher) = msgpack_loads(page_token) @@ -1164,33 +1095,32 @@ raise StorageArgumentException( "page_token is inconsistent with the value of 'after'." 
) - entries = self._object_metadata[id][authority_key].iter_after( + entries = self._object_metadata[object_type][id][authority_key].iter_after( (after_time, after_fetcher) ) elif after is not None: - entries = self._object_metadata[id][authority_key].iter_from((after,)) - entries = (entry for entry in entries if entry["discovery_date"] > after) + entries = self._object_metadata[object_type][id][authority_key].iter_from( + (after,) + ) + entries = (entry for entry in entries if entry.discovery_date > after) else: - entries = iter(self._object_metadata[id][authority_key]) + entries = iter(self._object_metadata[object_type][id][authority_key]) if limit: entries = itertools.islice(entries, 0, limit + 1) results = [] for entry in entries: - authority = self._metadata_authorities[entry["authority"]] - fetcher = self._metadata_fetchers[entry["fetcher"]] + entry_authority = self._metadata_authorities[ + self._metadata_authority_key(entry.authority) + ] + entry_fetcher = self._metadata_fetchers[ + self._metadata_fetcher_key(entry.fetcher) + ] if after: - assert entry["discovery_date"] > after + assert entry.discovery_date > after results.append( - { - **entry, - "authority": {"type": authority["type"], "url": authority["url"],}, - "fetcher": { - "name": fetcher["name"], - "version": fetcher["version"], - }, - } + attr.evolve(entry, authority=entry_authority, fetcher=entry_fetcher,) ) if len(results) > limit: @@ -1199,8 +1129,8 @@ last_result = results[-1] next_page_token: Optional[bytes] = msgpack_dumps( ( - last_result["discovery_date"], - self._metadata_fetcher_key(last_result["fetcher"]), + last_result.discovery_date, + self._metadata_fetcher_key(last_result.fetcher), ) ) else: @@ -1211,37 +1141,31 @@ "results": results, } - def metadata_fetcher_add( - self, name: str, version: str, metadata: Dict[str, Any] - ) -> None: - fetcher = { - "name": name, - "version": version, - "metadata": metadata, - } - key = self._metadata_fetcher_key(fetcher) - if key not in 
self._metadata_fetchers: - self._metadata_fetchers[key] = fetcher + def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher]) -> None: + for fetcher in fetchers: + key = self._metadata_fetcher_key(fetcher) + if key not in self._metadata_fetchers: + self._metadata_fetchers[key] = fetcher - def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]: + def metadata_fetcher_get( + self, name: str, version: str + ) -> Optional[MetadataFetcher]: return self._metadata_fetchers.get( - self._metadata_fetcher_key({"name": name, "version": version}) + self._metadata_fetcher_key(MetadataFetcher(name=name, version=version)) ) - def metadata_authority_add( - self, type: str, url: str, metadata: Dict[str, Any] - ) -> None: - authority = { - "type": type, - "url": url, - "metadata": metadata, - } - key = self._metadata_authority_key(authority) - self._metadata_authorities[key] = authority + def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None: + for authority in authorities: + key = self._metadata_authority_key(authority) + self._metadata_authorities[key] = authority - def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]: + def metadata_authority_get( + self, type: str, url: str + ) -> Optional[MetadataAuthority]: return self._metadata_authorities.get( - self._metadata_authority_key({"type": type, "url": url}) + self._metadata_authority_key( + MetadataAuthority(type=MetadataAuthorityType(type), url=url) + ) ) def _get_origin_url(self, origin): @@ -1266,12 +1190,12 @@ return tuple((key, content.get(key)) for key in sorted(DEFAULT_ALGORITHMS)) @staticmethod - def _metadata_fetcher_key(fetcher: Dict) -> FetcherKey: - return (fetcher["name"], fetcher["version"]) + def _metadata_fetcher_key(fetcher: MetadataFetcher) -> FetcherKey: + return (fetcher.name, fetcher.version) @staticmethod - def _metadata_authority_key(authority: Dict) -> Hashable: - return (authority["type"], authority["url"]) + 
def _metadata_authority_key(authority: MetadataAuthority) -> Hashable: + return (authority.type, authority.url) def diff_directories(self, from_dir, to_dir, track_renaming=False): raise NotImplementedError("InMemoryStorage.diff_directories") diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -8,6 +8,7 @@ from typing import Any, Dict, Iterable, List, Optional, Union from swh.core.api import remote_api_endpoint +from swh.model.identifiers import SWHID from swh.model.model import ( Content, Directory, @@ -18,6 +19,10 @@ Release, Snapshot, SkippedContent, + MetadataAuthority, + MetadataFetcher, + MetadataTargetType, + RawExtrinsicMetadata, ) @@ -1107,118 +1112,38 @@ """Recomputes the statistics for `stat_counters`.""" ... - @remote_api_endpoint("content/metadata/add") - def content_metadata_add( - self, - id: str, - context: Dict[str, Union[str, bytes, int]], - discovery_date: datetime.datetime, - authority: Dict[str, Any], - fetcher: Dict[str, Any], - format: str, - metadata: bytes, - ) -> None: - """Add a content_metadata for the content at discovery_date, - obtained using the `fetcher` from the `authority`. - - The authority and fetcher must be known to the storage before - using this endpoint. - - If there is already content metadata for the same content, authority, - fetcher, and at the same date; the new one will be either dropped or - will replace the existing one - (it is unspecified which one of these two behaviors happens). - - Args: - discovery_date: when the metadata was fetched. - authority: a dict containing keys `type` and `url`. - fetcher: a dict containing keys `name` and `version`. - format: text field indicating the format of the content of the - metadata: blob of raw metadata - """ - ... 
- - @remote_api_endpoint("content/metadata/get") - def content_metadata_get( - self, - id: str, - authority: Dict[str, str], - after: Optional[datetime.datetime] = None, - page_token: Optional[bytes] = None, - limit: int = 1000, - ) -> Dict[str, Any]: - """Retrieve list of all content_metadata entries for the id - - Args: - id: the content's SWHID - authority: a dict containing keys `type` and `url`. - after: minimum discovery_date for a result to be returned - page_token: opaque token, used to get the next page of results - limit: maximum number of results to be returned - - Returns: - dict with keys `next_page_token` and `results`. - `next_page_token` is an opaque token that is used to get the - next page of results, or `None` if there are no more results. - `results` is a list of dicts in the format: - - .. code-block: python - - { - 'authority': {'type': ..., 'url': ...}, - 'fetcher': {'name': ..., 'version': ...}, - 'discovery_date': ..., - 'format': '...', - 'metadata': b'...', - 'context': { ... }, - } - - """ - ... - - @remote_api_endpoint("origin/metadata/add") - def origin_metadata_add( - self, - origin_url: str, - discovery_date: datetime.datetime, - authority: Dict[str, Any], - fetcher: Dict[str, Any], - format: str, - metadata: bytes, - ) -> None: - """Add an origin_metadata for the origin at discovery_date, - obtained using the `fetcher` from the `authority`. + @remote_api_endpoint("object_metadata/add") + def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: + """Add extrinsic metadata on objects (contents, directories, ...). The authority and fetcher must be known to the storage before using this endpoint. - If there is already origin metadata for the same origin, + If there is already object metadata for the same object, authority, fetcher, and at the same date; the new one will be either dropped or will replace the existing one (it is unspecified which one of these two behaviors happens). 
Args: - discovery_date: when the metadata was fetched. - authority: a dict containing keys `type` and `url`. - fetcher: a dict containing keys `name` and `version`. - format: text field indicating the format of the content of the - metadata: blob of raw metadata + metadata: iterable of RawExtrinsicMetadata objects to be inserted. """ ... - @remote_api_endpoint("origin/metadata/get") - def origin_metadata_get( + @remote_api_endpoint("object_metadata/get") + def object_metadata_get( self, - origin_url: str, - authority: Dict[str, str], + object_type: MetadataTargetType, + id: Union[str, SWHID], + authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, limit: int = 1000, - ) -> Dict[str, Any]: - """Retrieve list of all origin_metadata entries for the origin_url + ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: + """Retrieve list of all object_metadata entries for the id Args: - origin_url: the origin's URL + object_type: one of the values of swh.model.model.MetadataTargetType + id: a URL if object_type is 'origin', else a core SWHID authority: a dict containing keys `type` and `url`. after: minimum discovery_date for a result to be returned page_token: opaque token, used to get the next page of results @@ -1228,40 +1153,30 @@ dict with keys `next_page_token` and `results`. `next_page_token` is an opaque token that is used to get the next page of results, or `None` if there are no more results. - `results` is a list of dicts in the format: - - .. code-block: python - - { - 'authority': {'type': ..., 'url': ...}, - 'fetcher': {'name': ..., 'version': ...}, - 'discovery_date': ..., - 'format': '...', - 'metadata': b'...' - } + `results` is a list of RawExtrinsicMetadata objects. """ ... - @remote_api_endpoint("fetcher/add") - def metadata_fetcher_add( - self, name: str, version: str, metadata: Dict[str, Any] - ) -> None: - """Add a new metadata fetcher to the storage. 
+ @remote_api_endpoint("metadata_fetcher/add") + def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None: + """Add new metadata fetchers to the storage. - `name` and `version` together are a unique identifier of this + Their `name` and `version` together are unique identifiers of this fetcher; and `metadata` is an arbitrary dict of JSONable data - with information about this fetcher. + with information about this fetcher, which must not be `None` + (but may be empty). Args: - name: the name of the fetcher - version: version of the fetcher + fetchers: iterable of MetadataFetcher to be inserted """ ... - @remote_api_endpoint("fetcher/get") - def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]: + @remote_api_endpoint("metadata_fetcher/get") + def metadata_fetcher_get( + self, name: str, version: str + ) -> Optional[MetadataFetcher]: """Retrieve information about a fetcher Args: @@ -1269,27 +1184,30 @@ version: version of the fetcher Returns: - dictionary with keys `name`, `version`, and `metadata`; or None - if the fetcher is not known + a MetadataFetcher object (with a non-None metadata field) if it is known, + else None. """ ... - @remote_api_endpoint("authority/add") - def metadata_authority_add( - self, type: str, url: str, metadata: Dict[str, Any] - ) -> None: - """Add a metadata authority + @remote_api_endpoint("metadata_authority/add") + def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None: + """Add new metadata authorities to the storage. + + Their `type` and `url` together are unique identifiers of this + authority; and `metadata` is an arbitrary dict of JSONable data + with information about this authority, which must not be `None` + (but may be empty). Args: - type: one of "deposit", "forge", or "registry" - url: unique URI identifying the authority - metadata: JSON-encodable object + authorities: iterable of MetadataAuthority to be inserted """ ... 
- @remote_api_endpoint("authority/get") - def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]: + @remote_api_endpoint("metadata_authority/get") + def metadata_authority_get( + self, type: str, url: str + ) -> Optional[MetadataAuthority]: """Retrieve information about an authority Args: @@ -1297,8 +1215,8 @@ url: unique URI identifying the authority Returns: - dictionary with keys `type`, `url`, and `metadata`; or None - if the authority is not known + a MetadataAuthority object (with a non-None metadata field) if it is known, + else None. """ ... diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -6,6 +6,14 @@ import datetime from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model import from_disk +from swh.model.identifiers import parse_swhid +from swh.model.model import ( + MetadataAuthority, + MetadataAuthorityType, + MetadataFetcher, + RawExtrinsicMetadata, + MetadataTargetType, +) class StorageData: @@ -332,27 +340,21 @@ origins = (origin, origin2) -metadata_authority = { - "type": "deposit", - "url": "http://hal.inria.example.com/", - "metadata": {"location": "France"}, -} -metadata_authority2 = { - "type": "registry", - "url": "http://wikidata.example.com/", - "metadata": {}, -} +metadata_authority = MetadataAuthority( + type=MetadataAuthorityType.DEPOSIT, + url="http://hal.inria.example.com/", + metadata={"location": "France"}, +) +metadata_authority2 = MetadataAuthority( + type=MetadataAuthorityType.REGISTRY, + url="http://wikidata.example.com/", + metadata={}, +) -metadata_fetcher = { - "name": "swh-deposit", - "version": "0.0.1", - "metadata": {"sword_version": "2"}, -} -metadata_fetcher2 = { - "name": "swh-example", - "version": "0.0.1", - "metadata": {}, -} +metadata_fetcher = MetadataFetcher( + name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"}, +) +metadata_fetcher2 = 
MetadataFetcher(name="swh-example", version="0.0.1", metadata={},) date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) type_visit1 = "git" @@ -472,114 +474,82 @@ snapshots = (snapshot, empty_snapshot, complete_snapshot) -content_metadata = { - "id": f"swh:1:cnt:{cont['sha1_git']}", - "context": {"origin": origin["url"]}, - "discovery_date": datetime.datetime( +content_metadata = RawExtrinsicMetadata( + type=MetadataTargetType.CONTENT, + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + origin=origin["url"], + discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority["type"], - "url": metadata_authority["url"], - }, - "fetcher": { - "name": metadata_fetcher["name"], - "version": metadata_fetcher["version"], - }, - "format": "json", - "metadata": b'{"foo": "bar"}', -} -content_metadata2 = { - "id": f"swh:1:cnt:{cont['sha1_git']}", - "context": {"origin": origin2["url"]}, - "discovery_date": datetime.datetime( + authority=metadata_authority, + fetcher=metadata_fetcher, + format="json", + metadata=b'{"foo": "bar"}', +) +content_metadata2 = RawExtrinsicMetadata( + type=MetadataTargetType.CONTENT, + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + origin=origin2["url"], + discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority["type"], - "url": metadata_authority["url"], - }, - "fetcher": { - "name": metadata_fetcher["name"], - "version": metadata_fetcher["version"], - }, - "format": "yaml", - "metadata": b"foo: bar", -} -content_metadata3 = { - "id": f"swh:1:cnt:{cont['sha1_git']}", - "context": { - "origin": origin["url"], - "visit": 42, - "snapshot": f"swh:1:snp:{hash_to_hex(snapshot['id'])}", - "release": f"swh:1:rel:{hash_to_hex(release['id'])}", - "revision": f"swh:1:rev:{hash_to_hex(revision['id'])}", - "directory": 
f"swh:1:dir:{hash_to_hex(dir['id'])}", - "path": b"/foo/bar", - }, - "discovery_date": datetime.datetime( + authority=metadata_authority, + fetcher=metadata_fetcher, + format="yaml", + metadata=b"foo: bar", +) +content_metadata3 = RawExtrinsicMetadata( + type=MetadataTargetType.CONTENT, + id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"), + discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority2["type"], - "url": metadata_authority2["url"], - }, - "fetcher": { - "name": metadata_fetcher2["name"], - "version": metadata_fetcher2["version"], - }, - "format": "yaml", - "metadata": b"foo: bar", -} - -origin_metadata = { - "origin_url": origin["url"], - "discovery_date": datetime.datetime( + authority=metadata_authority2, + fetcher=metadata_fetcher2, + format="yaml", + metadata=b"foo: bar", + origin=origin["url"], + visit=42, + snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot['id'])}"), + release=parse_swhid(f"swh:1:rel:{hash_to_hex(release['id'])}"), + revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision['id'])}"), + directory=parse_swhid(f"swh:1:dir:{hash_to_hex(dir['id'])}"), + path=b"/foo/bar", +) + +origin_metadata = RawExtrinsicMetadata( + type=MetadataTargetType.ORIGIN, + id=origin["url"], + discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority["type"], - "url": metadata_authority["url"], - }, - "fetcher": { - "name": metadata_fetcher["name"], - "version": metadata_fetcher["version"], - }, - "format": "json", - "metadata": b'{"foo": "bar"}', -} -origin_metadata2 = { - "origin_url": origin["url"], - "discovery_date": datetime.datetime( + authority=metadata_authority, + fetcher=metadata_fetcher, + format="json", + metadata=b'{"foo": "bar"}', +) +origin_metadata2 = RawExtrinsicMetadata( + type=MetadataTargetType.ORIGIN, + id=origin["url"], + discovery_date=datetime.datetime( 2017, 1, 1, 22, 
0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority["type"], - "url": metadata_authority["url"], - }, - "fetcher": { - "name": metadata_fetcher["name"], - "version": metadata_fetcher["version"], - }, - "format": "yaml", - "metadata": b"foo: bar", -} -origin_metadata3 = { - "origin_url": origin["url"], - "discovery_date": datetime.datetime( + authority=metadata_authority, + fetcher=metadata_fetcher, + format="yaml", + metadata=b"foo: bar", +) +origin_metadata3 = RawExtrinsicMetadata( + type=MetadataTargetType.ORIGIN, + id=origin["url"], + discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), - "authority": { - "type": metadata_authority2["type"], - "url": metadata_authority2["url"], - }, - "fetcher": { - "name": metadata_fetcher2["name"], - "version": metadata_fetcher2["version"], - }, - "format": "yaml", - "metadata": b"foo: bar", -} + authority=metadata_authority2, + fetcher=metadata_fetcher2, + format="yaml", + metadata=b"foo: bar", +) person = { "name": b"John Doe", diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -17,6 +17,7 @@ from datetime import timedelta from unittest.mock import Mock +import attr import psycopg2 import pytest @@ -26,6 +27,7 @@ from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes +from swh.model.identifiers import SWHID from swh.model.model import ( Content, Directory, @@ -35,6 +37,7 @@ Release, Revision, Snapshot, + MetadataTargetType, ) from swh.model.hypothesis_strategies import objects from swh.model.hashutil import hash_to_hex @@ -3221,50 +3224,51 @@ def test_metadata_fetcher_add_get(self, swh_storage): actual_fetcher = swh_storage.metadata_fetcher_get( - data.metadata_fetcher["name"], data.metadata_fetcher["version"] + data.metadata_fetcher.name, data.metadata_fetcher.version ) assert actual_fetcher is None # does not 
exist - swh_storage.metadata_fetcher_add(**data.metadata_fetcher) + swh_storage.metadata_fetcher_add([data.metadata_fetcher]) res = swh_storage.metadata_fetcher_get( - data.metadata_fetcher["name"], data.metadata_fetcher["version"] + data.metadata_fetcher.name, data.metadata_fetcher.version ) - assert res is not data.metadata_fetcher assert res == data.metadata_fetcher def test_metadata_authority_add_get(self, swh_storage): actual_authority = swh_storage.metadata_authority_get( - data.metadata_authority["type"], data.metadata_authority["url"] + data.metadata_authority.type, data.metadata_authority.url ) assert actual_authority is None # does not exist - swh_storage.metadata_authority_add(**data.metadata_authority) + swh_storage.metadata_authority_add([data.metadata_authority]) res = swh_storage.metadata_authority_get( - data.metadata_authority["type"], data.metadata_authority["url"] + data.metadata_authority.type, data.metadata_authority.url ) - assert res is not data.metadata_authority assert res == data.metadata_authority def test_content_metadata_add(self, swh_storage): content = data.cont fetcher = data.metadata_fetcher authority = data.metadata_authority - content_swhid = f"swh:1:cnt:{content['sha1_git']}" + content_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(content["sha1_git"]) + ) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.content_metadata_add(**data.content_metadata) - swh_storage.content_metadata_add(**data.content_metadata2) + swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) - result = swh_storage.content_metadata_get(content_swhid, authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority + ) assert result["next_page_token"] is None assert [data.content_metadata, data.content_metadata2] == 
list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) def test_content_metadata_add_duplicate(self, swh_storage): @@ -3272,81 +3276,81 @@ content = data.cont fetcher = data.metadata_fetcher authority = data.metadata_authority - content_swhid = f"swh:1:cnt:{content['sha1_git']}" + content_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(content["sha1_git"]) + ) - new_content_metadata2 = { - **data.content_metadata2, - "format": "new-format", - "metadata": b"new-metadata", - } + new_content_metadata2 = attr.evolve( + data.content_metadata2, format="new-format", metadata=b"new-metadata", + ) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.content_metadata_add(**data.content_metadata) - swh_storage.content_metadata_add(**data.content_metadata2) - swh_storage.content_metadata_add(**new_content_metadata2) + swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) + swh_storage.object_metadata_add([new_content_metadata2]) - result = swh_storage.content_metadata_get(content_swhid, authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority + ) assert result["next_page_token"] is None expected_results1 = (data.content_metadata, new_content_metadata2) expected_results2 = (data.content_metadata, data.content_metadata2) - assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( expected_results1, # cassandra expected_results2, # postgresql ) - def test_content_metadata_add_dict(self, swh_storage): - fetcher = data.metadata_fetcher - authority = data.metadata_authority - - swh_storage.metadata_fetcher_add(**fetcher) - 
swh_storage.metadata_authority_add(**authority) - - kwargs = data.content_metadata.copy() - kwargs["metadata"] = {"foo": "bar"} - - with pytest.raises(StorageArgumentException): - swh_storage.content_metadata_add(**kwargs) - def test_content_metadata_get(self, swh_storage): authority = data.metadata_authority fetcher = data.metadata_fetcher authority2 = data.metadata_authority2 fetcher2 = data.metadata_fetcher2 - content1_swhid = f"swh:1:cnt:{data.cont['sha1_git']}" - content2_swhid = f"swh:1:cnt:{data.cont2['sha1_git']}" + content1_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(data.cont["sha1_git"]) + ) + content2_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(data.cont2["sha1_git"]) + ) content1_metadata1 = data.content_metadata content1_metadata2 = data.content_metadata2 content1_metadata3 = data.content_metadata3 - content2_metadata = {**data.content_metadata2, "id": content2_swhid} + content2_metadata = attr.evolve(data.content_metadata2, id=content2_swhid) - swh_storage.metadata_authority_add(**authority) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority2) - swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add([authority, authority2]) + swh_storage.metadata_fetcher_add([fetcher, fetcher2]) - swh_storage.content_metadata_add(**content1_metadata1) - swh_storage.content_metadata_add(**content1_metadata2) - swh_storage.content_metadata_add(**content1_metadata3) - swh_storage.content_metadata_add(**content2_metadata) + swh_storage.object_metadata_add( + [ + content1_metadata1, + content1_metadata2, + content1_metadata3, + content2_metadata, + ] + ) - result = swh_storage.content_metadata_get(content1_swhid, authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content1_swhid, authority + ) assert result["next_page_token"] is None assert [content1_metadata1, content1_metadata2] == list( - sorted(result["results"], key=lambda x: 
x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.content_metadata_get(content1_swhid, authority2) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content1_swhid, authority2 + ) assert result["next_page_token"] is None assert [content1_metadata3] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.content_metadata_get(content2_swhid, authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content2_swhid, authority + ) assert result["next_page_token"] is None assert [content2_metadata] == list(result["results"],) @@ -3354,32 +3358,40 @@ content = data.cont fetcher = data.metadata_fetcher authority = data.metadata_authority - content_swhid = f"swh:1:cnt:{content['sha1_git']}" + content_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(content["sha1_git"]) + ) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.content_metadata_add(**data.content_metadata) - swh_storage.content_metadata_add(**data.content_metadata2) + swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) - result = swh_storage.content_metadata_get( + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority, - after=data.content_metadata["discovery_date"] - timedelta(seconds=1), + after=data.content_metadata.discovery_date - timedelta(seconds=1), ) assert result["next_page_token"] is None assert [data.content_metadata, data.content_metadata2] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.content_metadata_get( - content_swhid, authority, 
after=data.content_metadata["discovery_date"] + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, + content_swhid, + authority, + after=data.content_metadata.discovery_date, ) assert result["next_page_token"] is None assert [data.content_metadata2] == result["results"] - result = swh_storage.content_metadata_get( - content_swhid, authority, after=data.content_metadata2["discovery_date"] + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, + content_swhid, + authority, + after=data.content_metadata2.discovery_date, ) assert result["next_page_token"] is None assert [] == result["results"] @@ -3388,22 +3400,31 @@ content = data.cont fetcher = data.metadata_fetcher authority = data.metadata_authority - content_swhid = f"swh:1:cnt:{content['sha1_git']}" + content_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(content["sha1_git"]) + ) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.content_metadata_add(**data.content_metadata) - swh_storage.content_metadata_add(**data.content_metadata2) + swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2]) - swh_storage.content_metadata_get(content_swhid, authority) + swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority + ) - result = swh_storage.content_metadata_get(content_swhid, authority, limit=1) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + ) assert result["next_page_token"] is not None assert [data.content_metadata] == result["results"] - result = swh_storage.content_metadata_get( - content_swhid, authority, limit=1, page_token=result["next_page_token"] + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, + content_swhid, + authority, + limit=1, + 
page_token=result["next_page_token"], ) assert result["next_page_token"] is None assert [data.content_metadata2] == result["results"] @@ -3413,27 +3434,33 @@ fetcher1 = data.metadata_fetcher fetcher2 = data.metadata_fetcher2 authority = data.metadata_authority - content_swhid = f"swh:1:cnt:{content['sha1_git']}" + content_swhid = SWHID( + object_type="content", object_id=hash_to_bytes(content["sha1_git"]) + ) - swh_storage.metadata_fetcher_add(**fetcher1) - swh_storage.metadata_fetcher_add(**fetcher2) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) + swh_storage.metadata_authority_add([authority]) - content_metadata2 = { - **data.content_metadata2, - "discovery_date": data.content_metadata2["discovery_date"], - "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, - } + content_metadata2 = attr.evolve( + data.content_metadata2, + discovery_date=data.content_metadata2.discovery_date, + fetcher=fetcher2, + ) - swh_storage.content_metadata_add(**data.content_metadata) - swh_storage.content_metadata_add(**content_metadata2) + swh_storage.object_metadata_add([data.content_metadata, content_metadata2]) - result = swh_storage.content_metadata_get(content_swhid, authority, limit=1) + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + ) assert result["next_page_token"] is not None assert [data.content_metadata] == result["results"] - result = swh_storage.content_metadata_get( - content_swhid, authority, limit=1, page_token=result["next_page_token"] + result = swh_storage.object_metadata_get( + MetadataTargetType.CONTENT, + content_swhid, + authority, + limit=1, + page_token=result["next_page_token"], ) assert result["next_page_token"] is None assert [content_metadata2] == result["results"] @@ -3444,16 +3471,17 @@ authority = data.metadata_authority assert swh_storage.origin_add([origin]) == {"origin:add": 1} - 
swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.origin_metadata_add(**data.origin_metadata) - swh_storage.origin_metadata_add(**data.origin_metadata2) + swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) - result = swh_storage.origin_metadata_get(origin["url"], authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority + ) assert result["next_page_token"] is None assert [data.origin_metadata, data.origin_metadata2] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date) ) def test_origin_metadata_add_duplicate(self, swh_storage): @@ -3463,46 +3491,30 @@ authority = data.metadata_authority assert swh_storage.origin_add([origin]) == {"origin:add": 1} - new_origin_metadata2 = { - **data.origin_metadata2, - "format": "new-format", - "metadata": b"new-metadata", - } + new_origin_metadata2 = attr.evolve( + data.origin_metadata2, format="new-format", metadata=b"new-metadata", + ) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.origin_metadata_add(**data.origin_metadata) - swh_storage.origin_metadata_add(**data.origin_metadata2) - swh_storage.origin_metadata_add(**new_origin_metadata2) + swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) + swh_storage.object_metadata_add([new_origin_metadata2]) - result = swh_storage.origin_metadata_get(origin["url"], authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority + ) assert result["next_page_token"] is None # which of the two behavior happens is backend-specific. 
expected_results1 = (data.origin_metadata, new_origin_metadata2) expected_results2 = (data.origin_metadata, data.origin_metadata2) - assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in ( + assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in ( expected_results1, # cassandra expected_results2, # postgresql ) - def test_origin_metadata_add_dict(self, swh_storage): - origin = data.origin - fetcher = data.metadata_fetcher - authority = data.metadata_authority - assert swh_storage.origin_add([origin]) == {"origin:add": 1} - - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) - - kwargs = data.origin_metadata.copy() - kwargs["metadata"] = {"foo": "bar"} - - with pytest.raises(StorageArgumentException): - swh_storage.origin_metadata_add(**kwargs) - def test_origin_metadata_get(self, swh_storage): authority = data.metadata_authority fetcher = data.metadata_fetcher @@ -3515,31 +3527,34 @@ origin1_metadata1 = data.origin_metadata origin1_metadata2 = data.origin_metadata2 origin1_metadata3 = data.origin_metadata3 - origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2} + origin2_metadata = attr.evolve(data.origin_metadata2, id=origin_url2) - swh_storage.metadata_authority_add(**authority) - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority2) - swh_storage.metadata_fetcher_add(**fetcher2) + swh_storage.metadata_authority_add([authority, authority2]) + swh_storage.metadata_fetcher_add([fetcher, fetcher2]) - swh_storage.origin_metadata_add(**origin1_metadata1) - swh_storage.origin_metadata_add(**origin1_metadata2) - swh_storage.origin_metadata_add(**origin1_metadata3) - swh_storage.origin_metadata_add(**origin2_metadata) + swh_storage.object_metadata_add( + [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] + ) - result = swh_storage.origin_metadata_get(origin_url1, authority) + result = 
swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin_url1, authority + ) assert result["next_page_token"] is None assert [origin1_metadata1, origin1_metadata2] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.origin_metadata_get(origin_url1, authority2) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin_url1, authority2 + ) assert result["next_page_token"] is None assert [origin1_metadata3] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.origin_metadata_get(origin_url2, authority) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin_url2, authority + ) assert result["next_page_token"] is None assert [origin2_metadata] == list(result["results"],) @@ -3549,30 +3564,36 @@ authority = data.metadata_authority assert swh_storage.origin_add([origin]) == {"origin:add": 1} - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.origin_metadata_add(**data.origin_metadata) - swh_storage.origin_metadata_add(**data.origin_metadata2) + swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) - result = swh_storage.origin_metadata_get( + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority, - after=data.origin_metadata["discovery_date"] - timedelta(seconds=1), + after=data.origin_metadata.discovery_date - timedelta(seconds=1), ) assert result["next_page_token"] is None assert [data.origin_metadata, data.origin_metadata2] == list( - sorted(result["results"], key=lambda x: x["discovery_date"],) + sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = 
swh_storage.origin_metadata_get( - origin["url"], authority, after=data.origin_metadata["discovery_date"] + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, + origin["url"], + authority, + after=data.origin_metadata.discovery_date, ) assert result["next_page_token"] is None assert [data.origin_metadata2] == result["results"] - result = swh_storage.origin_metadata_get( - origin["url"], authority, after=data.origin_metadata2["discovery_date"] + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, + origin["url"], + authority, + after=data.origin_metadata2.discovery_date, ) assert result["next_page_token"] is None assert [] == result["results"] @@ -3583,20 +3604,27 @@ authority = data.metadata_authority assert swh_storage.origin_add([origin]) == {"origin:add": 1} - swh_storage.metadata_fetcher_add(**fetcher) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher]) + swh_storage.metadata_authority_add([authority]) - swh_storage.origin_metadata_add(**data.origin_metadata) - swh_storage.origin_metadata_add(**data.origin_metadata2) + swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2]) - swh_storage.origin_metadata_get(origin["url"], authority) + swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority + ) - result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority, limit=1 + ) assert result["next_page_token"] is not None assert [data.origin_metadata] == result["results"] - result = swh_storage.origin_metadata_get( - origin["url"], authority, limit=1, page_token=result["next_page_token"] + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, + origin["url"], + authority, + limit=1, + page_token=result["next_page_token"], ) assert result["next_page_token"] is None assert [data.origin_metadata2] == 
result["results"] @@ -3608,25 +3636,30 @@ authority = data.metadata_authority assert swh_storage.origin_add([origin]) == {"origin:add": 1} - swh_storage.metadata_fetcher_add(**fetcher1) - swh_storage.metadata_fetcher_add(**fetcher2) - swh_storage.metadata_authority_add(**authority) + swh_storage.metadata_fetcher_add([fetcher1]) + swh_storage.metadata_fetcher_add([fetcher2]) + swh_storage.metadata_authority_add([authority]) - origin_metadata2 = { - **data.origin_metadata2, - "discovery_date": data.origin_metadata2["discovery_date"], - "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],}, - } + origin_metadata2 = attr.evolve( + data.origin_metadata2, + discovery_date=data.origin_metadata2.discovery_date, + fetcher=fetcher2, + ) - swh_storage.origin_metadata_add(**data.origin_metadata) - swh_storage.origin_metadata_add(**origin_metadata2) + swh_storage.object_metadata_add([data.origin_metadata, origin_metadata2]) - result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1) + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, origin["url"], authority, limit=1 + ) assert result["next_page_token"] is not None assert [data.origin_metadata] == result["results"] - result = swh_storage.origin_metadata_get( - origin["url"], authority, limit=1, page_token=result["next_page_token"] + result = swh_storage.object_metadata_get( + MetadataTargetType.ORIGIN, + origin["url"], + authority, + limit=1, + page_token=result["next_page_token"], ) assert result["next_page_token"] is None assert [origin_metadata2] == result["results"]