D3456.id12343.diff
diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py
--- a/swh/storage/api/serializers.py
+++ b/swh/storage/api/serializers.py
@@ -7,6 +7,7 @@
from typing import Callable, Dict, List, Tuple
+from swh.model.identifiers import SWHID, parse_swhid
import swh.model.model as model
@@ -16,11 +17,23 @@
return d
+def _encode_model_enum(obj):
+ return {
+ "value": obj.value,
+ "__type__": type(obj).__name__,
+ }
+
+
ENCODERS: List[Tuple[type, str, Callable]] = [
(model.BaseModel, "model", _encode_model_object),
+ (SWHID, "swhid", str),
+ (model.MetadataTargetType, "model_enum", _encode_model_enum),
+ (model.MetadataAuthorityType, "model_enum", _encode_model_enum),
]
DECODERS: Dict[str, Callable] = {
- "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d)
+ "swhid": parse_swhid,
+ "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d),
+ "model_enum": lambda d: getattr(model, d.pop("__type__"))(d["value"]),
}
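
Taken together, the new encoder/decoder pairs let the RPC layer round-trip SWHIDs (as strings) and model enums (as tagged value dicts). A minimal, self-contained sketch of the enum round-trip, using a stand-in enum instead of swh.model so it runs on its own:

import enum
from typing import Any, Dict


class MetadataAuthorityType(enum.Enum):  # stand-in for swh.model.model's enum
    DEPOSIT = "deposit"
    FORGE = "forge"
    REGISTRY = "registry"


# The real decoder resolves "__type__" on the swh.model.model module;
# a small registry plays that role here.
_TYPES = {"MetadataAuthorityType": MetadataAuthorityType}


def _encode_model_enum(obj: enum.Enum) -> Dict[str, Any]:
    return {"value": obj.value, "__type__": type(obj).__name__}


def _decode_model_enum(d: Dict[str, Any]) -> enum.Enum:
    return _TYPES[d.pop("__type__")](d["value"])


encoded = _encode_model_enum(MetadataAuthorityType.DEPOSIT)
assert _decode_model_enum(encoded) is MetadataAuthorityType.DEPOSIT
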
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -18,7 +18,6 @@
Optional,
Tuple,
TypeVar,
- Union,
)
from cassandra import CoordinationFailure
@@ -46,7 +45,6 @@
from .common import Row, TOKEN_BEGIN, TOKEN_END, hash_url
from .schema import CREATE_TABLES_QUERIES, HASH_ALGORITHMS
-from .. import extrinsic_metadata
logger = logging.getLogger(__name__)
@@ -899,36 +897,12 @@
f"VALUES ({', '.join('?' for _ in _object_metadata_keys)})"
)
def object_metadata_add(
- self,
- object_type: str,
- id: str,
- authority_type,
- authority_url,
- discovery_date,
- fetcher_name,
- fetcher_version,
- format,
- metadata,
- context: Dict[str, Union[str, bytes, int]],
- *,
- statement,
+ self, statement, **kwargs,
):
- params = [
- object_type,
- id,
- authority_type,
- authority_url,
- discovery_date,
- fetcher_name,
- fetcher_version,
- format,
- metadata,
- ]
-
- params.extend(
- context.get(key) for key in extrinsic_metadata.CONTEXT_KEYS[object_type]
- )
-
+ assert set(kwargs) == set(
+ self._object_metadata_keys
+ ), f"Bad kwargs: {set(kwargs)}"
+ params = [kwargs[key] for key in self._object_metadata_keys]
return self._execute_with_retries(statement, params,)
@_prepared_statement(
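
The rewritten object_metadata_add trades the long positional signature for keyword arguments validated against the prepared statement's column list. A sketch of that pattern follows; the column list here is an assumption, inferred from the kwargs passed by swh/storage/cassandra/storage.py later in this diff:

from typing import Any, List

# Assumed column order, matching the kwargs the Cassandra backend passes.
_object_metadata_keys: List[str] = [
    "type", "id", "authority_type", "authority_url", "discovery_date",
    "fetcher_name", "fetcher_version", "format", "metadata",
    "origin", "visit", "snapshot", "release", "revision", "path", "directory",
]


def _bind_params(**kwargs: Any) -> List[Any]:
    # Fail fast on missing or unexpected columns, then order the values to
    # match the '?' placeholders of the prepared INSERT statement.
    assert set(kwargs) == set(_object_metadata_keys), f"Bad kwargs: {set(kwargs)}"
    return [kwargs[key] for key in _object_metadata_keys]
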
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -14,6 +14,8 @@
from deprecated import deprecated
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import parse_swhid, SWHID
+from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.model.model import (
Revision,
Release,
@@ -25,14 +27,17 @@
OriginVisitStatus,
Snapshot,
Origin,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
-from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.storage.objstorage import ObjStorage
from swh.storage.writer import JournalWriter
-from swh.storage.utils import now
+from swh.storage.utils import map_optional, now
from ..exc import StorageArgumentException, HashCollision
-from ..extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS
from .common import TOKEN_BEGIN, TOKEN_END
from .converters import (
revision_to_db,
@@ -1009,128 +1014,65 @@
def refresh_stat_counters(self):
pass
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- self._object_metadata_add(
- "content",
- id,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
-
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit,
- )
-
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- if not isinstance(origin_url, str):
- raise StorageArgumentException(
- "origin_url must be str, not %r" % (origin_url,)
- )
-
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata]) -> None:
+ for metadata_entry in metadata:
+ if not self._cql_runner.metadata_authority_get(
+ metadata_entry.authority.type.value, metadata_entry.authority.url
+ ):
+ raise StorageArgumentException(
+ f"Unknown authority {metadata_entry.authority}"
+ )
+ if not self._cql_runner.metadata_fetcher_get(
+ metadata_entry.fetcher.name, metadata_entry.fetcher.version
+ ):
+ raise StorageArgumentException(
+ f"Unknown fetcher {metadata_entry.fetcher}"
+ )
- self._object_metadata_add(
- "origin",
- origin_url,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
+ try:
+ self._cql_runner.object_metadata_add(
+ type=metadata_entry.type.value,
+ id=str(metadata_entry.id),
+ authority_type=metadata_entry.authority.type.value,
+ authority_url=metadata_entry.authority.url,
+ discovery_date=metadata_entry.discovery_date,
+ fetcher_name=metadata_entry.fetcher.name,
+ fetcher_version=metadata_entry.fetcher.version,
+ format=metadata_entry.format,
+ metadata=metadata_entry.metadata,
+ origin=metadata_entry.origin,
+ visit=metadata_entry.visit,
+ snapshot=map_optional(str, metadata_entry.snapshot),
+ release=map_optional(str, metadata_entry.release),
+ revision=map_optional(str, metadata_entry.revision),
+ path=metadata_entry.path,
+ directory=map_optional(str, metadata_entry.directory),
+ )
+ except TypeError as e:
+ raise StorageArgumentException(*e.args)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- if not isinstance(origin_url, str):
- raise TypeError("origin_url must be str, not %r" % (origin_url,))
-
- res = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit
- )
- for result in res["results"]:
- result["origin_url"] = result.pop("id")
-
- return res
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- context: Dict[str, Union[str, bytes, int]],
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
-
- if not self._cql_runner.metadata_authority_get(**authority):
- raise StorageArgumentException(f"Unknown authority {authority}")
- if not self._cql_runner.metadata_fetcher_get(**fetcher):
- raise StorageArgumentException(f"Unknown fetcher {fetcher}")
-
- try:
- self._cql_runner.object_metadata_add(
- object_type,
- id,
- authority["type"],
- authority["url"],
- discovery_date,
- fetcher["name"],
- fetcher["version"],
- format,
- metadata,
- context,
- )
- except TypeError as e:
- raise StorageArgumentException(*e.args)
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
+ else:
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
if page_token is not None:
(after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
page_token
@@ -1140,20 +1082,20 @@
"page_token is inconsistent with the value of 'after'."
)
entries = self._cql_runner.object_metadata_get_after_date_and_fetcher(
- id,
- authority["type"],
- authority["url"],
+ str(id),
+ authority.type.value,
+ authority.url,
after_date,
after_fetcher_name,
after_fetcher_url,
)
elif after is not None:
entries = self._cql_runner.object_metadata_get_after_date(
- id, authority["type"], authority["url"], after
+ str(id), authority.type.value, authority.url, after
)
else:
entries = self._cql_runner.object_metadata_get(
- id, authority["type"], authority["url"]
+ str(id), authority.type.value, authority.url
)
if limit:
@@ -1163,28 +1105,29 @@
for entry in entries:
discovery_date = entry.discovery_date.replace(tzinfo=datetime.timezone.utc)
- result = {
- "id": entry.id,
- "authority": {
- "type": entry.authority_type,
- "url": entry.authority_url,
- },
- "fetcher": {
- "name": entry.fetcher_name,
- "version": entry.fetcher_version,
- },
- "discovery_date": discovery_date,
- "format": entry.format,
- "metadata": entry.metadata,
- }
-
- if CONTEXT_KEYS[object_type]:
- context = {}
- for key in CONTEXT_KEYS[object_type]:
- value = getattr(entry, key)
- if value is not None:
- context[key] = value
- result["context"] = context
+ assert str(id) == entry.id
+
+ result = RawExtrinsicMetadata(
+ type=MetadataTargetType(entry.type),
+ id=id,
+ authority=MetadataAuthority(
+ type=MetadataAuthorityType(entry.authority_type),
+ url=entry.authority_url,
+ ),
+ fetcher=MetadataFetcher(
+ name=entry.fetcher_name, version=entry.fetcher_version,
+ ),
+ discovery_date=discovery_date,
+ format=entry.format,
+ metadata=entry.metadata,
+ origin=entry.origin,
+ visit=entry.visit,
+ snapshot=map_optional(parse_swhid, entry.snapshot),
+ release=map_optional(parse_swhid, entry.release),
+ revision=map_optional(parse_swhid, entry.revision),
+ path=entry.path,
+ directory=map_optional(parse_swhid, entry.directory),
+ )
results.append(result)
@@ -1194,9 +1137,9 @@
last_result = results[-1]
next_page_token: Optional[bytes] = msgpack_dumps(
(
- last_result["discovery_date"],
- last_result["fetcher"]["name"],
- last_result["fetcher"]["version"],
+ last_result.discovery_date,
+ last_result.fetcher.name,
+ last_result.fetcher.version,
)
)
else:
@@ -1207,35 +1150,45 @@
"results": results,
}
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- self._cql_runner.metadata_fetcher_add(name, version, json.dumps(metadata))
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher]) -> None:
+ for fetcher in fetchers:
+ self._cql_runner.metadata_fetcher_add(
+ fetcher.name,
+ fetcher.version,
+ json.dumps(map_optional(dict, fetcher.metadata)),
+ )
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
fetcher = self._cql_runner.metadata_fetcher_get(name, version)
if fetcher:
- return {
- "name": fetcher.name,
- "version": fetcher.version,
- "metadata": json.loads(fetcher.metadata),
- }
+ return MetadataFetcher(
+ name=fetcher.name,
+ version=fetcher.version,
+ metadata=json.loads(fetcher.metadata),
+ )
else:
return None
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- self._cql_runner.metadata_authority_add(url, type, json.dumps(metadata))
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ for authority in authorities:
+ self._cql_runner.metadata_authority_add(
+ authority.url,
+ authority.type.value,
+ json.dumps(map_optional(dict, authority.metadata)),
+ )
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
- authority = self._cql_runner.metadata_authority_get(type, url)
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
+ authority = self._cql_runner.metadata_authority_get(type.value, url)
if authority:
- return {
- "type": authority.type,
- "url": authority.url,
- "metadata": json.loads(authority.metadata),
- }
+ return MetadataAuthority(
+ type=MetadataAuthorityType(authority.type),
+ url=authority.url,
+ metadata=json.loads(authority.metadata),
+ )
else:
return None
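
All three backends in this diff apply the same guard in object_metadata_get: an 'origin' target must be addressed by a URL string, anything else by a core SWHID. A reduced sketch of that dispatch, with toy stand-ins for the swh.model classes:

import enum


class SWHID:  # stand-in for swh.model.identifiers.SWHID
    def __repr__(self) -> str:
        return "SWHID(...)"


class MetadataTargetType(enum.Enum):  # stand-in for swh.model.model's enum
    ORIGIN = "origin"
    CONTENT = "content"


class StorageArgumentException(ValueError):
    pass


def check_id(object_type: MetadataTargetType, id) -> None:
    # Origins are addressed by URL (str); every other target by a core SWHID.
    if object_type == MetadataTargetType.ORIGIN:
        if isinstance(id, SWHID):
            raise StorageArgumentException(
                f"object_type='origin', but id is an SWHID: {id!r}"
            )
    elif not isinstance(id, SWHID):
        raise StorageArgumentException(
            f"object_type!='origin', but id is not an SWHID: {id!r}"
        )


check_id(MetadataTargetType.ORIGIN, "https://example.com/repo")  # passes
check_id(MetadataTargetType.CONTENT, SWHID())  # passes
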
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -6,7 +6,7 @@
import datetime
import random
import select
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple
from swh.core.db import BaseDb
from swh.core.db.db_utils import stored_procedure, jsonize as _jsonize
@@ -1119,7 +1119,8 @@
"""
object_metadata_get_cols = [
- "id",
+ "object_metadata.id",
+ "object_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1128,16 +1129,14 @@
"metadata_fetcher.version",
*_object_metadata_context_cols,
"format",
- "metadata",
+ "object_metadata.metadata",
]
"""List of columns of the object_metadata, metadata_authority,
and metadata_fetcher tables, used when reading object metadata."""
_object_metadata_select_query = f"""
SELECT
- object_metadata.id AS id,
- {', '.join(object_metadata_get_cols[1:-1])},
- object_metadata.metadata AS metadata
+ {', '.join(object_metadata_get_cols)}
FROM object_metadata
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
@@ -1149,12 +1148,18 @@
self,
object_type: str,
id: str,
- context: Dict[str, Union[str, bytes, int]],
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
+ origin: Optional[str],
+ visit: Optional[int],
+ snapshot: Optional[str],
+ release: Optional[str],
+ revision: Optional[str],
+ path: Optional[bytes],
+ directory: Optional[str],
cur,
):
query = self._object_metadata_insert_query
@@ -1166,9 +1171,14 @@
discovery_date=discovery_date,
format=format,
metadata=metadata,
+ origin=origin,
+ visit=visit,
+ snapshot=snapshot,
+ release=release,
+ revision=revision,
+ path=path,
+ directory=directory,
)
- for col in self._object_metadata_context_cols:
- args[col] = context.get(col)
params = [args[col] for col in self._object_metadata_insert_cols]
diff --git a/swh/storage/extrinsic_metadata.py b/swh/storage/extrinsic_metadata.py
deleted file mode 100644
--- a/swh/storage/extrinsic_metadata.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2020 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-from typing import Any, cast, Dict
-
-from swh.model.identifiers import PersistentId, parse_persistent_identifier
-
-from .exc import StorageArgumentException
-
-CONTEXT_KEYS: Dict[str, Dict[str, type]] = {}
-CONTEXT_KEYS["origin"] = {}
-CONTEXT_KEYS["snapshot"] = {"origin": str, "visit": int}
-CONTEXT_KEYS["release"] = {**CONTEXT_KEYS["snapshot"], "snapshot": PersistentId}
-CONTEXT_KEYS["revision"] = {**CONTEXT_KEYS["release"], "release": PersistentId}
-CONTEXT_KEYS["directory"] = {
- **CONTEXT_KEYS["revision"],
- "revision": PersistentId,
- "path": bytes,
-}
-CONTEXT_KEYS["content"] = {**CONTEXT_KEYS["directory"], "directory": PersistentId}
-
-
-def check_extrinsic_metadata_context(object_type: str, context: Dict[str, Any]):
- key_types = CONTEXT_KEYS[object_type]
-
- extra_keys = set(context) - set(key_types)
- if extra_keys:
- raise StorageArgumentException(f"Unknown context keys: {', '.join(extra_keys)}")
-
- for (key, value) in context.items():
- expected_type = key_types[key]
- expected_type_str = str(expected_type) # for display
-
- # If an SWHID is expected and a string is given, parse it
- if expected_type is PersistentId and isinstance(value, str):
- value = parse_persistent_identifier(value)
- expected_type_str = "PersistentId or str"
-
- # Check the type of the context value
- if not isinstance(value, expected_type):
- raise StorageArgumentException(
- f"Context key {key} must have type {expected_type_str}, "
- f"but is {value!r}"
- )
-
- # If it is an SWHID, check it is also a core SWHID.
- if expected_type is PersistentId:
- value = cast(PersistentId, value)
- if value.metadata != {}:
- raise StorageArgumentException(
- f"Context key {key} must be a core SWHID, "
- f"but it has qualifiers {', '.join(value.metadata)}."
- )
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -33,6 +33,7 @@
from deprecated import deprecated
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import SWHID
from swh.model.model import (
BaseContent,
Content,
@@ -45,6 +46,11 @@
OriginVisitStatus,
Origin,
SHA1_SIZE,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
@@ -52,7 +58,6 @@
from .converters import origin_url_to_sha1
from .exc import StorageArgumentException, HashCollision
-from .extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS
from .utils import get_partition_bounds_bytes
from .writer import JournalWriter
@@ -138,21 +143,33 @@
self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {}
self._persons = {}
- # {origin_url: {authority: [metadata]}}
+ # {object_type: {id: {authority: [metadata]}}}
self._object_metadata: Dict[
- str,
+ MetadataTargetType,
Dict[
- Hashable,
- SortedList[Tuple[datetime.datetime, FetcherKey], Dict[str, Any]],
+ Union[str, SWHID],
+ Dict[
+ Hashable,
+ SortedList[
+ Tuple[datetime.datetime, FetcherKey], RawExtrinsicMetadata
+ ],
+ ],
],
] = defaultdict(
lambda: defaultdict(
- lambda: SortedList(key=lambda x: (x["discovery_date"], x["fetcher"]))
+ lambda: defaultdict(
+ lambda: SortedList(
+ key=lambda x: (
+ x.discovery_date,
+ self._metadata_fetcher_key(x.fetcher),
+ )
+ )
+ )
)
) # noqa
- self._metadata_fetchers: Dict[FetcherKey, Dict[str, Any]] = {}
- self._metadata_authorities: Dict[Hashable, Dict[str, Any]] = {}
+ self._metadata_fetchers: Dict[FetcherKey, MetadataFetcher] = {}
+ self._metadata_authorities: Dict[Hashable, MetadataAuthority] = {}
self._objects = defaultdict(list)
self._sorted_sha1s = SortedList[bytes, bytes]()
@@ -1019,144 +1036,58 @@
def refresh_stat_counters(self):
pass
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- self._object_metadata_add(
- "content",
- id,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
-
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit
- )
-
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- if not isinstance(origin_url, str):
- raise StorageArgumentException(
- "origin_url must be str, not %r" % (origin_url,)
- )
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ for metadata_entry in metadata:
+ authority_key = self._metadata_authority_key(metadata_entry.authority)
+ if authority_key not in self._metadata_authorities:
+ raise StorageArgumentException(
+ f"Unknown authority {metadata_entry.authority}"
+ )
+ fetcher_key = self._metadata_fetcher_key(metadata_entry.fetcher)
+ if fetcher_key not in self._metadata_fetchers:
+ raise StorageArgumentException(
+ f"Unknown fetcher {metadata_entry.fetcher}"
+ )
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
+ object_metadata_list = self._object_metadata[metadata_entry.type][
+ metadata_entry.id
+ ][authority_key]
- self._object_metadata_add(
- "origin",
- origin_url,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
+ for existing_object_metadata in object_metadata_list:
+ if (
+ self._metadata_fetcher_key(existing_object_metadata.fetcher)
+ == fetcher_key
+ and existing_object_metadata.discovery_date
+ == metadata_entry.discovery_date
+ ):
+ # Duplicate of an existing one; ignore it.
+ break
+ else:
+ object_metadata_list.add(metadata_entry)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- if not isinstance(origin_url, str):
- raise TypeError("origin_url must be str, not %r" % (origin_url,))
-
- res = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit
- )
- res["results"] = copy.deepcopy(res["results"])
- for result in res["results"]:
- result["origin_url"] = result.pop("id")
-
- return res
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- context: Dict[str, Union[str, bytes, int]],
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
- if not isinstance(metadata, bytes):
- raise StorageArgumentException(
- "metadata must be bytes, not %r" % (metadata,)
- )
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
authority_key = self._metadata_authority_key(authority)
- if authority_key not in self._metadata_authorities:
- raise StorageArgumentException(f"Unknown authority {authority}")
- fetcher_key = self._metadata_fetcher_key(fetcher)
- if fetcher_key not in self._metadata_fetchers:
- raise StorageArgumentException(f"Unknown fetcher {fetcher}")
-
- object_metadata_list = self._object_metadata[id][authority_key]
-
- object_metadata: Dict[str, Any] = {
- "id": id,
- "discovery_date": discovery_date,
- "authority": authority_key,
- "fetcher": fetcher_key,
- "format": format,
- "metadata": metadata,
- }
-
- if CONTEXT_KEYS[object_type]:
- object_metadata["context"] = context
- for existing_object_metadata in object_metadata_list:
- if (
- existing_object_metadata["fetcher"] == fetcher_key
- and existing_object_metadata["discovery_date"] == discovery_date
- ):
- # Duplicate of an existing one; replace it.
- existing_object_metadata.update(object_metadata)
- break
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
else:
- object_metadata_list.add(object_metadata)
-
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- authority_key = self._metadata_authority_key(authority)
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
if page_token is not None:
(after_time, after_fetcher) = msgpack_loads(page_token)
@@ -1165,33 +1096,36 @@
raise StorageArgumentException(
"page_token is inconsistent with the value of 'after'."
)
- entries = self._object_metadata[id][authority_key].iter_after(
+ entries = self._object_metadata[object_type][id][authority_key].iter_after(
(after_time, after_fetcher)
)
elif after is not None:
- entries = self._object_metadata[id][authority_key].iter_from((after,))
- entries = (entry for entry in entries if entry["discovery_date"] > after)
+ entries = self._object_metadata[object_type][id][authority_key].iter_from(
+ (after,)
+ )
+ entries = (entry for entry in entries if entry.discovery_date > after)
else:
- entries = iter(self._object_metadata[id][authority_key])
+ entries = iter(self._object_metadata[object_type][id][authority_key])
if limit:
entries = itertools.islice(entries, 0, limit + 1)
results = []
for entry in entries:
- authority = self._metadata_authorities[entry["authority"]]
- fetcher = self._metadata_fetchers[entry["fetcher"]]
+ entry_authority = self._metadata_authorities[
+ self._metadata_authority_key(entry.authority)
+ ]
+ entry_fetcher = self._metadata_fetchers[
+ self._metadata_fetcher_key(entry.fetcher)
+ ]
if after:
- assert entry["discovery_date"] > after
+ assert entry.discovery_date > after
results.append(
- {
- **entry,
- "authority": {"type": authority["type"], "url": authority["url"],},
- "fetcher": {
- "name": fetcher["name"],
- "version": fetcher["version"],
- },
- }
+ attr.evolve(
+ entry,
+ authority=attr.evolve(entry_authority, metadata=None),
+ fetcher=attr.evolve(entry_fetcher, metadata=None),
+ )
)
if len(results) > limit:
@@ -1200,8 +1134,8 @@
last_result = results[-1]
next_page_token: Optional[bytes] = msgpack_dumps(
(
- last_result["discovery_date"],
- self._metadata_fetcher_key(last_result["fetcher"]),
+ last_result.discovery_date,
+ self._metadata_fetcher_key(last_result.fetcher),
)
)
else:
@@ -1212,37 +1146,38 @@
"results": results,
}
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- fetcher = {
- "name": name,
- "version": version,
- "metadata": metadata,
- }
- key = self._metadata_fetcher_key(fetcher)
- if key not in self._metadata_fetchers:
- self._metadata_fetchers[key] = fetcher
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher]) -> None:
+ for fetcher in fetchers:
+ if fetcher.metadata is None:
+ raise StorageArgumentException(
+ "MetadataFetcher.metadata may not be None in metadata_fetcher_add."
+ )
+ key = self._metadata_fetcher_key(fetcher)
+ if key not in self._metadata_fetchers:
+ self._metadata_fetchers[key] = fetcher
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
return self._metadata_fetchers.get(
- self._metadata_fetcher_key({"name": name, "version": version})
+ self._metadata_fetcher_key(MetadataFetcher(name=name, version=version))
)
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- authority = {
- "type": type,
- "url": url,
- "metadata": metadata,
- }
- key = self._metadata_authority_key(authority)
- self._metadata_authorities[key] = authority
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ for authority in authorities:
+ if authority.metadata is None:
+ raise StorageArgumentException(
+ "MetadataAuthority.metadata may not be None in "
+ "metadata_authority_add."
+ )
+ key = self._metadata_authority_key(authority)
+ self._metadata_authorities[key] = authority
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
return self._metadata_authorities.get(
- self._metadata_authority_key({"type": type, "url": url})
+ self._metadata_authority_key(MetadataAuthority(type=type, url=url))
)
def _get_origin_url(self, origin):
@@ -1267,12 +1202,12 @@
return tuple((key, content.get(key)) for key in sorted(DEFAULT_ALGORITHMS))
@staticmethod
- def _metadata_fetcher_key(fetcher: Dict) -> FetcherKey:
- return (fetcher["name"], fetcher["version"])
+ def _metadata_fetcher_key(fetcher: MetadataFetcher) -> FetcherKey:
+ return (fetcher.name, fetcher.version)
@staticmethod
- def _metadata_authority_key(authority: Dict) -> Hashable:
- return (authority["type"], authority["url"])
+ def _metadata_authority_key(authority: MetadataAuthority) -> Hashable:
+ return (authority.type, authority.url)
def diff_directories(self, from_dir, to_dir, track_renaming=False):
raise NotImplementedError("InMemoryStorage.diff_directories")
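
The in-memory index gains one level of nesting: entries are bucketed by target type before id and authority, and kept sorted by (discovery_date, fetcher key) for pagination. A toy version of the structure, with a plain sorted list standing in for swh's SortedList:

from collections import defaultdict
from typing import Any, Dict, List, Tuple

FetcherKey = Tuple[str, str]  # (name, version)

# {object_type: {id: {authority_key: entries sorted by (date, fetcher key)}}}
_object_metadata: Dict[Any, Dict[Any, Dict[Any, List[Any]]]] = defaultdict(
    lambda: defaultdict(lambda: defaultdict(list))
)


def add_entry(object_type: str, id: str, authority_key: FetcherKey,
              entry: Dict[str, Any]) -> None:
    bucket = _object_metadata[object_type][id][authority_key]
    bucket.append(entry)
    # SortedList maintains this invariant incrementally; sorting a plain
    # list is enough to illustrate the pagination order.
    bucket.sort(key=lambda e: (e["discovery_date"], e["fetcher_key"]))
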
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -8,6 +8,7 @@
from typing import Any, Dict, Iterable, List, Optional, Union
from swh.core.api import remote_api_endpoint
+from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
@@ -18,6 +19,11 @@
Release,
Snapshot,
SkippedContent,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
@@ -1107,118 +1113,38 @@
"""Recomputes the statistics for `stat_counters`."""
...
- @remote_api_endpoint("content/metadata/add")
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- """Add a content_metadata for the content at discovery_date,
- obtained using the `fetcher` from the `authority`.
-
- The authority and fetcher must be known to the storage before
- using this endpoint.
-
- If there is already content metadata for the same content, authority,
- fetcher, and at the same date; the new one will be either dropped or
- will replace the existing one
- (it is unspecified which one of these two behaviors happens).
-
- Args:
- discovery_date: when the metadata was fetched.
- authority: a dict containing keys `type` and `url`.
- fetcher: a dict containing keys `name` and `version`.
- format: text field indicating the format of the content of the
- metadata: blob of raw metadata
- """
- ...
-
- @remote_api_endpoint("content/metadata/get")
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- """Retrieve list of all content_metadata entries for the id
-
- Args:
- id: the content's SWHID
- authority: a dict containing keys `type` and `url`.
- after: minimum discovery_date for a result to be returned
- page_token: opaque token, used to get the next page of results
- limit: maximum number of results to be returned
-
- Returns:
- dict with keys `next_page_token` and `results`.
- `next_page_token` is an opaque token that is used to get the
- next page of results, or `None` if there are no more results.
- `results` is a list of dicts in the format:
-
- .. code-block: python
-
- {
- 'authority': {'type': ..., 'url': ...},
- 'fetcher': {'name': ..., 'version': ...},
- 'discovery_date': ...,
- 'format': '...',
- 'metadata': b'...',
- 'context': { ... },
- }
-
- """
- ...
-
- @remote_api_endpoint("origin/metadata/add")
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- """Add an origin_metadata for the origin at discovery_date,
- obtained using the `fetcher` from the `authority`.
+ @remote_api_endpoint("object_metadata/add")
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ """Add extrinsic metadata on objects (contents, directories, ...).
The authority and fetcher must be known to the storage before
using this endpoint.
- If there is already origin metadata for the same origin, authority,
+ If there is already metadata for the same object, authority,
fetcher, and at the same date, the new one will either be dropped or
replace the existing one
(it is unspecified which one of these two behaviors happens).
Args:
- discovery_date: when the metadata was fetched.
- authority: a dict containing keys `type` and `url`.
- fetcher: a dict containing keys `name` and `version`.
- format: text field indicating the format of the content of the
- metadata: blob of raw metadata
+ metadata: iterable of RawExtrinsicMetadata objects to be inserted.
"""
...
- @remote_api_endpoint("origin/metadata/get")
- def origin_metadata_get(
+ @remote_api_endpoint("object_metadata/get")
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- """Retrieve list of all origin_metadata entries for the origin_url
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ """Retrieve list of all object_metadata entries for the id
Args:
- origin_url: the origin's URL
+ object_type: one of the values of swh.model.model.MetadataTargetType
+ id: a URL if object_type is 'origin', else a core SWHID
- authority: a dict containing keys `type` and `url`.
+ authority: the MetadataAuthority to restrict results to
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
@@ -1228,40 +1154,30 @@
dict with keys `next_page_token` and `results`.
`next_page_token` is an opaque token that is used to get the
next page of results, or `None` if there are no more results.
- `results` is a list of dicts in the format:
-
- .. code-block: python
-
- {
- 'authority': {'type': ..., 'url': ...},
- 'fetcher': {'name': ..., 'version': ...},
- 'discovery_date': ...,
- 'format': '...',
- 'metadata': b'...'
- }
+ `results` is a list of RawExtrinsicMetadata objects.
"""
...
- @remote_api_endpoint("fetcher/add")
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- """Add a new metadata fetcher to the storage.
+ @remote_api_endpoint("metadata_fetcher/add")
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None:
+ """Add new metadata fetchers to the storage.
- `name` and `version` together are a unique identifier of this
+ Their `name` and `version` together are unique identifiers of this
fetcher; and `metadata` is an arbitrary dict of JSONable data
- with information about this fetcher.
+ with information about this fetcher, which must not be `None`
+ (but may be empty).
Args:
- name: the name of the fetcher
- version: version of the fetcher
+ fetchers: iterable of MetadataFetcher to be inserted
"""
...
- @remote_api_endpoint("fetcher/get")
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ @remote_api_endpoint("metadata_fetcher/get")
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
"""Retrieve information about a fetcher
Args:
@@ -1269,27 +1185,30 @@
version: version of the fetcher
Returns:
- dictionary with keys `name`, `version`, and `metadata`; or None
- if the fetcher is not known
+ a MetadataFetcher object (with a non-None metadata field) if it is known,
+ else None.
"""
...
- @remote_api_endpoint("authority/add")
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- """Add a metadata authority
+ @remote_api_endpoint("metadata_authority/add")
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ """Add new metadata authorities to the storage.
+
+ Their `type` and `url` together are unique identifiers of this
+ authority; and `metadata` is an arbitrary dict of JSONable data
+ with information about this authority, which must not be `None`
+ (but may be empty).
Args:
- type: one of "deposit", "forge", or "registry"
- url: unique URI identifying the authority
- metadata: JSON-encodable object
+ authorities: iterable of MetadataAuthority to be inserted
"""
...
- @remote_api_endpoint("authority/get")
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
+ @remote_api_endpoint("metadata_authority/get")
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
"""Retrieve information about an authority
Args:
@@ -1297,8 +1216,8 @@
url: unique URI identifying the authority
Returns:
- dictionary with keys `type`, `url`, and `metadata`; or None
- if the authority is not known
+ a MetadataAuthority object (with a non-None metadata field) if it is known,
+ else None.
"""
...
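
Assuming the in-memory backend and swh.model as of the revision this diff targets, a caller-side sketch of the reworked endpoints (all URLs and values are illustrative):

import datetime

import attr

from swh.model.identifiers import parse_swhid
from swh.model.model import (
    MetadataAuthority,
    MetadataAuthorityType,
    MetadataFetcher,
    MetadataTargetType,
    RawExtrinsicMetadata,
)
from swh.storage import get_storage

storage = get_storage("memory")

# Authorities and fetchers must be registered first, with non-None metadata.
authority = MetadataAuthority(
    type=MetadataAuthorityType.DEPOSIT,
    url="http://hal.inria.example.com/",
    metadata={},
)
fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1", metadata={})
storage.metadata_authority_add([authority])
storage.metadata_fetcher_add([fetcher])

entry = RawExtrinsicMetadata(
    type=MetadataTargetType.CONTENT,
    id=parse_swhid("swh:1:cnt:" + "00" * 20),
    # entries embed authority/fetcher without their metadata payloads
    authority=attr.evolve(authority, metadata=None),
    fetcher=attr.evolve(fetcher, metadata=None),
    discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
    format="json",
    metadata=b'{"foo": "bar"}',
)
storage.object_metadata_add([entry])

page = storage.object_metadata_get(
    MetadataTargetType.CONTENT, entry.id, authority,
)
assert page["next_page_token"] is None
assert page["results"] == [entry]
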
diff --git a/swh/storage/retry.py b/swh/storage/retry.py
--- a/swh/storage/retry.py
+++ b/swh/storage/retry.py
@@ -6,8 +6,7 @@
import logging
import traceback
-from datetime import datetime
-from typing import Any, Dict, Iterable, Optional
+from typing import Dict, Iterable, Optional
from tenacity import (
retry,
@@ -24,6 +23,9 @@
Snapshot,
Origin,
OriginVisit,
+ MetadataAuthority,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
)
from swh.storage import get_storage
@@ -112,30 +114,16 @@
return self.storage.origin_visit_add(visits)
@swh_retry
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- return self.storage.metadata_fetcher_add(name, version, metadata)
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None:
+ return self.storage.metadata_fetcher_add(fetchers)
@swh_retry
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- return self.storage.metadata_authority_add(type, url, metadata)
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ return self.storage.metadata_authority_add(authorities)
@swh_retry
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- return self.storage.origin_metadata_add(
- origin_url, discovery_date, authority, fetcher, format, metadata
- )
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ return self.storage.object_metadata_add(metadata)
@swh_retry
def directory_add(self, directories: Iterable[Directory]) -> Dict:
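
retry.py keeps its shape: each write endpoint is a thin delegation wrapped by the swh_retry decorator. A sketch of the proxy pattern, assuming swh_retry is a preconfigured tenacity decorator (the retry parameters below are illustrative, not the project's actual settings):

from tenacity import retry, stop_after_attempt, wait_random_exponential

# Illustrative policy; the real swh_retry is configured in swh/storage/retry.py.
swh_retry = retry(
    wait=wait_random_exponential(multiplier=1, max=10),
    stop=stop_after_attempt(3),
    reraise=True,
)


class RetryingProxyStorage:
    """Thin proxy: every write endpoint retries, then delegates."""

    def __init__(self, storage) -> None:
        self.storage = storage

    @swh_retry
    def object_metadata_add(self, metadata) -> None:
        return self.storage.object_metadata_add(metadata)
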
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -10,7 +10,15 @@
from collections import defaultdict
from contextlib import contextmanager
from deprecated import deprecated
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import (
+ Any,
+ Counter,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Union,
+)
import attr
import psycopg2
@@ -18,6 +26,7 @@
import psycopg2.errors
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import parse_swhid, SWHID
from swh.model.model import (
Content,
Directory,
@@ -29,6 +38,11 @@
SkippedContent,
Snapshot,
SHA1_SIZE,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
@@ -36,16 +50,12 @@
from swh.storage.utils import now
from . import converters
-from .extrinsic_metadata import (
- check_extrinsic_metadata_context,
- CONTEXT_KEYS,
-)
from .common import db_transaction_generator, db_transaction
from .db import Db
from .exc import StorageArgumentException, StorageDBError, HashCollision
from .algos import diff
from .metrics import timed, send_metric, process_metrics
-from .utils import get_partition_bounds_bytes, extract_collision_hash
+from .utils import get_partition_bounds_bytes, extract_collision_hash, map_optional
from .writer import JournalWriter
@@ -1129,150 +1139,66 @@
for key in keys:
cur.execute("select * from swh_update_counter(%s)", (key,))
- @timed
@db_transaction()
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db=None,
- cur=None,
+ def object_metadata_add(
+ self, metadata: Iterable[RawExtrinsicMetadata], db, cur,
) -> None:
- self._object_metadata_add(
- "content",
- id,
- context,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- db,
- cur,
- )
+ counter = Counter[MetadataTargetType]()
+ for metadata_entry in metadata:
+ authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
+ fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
+
+ db.object_metadata_add(
+ object_type=metadata_entry.type.value,
+ id=str(metadata_entry.id),
+ discovery_date=metadata_entry.discovery_date,
+ authority_id=authority_id,
+ fetcher_id=fetcher_id,
+ format=metadata_entry.format,
+ metadata=metadata_entry.metadata,
+ origin=metadata_entry.origin,
+ visit=metadata_entry.visit,
+ snapshot=map_optional(str, metadata_entry.snapshot),
+ release=map_optional(str, metadata_entry.release),
+ revision=map_optional(str, metadata_entry.revision),
+ path=metadata_entry.path,
+ directory=map_optional(str, metadata_entry.directory),
+ cur=cur,
+ )
+ counter[metadata_entry.type] += 1
- @timed
- @db_transaction()
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- db=None,
- cur=None,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit, db, cur
- )
+ for (object_type, count) in counter.items():
+ send_metric(
+ f"{object_type.value}_metadata:add",
+ count=count,
+ method_name=f"{object_type.value}_metadata_add",
+ )
- @timed
@db_transaction()
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db=None,
- cur=None,
- ) -> None:
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
-
- self._object_metadata_add(
- "origin",
- origin_url,
- context,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- db,
- cur,
- )
-
- @timed
- @db_transaction(statement_timeout=500)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
db=None,
cur=None,
- ) -> Dict[str, Any]:
- result = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit, db, cur
- )
-
- for res in result["results"]:
- res.pop("id")
- res["origin_url"] = origin_url
-
- return result
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db,
- cur,
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
-
- authority_id = self._get_authority_id(authority, db, cur)
- fetcher_id = self._get_fetcher_id(fetcher, db, cur)
- if not isinstance(metadata, bytes):
- raise StorageArgumentException(
- "metadata must be bytes, not %r" % (metadata,)
- )
-
- db.object_metadata_add(
- object_type,
- id,
- context,
- discovery_date,
- authority_id,
- fetcher_id,
- format,
- metadata,
- cur,
- )
-
- send_metric(
- f"{object_type}_metadata:add",
- count=1,
- method_name=f"{object_type}_metadata_add",
- )
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
+ else:
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime],
- page_token: Optional[bytes],
- limit: int,
- db,
- cur,
- ) -> Dict[str, Any]:
if page_token:
(after_time, after_fetcher) = msgpack_loads(page_token)
if after and after_time < after:
@@ -1283,9 +1209,7 @@
after_time = after
after_fetcher = None
- authority_id = db.metadata_authority_get_id(
- authority["type"], authority["url"], cur
- )
+ authority_id = self._get_authority_id(authority, db, cur)
if not authority_id:
return {
"next_page_token": None,
@@ -1293,36 +1217,44 @@
}
rows = db.object_metadata_get(
- object_type, id, authority_id, after_time, after_fetcher, limit + 1, cur
+ object_type,
+ str(id),
+ authority_id,
+ after_time,
+ after_fetcher,
+ limit + 1,
+ cur,
)
rows = [dict(zip(db.object_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
row = row.copy()
row.pop("metadata_fetcher.id")
- context = {}
- for key in CONTEXT_KEYS[object_type]:
- value = row[key]
- if value is not None:
- context[key] = value
-
- result = {
- "id": row["id"],
- "authority": {
- "type": row.pop("metadata_authority.type"),
- "url": row.pop("metadata_authority.url"),
- },
- "fetcher": {
- "name": row.pop("metadata_fetcher.name"),
- "version": row.pop("metadata_fetcher.version"),
- },
- "discovery_date": row["discovery_date"],
- "format": row["format"],
- "metadata": row["metadata"],
- }
- if CONTEXT_KEYS[object_type]:
- result["context"] = context
+ assert str(id) == row["object_metadata.id"]
+
+ result = RawExtrinsicMetadata(
+ type=MetadataTargetType(row["object_metadata.type"]),
+ id=id,
+ authority=MetadataAuthority(
+ type=MetadataAuthorityType(row["metadata_authority.type"]),
+ url=row["metadata_authority.url"],
+ ),
+ fetcher=MetadataFetcher(
+ name=row["metadata_fetcher.name"],
+ version=row["metadata_fetcher.version"],
+ ),
+ discovery_date=row["discovery_date"],
+ format=row["format"],
+ metadata=row["object_metadata.metadata"],
+ origin=row["origin"],
+ visit=row["visit"],
+ snapshot=map_optional(parse_swhid, row["snapshot"]),
+ release=map_optional(parse_swhid, row["release"]),
+ revision=map_optional(parse_swhid, row["revision"]),
+ path=row["path"],
+ directory=map_optional(parse_swhid, row["directory"]),
+ )
results.append(result)
@@ -1347,38 +1279,55 @@
@timed
@db_transaction()
def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any], db=None, cur=None
+ self, fetchers: Iterable[MetadataFetcher], db=None, cur=None
) -> None:
- db.metadata_fetcher_add(name, version, metadata)
- send_metric("metadata_fetcher:add", count=1, method_name="metadata_fetcher")
+ count = 0
+ for fetcher in fetchers:
+ if fetcher.metadata is None:
+ raise StorageArgumentException(
+ "MetadataFetcher.metadata may not be None in metadata_fetcher_add."
+ )
+ db.metadata_fetcher_add(
+ fetcher.name, fetcher.version, dict(fetcher.metadata), cur=cur
+ )
+ count += 1
+ send_metric("metadata_fetcher:add", count=count, method_name="metadata_fetcher")
@timed
@db_transaction(statement_timeout=500)
def metadata_fetcher_get(
self, name: str, version: str, db=None, cur=None
- ) -> Optional[Dict[str, Any]]:
+ ) -> Optional[MetadataFetcher]:
row = db.metadata_fetcher_get(name, version, cur=cur)
if not row:
return None
- return dict(zip(db.metadata_fetcher_cols, row))
+ return MetadataFetcher.from_dict(dict(zip(db.metadata_fetcher_cols, row)))
@timed
@db_transaction()
def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any], db=None, cur=None
+ self, authorities: Iterable[MetadataAuthority], db=None, cur=None
) -> None:
- db.metadata_authority_add(type, url, metadata, cur)
- send_metric("metadata_authority:add", count=1, method_name="metadata_authority")
+ count = 0
+ for authority in authorities:
+ if authority.metadata is None:
+ raise StorageArgumentException(
+ "MetadataAuthority.metadata may not be None in "
+ "metadata_authority_add."
+ )
+ db.metadata_authority_add(
+ authority.type.value, authority.url, dict(authority.metadata), cur=cur
+ )
+ count += 1
+ send_metric(
+ "metadata_authority:add", count=count, method_name="metadata_authority"
+ )
@timed
@db_transaction()
def metadata_authority_get(
- self, type: str, url: str, db=None, cur=None
- ) -> Optional[Dict[str, Any]]:
- row = db.metadata_authority_get(type, url, cur=cur)
+ self, type: MetadataAuthorityType, url: str, db=None, cur=None
+ ) -> Optional[MetadataAuthority]:
+ row = db.metadata_authority_get(type.value, url, cur=cur)
if not row:
return None
- return dict(zip(db.metadata_authority_cols, row))
+ return MetadataAuthority.from_dict(dict(zip(db.metadata_authority_cols, row)))
@timed
def diff_directories(self, from_dir, to_dir, track_renaming=False):
@@ -1401,18 +1350,16 @@
def flush(self, object_types: Optional[Iterable[str]] = None) -> Dict:
return {}
- def _get_authority_id(self, authority: Dict[str, Any], db, cur):
+ def _get_authority_id(self, authority: MetadataAuthority, db, cur):
authority_id = db.metadata_authority_get_id(
- authority["type"], authority["url"], cur
+ authority.type.value, authority.url, cur
)
if not authority_id:
raise StorageArgumentException(f"Unknown authority {authority}")
return authority_id
- def _get_fetcher_id(self, fetcher: Dict[str, Any], db, cur):
- fetcher_id = db.metadata_fetcher_get_id(
- fetcher["name"], fetcher["version"], cur
- )
+ def _get_fetcher_id(self, fetcher: MetadataFetcher, db, cur):
+ fetcher_id = db.metadata_fetcher_get_id(fetcher.name, fetcher.version, cur)
if not fetcher_id:
raise StorageArgumentException(f"Unknown fetcher {fetcher}")
return fetcher_id
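
The postgres backend now tallies inserts per target type and emits one metric per type instead of one per call. A self-contained sketch of the tally, with print standing in for send_metric:

import enum
from typing import Counter  # typing.Counter instantiates collections.Counter


class MetadataTargetType(enum.Enum):  # stand-in for swh.model.model's enum
    ORIGIN = "origin"
    CONTENT = "content"


counter = Counter[MetadataTargetType]()
for entry_type in (
    MetadataTargetType.ORIGIN,
    MetadataTargetType.CONTENT,
    MetadataTargetType.ORIGIN,
):
    counter[entry_type] += 1

for (object_type, count) in counter.items():
    # one send_metric call per target type, e.g. "origin_metadata:add" with 2
    print(f"{object_type.value}_metadata:add", count)
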
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -4,8 +4,19 @@
# See top-level LICENSE file for more information
import datetime
+
+import attr
+
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.model import from_disk
+from swh.model.identifiers import parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
+ MetadataTargetType,
+)
class StorageData:
@@ -335,27 +346,21 @@
origins = (origin, origin2)
-metadata_authority = {
- "type": "deposit",
- "url": "http://hal.inria.example.com/",
- "metadata": {"location": "France"},
-}
-metadata_authority2 = {
- "type": "registry",
- "url": "http://wikidata.example.com/",
- "metadata": {},
-}
+metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT,
+ url="http://hal.inria.example.com/",
+ metadata={"location": "France"},
+)
+metadata_authority2 = MetadataAuthority(
+ type=MetadataAuthorityType.REGISTRY,
+ url="http://wikidata.example.com/",
+ metadata={},
+)
-metadata_fetcher = {
- "name": "swh-deposit",
- "version": "0.0.1",
- "metadata": {"sword_version": "2"},
-}
-metadata_fetcher2 = {
- "name": "swh-example",
- "version": "0.0.1",
- "metadata": {},
-}
+metadata_fetcher = MetadataFetcher(
+ name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"},
+)
+metadata_fetcher2 = MetadataFetcher(name="swh-example", version="0.0.1", metadata={},)
date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
type_visit1 = "git"
@@ -475,114 +480,82 @@
snapshots = (snapshot, empty_snapshot, complete_snapshot)
-content_metadata = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {"origin": origin["url"]},
- "discovery_date": datetime.datetime(
+content_metadata = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ origin=origin["url"],
+ discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "json",
- "metadata": b'{"foo": "bar"}',
-}
-content_metadata2 = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {"origin": origin2["url"]},
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="json",
+ metadata=b'{"foo": "bar"}',
+)
+content_metadata2 = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ origin=origin2["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-content_metadata3 = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {
- "origin": origin["url"],
- "visit": 42,
- "snapshot": f"swh:1:snp:{hash_to_hex(snapshot['id'])}",
- "release": f"swh:1:rel:{hash_to_hex(release['id'])}",
- "revision": f"swh:1:rev:{hash_to_hex(revision['id'])}",
- "directory": f"swh:1:dir:{hash_to_hex(dir['id'])}",
- "path": b"/foo/bar",
- },
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
+content_metadata3 = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority2["type"],
- "url": metadata_authority2["url"],
- },
- "fetcher": {
- "name": metadata_fetcher2["name"],
- "version": metadata_fetcher2["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-
-origin_metadata = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority2, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+ origin=origin["url"],
+ visit=42,
+ snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot['id'])}"),
+ release=parse_swhid(f"swh:1:rel:{hash_to_hex(release['id'])}"),
+ revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision['id'])}"),
+ directory=parse_swhid(f"swh:1:dir:{hash_to_hex(dir['id'])}"),
+ path=b"/foo/bar",
+)
+
+origin_metadata = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "json",
- "metadata": b'{"foo": "bar"}',
-}
-origin_metadata2 = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="json",
+ metadata=b'{"foo": "bar"}',
+)
+origin_metadata2 = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-origin_metadata3 = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
+origin_metadata3 = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority2["type"],
- "url": metadata_authority2["url"],
- },
- "fetcher": {
- "name": metadata_fetcher2["name"],
- "version": metadata_fetcher2["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
+ authority=attr.evolve(metadata_authority2, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
person = {
"name": b"John Doe",
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -17,6 +17,7 @@
SkippedContent,
Origin,
OriginVisit,
+ MetadataTargetType,
)
from swh.storage import get_storage
@@ -426,16 +427,12 @@
"""
fetcher = sample_data["fetcher"][0]
- metadata_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ metadata_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not metadata_fetcher
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert actual_fetcher == fetcher
@@ -458,19 +455,13 @@
[fetcher],
]
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not actual_fetcher
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
mock_memory.assert_has_calls(
- [
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- ]
+ [call([fetcher]), call([fetcher]), call([fetcher]),]
)
@@ -489,13 +480,11 @@
fetcher = sample_data["fetcher"][0]
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not actual_fetcher
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
assert mock_memory.call_count == 1
@@ -506,13 +495,11 @@
"""
authority = sample_data["authority"][0]
- assert not swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ assert not swh_storage.metadata_authority_get(authority.type, authority.url)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
- actual_authority = swh_storage.metadata_authority_get(
- authority["type"], authority["url"]
- )
+ actual_authority = swh_storage.metadata_authority_get(authority.type, authority.url)
assert actual_authority == authority
@@ -536,14 +523,12 @@
None,
]
- assert not swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ assert not swh_storage.metadata_authority_get(authority.type, authority.url)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
- authority_arg_names = ("type", "url", "metadata")
- authority_args = [authority[key] for key in authority_arg_names]
mock_memory.assert_has_calls(
- [call(*authority_args), call(*authority_args), call(*authority_args),]
+ [call([authority]), call([authority]), call([authority])]
)
@@ -562,49 +547,49 @@
authority = sample_data["authority"][0]
- swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ swh_storage.metadata_authority_get(authority.type, authority.url)
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
assert mock_memory.call_count == 1
-def test_retrying_proxy_storage_origin_metadata_add(swh_storage, sample_data):
- """Standard origin_metadata_add works as before
+def test_retrying_proxy_storage_object_metadata_add(swh_storage, sample_data):
+ """Standard object_metadata_add works as before
"""
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
- swh_storage.metadata_authority_add(**sample_data["authority"][0])
- swh_storage.metadata_fetcher_add(**sample_data["fetcher"][0])
+ swh_storage.origin_add_one({"url": ori_meta.id})
+ swh_storage.metadata_authority_add([sample_data["authority"][0]])
+ swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
- origin_metadata = swh_storage.origin_metadata_get(
- ori_meta["origin_url"], ori_meta["authority"]
+ origin_metadata = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority
)
assert origin_metadata["next_page_token"] is None
assert not origin_metadata["results"]
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
- origin_metadata = swh_storage.origin_metadata_get(
- ori_meta["origin_url"], ori_meta["authority"]
+ origin_metadata = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority
)
assert origin_metadata
-def test_retrying_proxy_storage_origin_metadata_add_with_retry(
+def test_retrying_proxy_storage_object_metadata_add_with_retry(
monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision
):
"""Multiple retries for hash collision and psycopg2 error but finally ok
"""
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
- swh_storage.metadata_authority_add(**sample_data["authority"][0])
- swh_storage.metadata_fetcher_add(**sample_data["fetcher"][0])
+ swh_storage.origin_add_one({"url": ori_meta.id})
+ swh_storage.metadata_authority_add([sample_data["authority"][0]])
+ swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_metadata_add"
+ "swh.storage.in_memory.InMemoryStorage.object_metadata_add"
)
mock_memory.side_effect = [
@@ -617,54 +602,33 @@
]
# No exception raised as insertion finally came through
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
mock_memory.assert_has_calls(
[ # 3 calls, as long as error raised
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
+ call([ori_meta]),
+ call([ori_meta]),
+ call([ori_meta]),
]
)
-def test_retrying_proxy_swh_storage_origin_metadata_add_failure(
+def test_retrying_proxy_swh_storage_object_metadata_add_failure(
swh_storage, sample_data, mocker
):
"""Unfiltered errors are raising without retry
"""
mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_metadata_add"
+ "swh.storage.in_memory.InMemoryStorage.object_metadata_add"
)
mock_memory.side_effect = StorageArgumentException("Refuse to add always!")
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
+ swh_storage.origin_add_one({"url": ori_meta.id})
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
assert mock_memory.call_count == 1
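
The retry tests above now assert repeated invocations with a single list argument. A self-contained sketch of that assertion pattern, using a plain Mock and a placeholder object rather than the actual retrying proxy:

    from unittest.mock import Mock, call

    # Stand-in for the mocked InMemoryStorage.object_metadata_add: two
    # transient failures, then success, as in the retry tests above.
    backend_add = Mock(
        side_effect=[RuntimeError("try again"), RuntimeError("try again"), None]
    )
    ori_meta = object()  # placeholder for a RawExtrinsicMetadata fixture

    for _ in range(3):
        try:
            backend_add([ori_meta])
            break
        except RuntimeError:
            continue

    # Each retried invocation takes one list argument, so the expected
    # call list collapses to repeated call([ori_meta]).
    backend_add.assert_has_calls(
        [call([ori_meta]), call([ori_meta]), call([ori_meta])]
    )
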
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -17,6 +17,7 @@
from datetime import timedelta
from unittest.mock import Mock
+import attr
import psycopg2
import pytest
@@ -26,6 +27,7 @@
from swh.model import from_disk, identifiers
from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
@@ -35,6 +37,7 @@
Release,
Revision,
Snapshot,
+ MetadataTargetType,
)
from swh.model.hypothesis_strategies import objects
from swh.model.hashutil import hash_to_hex
@@ -3221,50 +3224,51 @@
def test_metadata_fetcher_add_get(self, swh_storage):
actual_fetcher = swh_storage.metadata_fetcher_get(
- data.metadata_fetcher["name"], data.metadata_fetcher["version"]
+ data.metadata_fetcher.name, data.metadata_fetcher.version
)
assert actual_fetcher is None # does not exist
- swh_storage.metadata_fetcher_add(**data.metadata_fetcher)
+ swh_storage.metadata_fetcher_add([data.metadata_fetcher])
res = swh_storage.metadata_fetcher_get(
- data.metadata_fetcher["name"], data.metadata_fetcher["version"]
+ data.metadata_fetcher.name, data.metadata_fetcher.version
)
- assert res is not data.metadata_fetcher
assert res == data.metadata_fetcher
def test_metadata_authority_add_get(self, swh_storage):
actual_authority = swh_storage.metadata_authority_get(
- data.metadata_authority["type"], data.metadata_authority["url"]
+ data.metadata_authority.type, data.metadata_authority.url
)
assert actual_authority is None # does not exist
- swh_storage.metadata_authority_add(**data.metadata_authority)
+ swh_storage.metadata_authority_add([data.metadata_authority])
res = swh_storage.metadata_authority_get(
- data.metadata_authority["type"], data.metadata_authority["url"]
+ data.metadata_authority.type, data.metadata_authority.url
)
- assert res is not data.metadata_authority
assert res == data.metadata_authority
def test_content_metadata_add(self, swh_storage):
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
assert result["next_page_token"] is None
assert [data.content_metadata, data.content_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
def test_content_metadata_add_duplicate(self, swh_storage):
@@ -3272,81 +3276,81 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- new_content_metadata2 = {
- **data.content_metadata2,
- "format": "new-format",
- "metadata": b"new-metadata",
- }
+ new_content_metadata2 = attr.evolve(
+ data.content_metadata2, format="new-format", metadata=b"new-metadata",
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
- swh_storage.content_metadata_add(**new_content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
+ swh_storage.object_metadata_add([new_content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
assert result["next_page_token"] is None
expected_results1 = (data.content_metadata, new_content_metadata2)
expected_results2 = (data.content_metadata, data.content_metadata2)
- assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in (
+ assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in (
expected_results1, # cassandra
expected_results2, # postgresql
)
- def test_content_metadata_add_dict(self, swh_storage):
- fetcher = data.metadata_fetcher
- authority = data.metadata_authority
-
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
-
- kwargs = data.content_metadata.copy()
- kwargs["metadata"] = {"foo": "bar"}
-
- with pytest.raises(StorageArgumentException):
- swh_storage.content_metadata_add(**kwargs)
-
def test_content_metadata_get(self, swh_storage):
authority = data.metadata_authority
fetcher = data.metadata_fetcher
authority2 = data.metadata_authority2
fetcher2 = data.metadata_fetcher2
- content1_swhid = f"swh:1:cnt:{data.cont['sha1_git']}"
- content2_swhid = f"swh:1:cnt:{data.cont2['sha1_git']}"
+ content1_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(data.cont["sha1_git"])
+ )
+ content2_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(data.cont2["sha1_git"])
+ )
content1_metadata1 = data.content_metadata
content1_metadata2 = data.content_metadata2
content1_metadata3 = data.content_metadata3
- content2_metadata = {**data.content_metadata2, "id": content2_swhid}
+ content2_metadata = attr.evolve(data.content_metadata2, id=content2_swhid)
- swh_storage.metadata_authority_add(**authority)
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority2)
- swh_storage.metadata_fetcher_add(**fetcher2)
+ swh_storage.metadata_authority_add([authority, authority2])
+ swh_storage.metadata_fetcher_add([fetcher, fetcher2])
- swh_storage.content_metadata_add(**content1_metadata1)
- swh_storage.content_metadata_add(**content1_metadata2)
- swh_storage.content_metadata_add(**content1_metadata3)
- swh_storage.content_metadata_add(**content2_metadata)
+ swh_storage.object_metadata_add(
+ [
+ content1_metadata1,
+ content1_metadata2,
+ content1_metadata3,
+ content2_metadata,
+ ]
+ )
- result = swh_storage.content_metadata_get(content1_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content1_swhid, authority
+ )
assert result["next_page_token"] is None
assert [content1_metadata1, content1_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(content1_swhid, authority2)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content1_swhid, authority2
+ )
assert result["next_page_token"] is None
assert [content1_metadata3] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(content2_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content2_swhid, authority
+ )
assert result["next_page_token"] is None
assert [content2_metadata] == list(result["results"],)
@@ -3354,32 +3358,40 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- result = swh_storage.content_metadata_get(
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
content_swhid,
authority,
- after=data.content_metadata["discovery_date"] - timedelta(seconds=1),
+ after=data.content_metadata.discovery_date - timedelta(seconds=1),
)
assert result["next_page_token"] is None
assert [data.content_metadata, data.content_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(
- content_swhid, authority, after=data.content_metadata["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ after=data.content_metadata.discovery_date,
)
assert result["next_page_token"] is None
assert [data.content_metadata2] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, after=data.content_metadata2["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ after=data.content_metadata2.discovery_date,
)
assert result["next_page_token"] is None
assert [] == result["results"]
@@ -3388,22 +3400,31 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- swh_storage.content_metadata_get(content_swhid, authority)
+ swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
- result = swh_storage.content_metadata_get(content_swhid, authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.content_metadata] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [data.content_metadata2] == result["results"]
@@ -3413,47 +3434,68 @@
fetcher1 = data.metadata_fetcher
fetcher2 = data.metadata_fetcher2
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher1)
- swh_storage.metadata_fetcher_add(**fetcher2)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
+ swh_storage.metadata_authority_add([authority])
- content_metadata2 = {
- **data.content_metadata2,
- "discovery_date": data.content_metadata2["discovery_date"],
- "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],},
- }
+ content_metadata2 = attr.evolve(
+ data.content_metadata2,
+ discovery_date=data.content_metadata2.discovery_date,
+ fetcher=attr.evolve(fetcher2, metadata=None),
+ )
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.content_metadata] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [content_metadata2] == result["results"]
+ def test_content_metadata_get__invalid_id(self, swh_storage):
+ fetcher = data.metadata_fetcher
+ authority = data.metadata_authority
+
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
+
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
+
+ with pytest.raises(StorageArgumentException, match="SWHID"):
+ swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, data.origin["url"], authority
+ )
+
def test_origin_metadata_add(self, swh_storage):
origin = data.origin
fetcher = data.metadata_fetcher
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
assert result["next_page_token"] is None
assert [data.origin_metadata, data.origin_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date)
)
def test_origin_metadata_add_duplicate(self, swh_storage):
@@ -3463,46 +3505,30 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- new_origin_metadata2 = {
- **data.origin_metadata2,
- "format": "new-format",
- "metadata": b"new-metadata",
- }
+ new_origin_metadata2 = attr.evolve(
+ data.origin_metadata2, format="new-format", metadata=b"new-metadata",
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
- swh_storage.origin_metadata_add(**new_origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
+ swh_storage.object_metadata_add([new_origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
assert result["next_page_token"] is None
# which of the two behavior happens is backend-specific.
expected_results1 = (data.origin_metadata, new_origin_metadata2)
expected_results2 = (data.origin_metadata, data.origin_metadata2)
- assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in (
+ assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in (
expected_results1, # cassandra
expected_results2, # postgresql
)
- def test_origin_metadata_add_dict(self, swh_storage):
- origin = data.origin
- fetcher = data.metadata_fetcher
- authority = data.metadata_authority
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
-
- kwargs = data.origin_metadata.copy()
- kwargs["metadata"] = {"foo": "bar"}
-
- with pytest.raises(StorageArgumentException):
- swh_storage.origin_metadata_add(**kwargs)
-
def test_origin_metadata_get(self, swh_storage):
authority = data.metadata_authority
fetcher = data.metadata_fetcher
@@ -3515,31 +3541,34 @@
origin1_metadata1 = data.origin_metadata
origin1_metadata2 = data.origin_metadata2
origin1_metadata3 = data.origin_metadata3
- origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2}
+ origin2_metadata = attr.evolve(data.origin_metadata2, id=origin_url2)
- swh_storage.metadata_authority_add(**authority)
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority2)
- swh_storage.metadata_fetcher_add(**fetcher2)
+ swh_storage.metadata_authority_add([authority, authority2])
+ swh_storage.metadata_fetcher_add([fetcher, fetcher2])
- swh_storage.origin_metadata_add(**origin1_metadata1)
- swh_storage.origin_metadata_add(**origin1_metadata2)
- swh_storage.origin_metadata_add(**origin1_metadata3)
- swh_storage.origin_metadata_add(**origin2_metadata)
+ swh_storage.object_metadata_add(
+ [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
+ )
- result = swh_storage.origin_metadata_get(origin_url1, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url1, authority
+ )
assert result["next_page_token"] is None
assert [origin1_metadata1, origin1_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(origin_url1, authority2)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url1, authority2
+ )
assert result["next_page_token"] is None
assert [origin1_metadata3] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(origin_url2, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url2, authority
+ )
assert result["next_page_token"] is None
assert [origin2_metadata] == list(result["results"],)
@@ -3549,30 +3578,36 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- result = swh_storage.origin_metadata_get(
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
origin["url"],
authority,
- after=data.origin_metadata["discovery_date"] - timedelta(seconds=1),
+ after=data.origin_metadata.discovery_date - timedelta(seconds=1),
)
assert result["next_page_token"] is None
assert [data.origin_metadata, data.origin_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, after=data.origin_metadata["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ after=data.origin_metadata.discovery_date,
)
assert result["next_page_token"] is None
assert [data.origin_metadata2] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, after=data.origin_metadata2["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ after=data.origin_metadata2.discovery_date,
)
assert result["next_page_token"] is None
assert [] == result["results"]
@@ -3583,20 +3618,27 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- swh_storage.origin_metadata_get(origin["url"], authority)
+ swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
- result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.origin_metadata] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [data.origin_metadata2] == result["results"]
@@ -3608,29 +3650,74 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher1)
- swh_storage.metadata_fetcher_add(**fetcher2)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher1])
+ swh_storage.metadata_fetcher_add([fetcher2])
+ swh_storage.metadata_authority_add([authority])
- origin_metadata2 = {
- **data.origin_metadata2,
- "discovery_date": data.origin_metadata2["discovery_date"],
- "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],},
- }
+ origin_metadata2 = attr.evolve(
+ data.origin_metadata2,
+ discovery_date=data.origin_metadata2.discovery_date,
+ fetcher=attr.evolve(fetcher2, metadata=None),
+ )
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.origin_metadata] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [origin_metadata2] == result["results"]
+ def test_origin_metadata_add_missing_authority(self, swh_storage):
+ origin = data.origin
+ fetcher = data.metadata_fetcher
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_fetcher_add([fetcher])
+
+ with pytest.raises(StorageArgumentException, match="authority"):
+ swh_storage.object_metadata_add(
+ [data.origin_metadata, data.origin_metadata2]
+ )
+
+ def test_origin_metadata_add_missing_fetcher(self, swh_storage):
+ origin = data.origin
+ authority = data.metadata_authority
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_authority_add([authority])
+
+ with pytest.raises(StorageArgumentException, match="fetcher"):
+ swh_storage.object_metadata_add(
+ [data.origin_metadata, data.origin_metadata2]
+ )
+
+ def test_origin_metadata_get__invalid_id_type(self, swh_storage):
+ origin = data.origin
+ fetcher = data.metadata_fetcher
+ authority = data.metadata_authority
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
+
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
+
+ with pytest.raises(StorageArgumentException, match="SWHID"):
+ swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, data.content_metadata.id, authority,
+ )
+
class TestStorageGeneratedData:
def test_generate_content_get(self, swh_storage, swh_contents):
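
The storage tests above converge on one paginated endpoint, object_metadata_get(target_type, id, authority, ...). A hedged sketch of draining it page by page follows; iter_object_metadata is a hypothetical helper, and accepting page_token=None on the first call is an assumption (the tests simply omit it):

    from swh.model.model import MetadataTargetType

    def iter_object_metadata(storage, target_type, target_id, authority, limit=10):
        """Yield all metadata rows for one target, following next_page_token."""
        page_token = None
        while True:
            result = storage.object_metadata_get(
                target_type, target_id, authority,
                page_token=page_token, limit=limit,
            )
            yield from result["results"]
            page_token = result["next_page_token"]
            if page_token is None:
                break

    # e.g., for a content target, built the same way as in the tests above:
    # from swh.model.hashutil import hash_to_bytes
    # from swh.model.identifiers import SWHID
    # cnt_swhid = SWHID(object_type="content", object_id=hash_to_bytes("00" * 20))
    # rows = list(iter_object_metadata(
    #     storage, MetadataTargetType.CONTENT, cnt_swhid, authority
    # ))
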
diff --git a/swh/storage/utils.py b/swh/storage/utils.py
--- a/swh/storage/utils.py
+++ b/swh/storage/utils.py
@@ -6,7 +6,7 @@
import re
from datetime import datetime, timezone
-from typing import Dict, Optional, Tuple
+from typing import Callable, Dict, Optional, Tuple, TypeVar
from swh.model.hashutil import hash_to_bytes, hash_to_hex, DEFAULT_ALGORITHMS
@@ -15,6 +15,17 @@
return datetime.now(tz=timezone.utc)
+T1 = TypeVar("T1")
+T2 = TypeVar("T2")
+
+
+def map_optional(f: Callable[[T1], T2], x: Optional[T1]) -> Optional[T2]:
+ if x is None:
+ return None
+ else:
+ return f(x)
+
+
def _is_power_of_two(n: int) -> bool:
return n > 0 and n & (n - 1) == 0
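
The new map_optional helper threads None through a conversion instead of raising. Its contract is small enough to show directly; the parse_swhid use is an assumption about how nullable SWHID columns might be deserialized, not something this hunk shows:

    from swh.model.identifiers import parse_swhid
    from swh.storage.utils import map_optional

    # None passes through untouched; present values are converted.
    assert map_optional(int, None) is None
    assert map_optional(int, "42") == 42

    # A plausible use: reviving an optional SWHID column.
    snapshot_swhid = map_optional(parse_swhid, "swh:1:snp:" + "00" * 20)
    assert map_optional(parse_swhid, None) is None
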