D3456.id12343.diff

diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py
--- a/swh/storage/api/serializers.py
+++ b/swh/storage/api/serializers.py
@@ -7,6 +7,7 @@
from typing import Callable, Dict, List, Tuple
+from swh.model.identifiers import SWHID, parse_swhid
import swh.model.model as model
@@ -16,11 +17,23 @@
return d
+def _encode_model_enum(obj):
+ return {
+ "value": obj.value,
+ "__type__": type(obj).__name__,
+ }
+
+
ENCODERS: List[Tuple[type, str, Callable]] = [
(model.BaseModel, "model", _encode_model_object),
+ (SWHID, "swhid", str),
+ (model.MetadataTargetType, "model_enum", _encode_model_enum),
+ (model.MetadataAuthorityType, "model_enum", _encode_model_enum),
]
DECODERS: Dict[str, Callable] = {
- "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d)
+ "swhid": parse_swhid,
+ "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d),
+ "model_enum": lambda d: getattr(model, d.pop("__type__"))(d["value"]),
}
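
A minimal sketch of how the encoder/decoder pair added above round-trips a model enum, assuming the swh.model version this diff targets (which provides MetadataTargetType):

    from swh.model import model

    def _encode_model_enum(obj):
        # mirror of the "_encode_model_enum" encoder added above
        return {"value": obj.value, "__type__": type(obj).__name__}

    encoded = _encode_model_enum(model.MetadataTargetType.CONTENT)
    # encoded == {"value": "content", "__type__": "MetadataTargetType"}

    # mirror of the "model_enum" decoder: resolve the class by name, call it on the value
    decoded = getattr(model, encoded.pop("__type__"))(encoded["value"])
    assert decoded is model.MetadataTargetType.CONTENT
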
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -18,7 +18,6 @@
Optional,
Tuple,
TypeVar,
- Union,
)
from cassandra import CoordinationFailure
@@ -46,7 +45,6 @@
from .common import Row, TOKEN_BEGIN, TOKEN_END, hash_url
from .schema import CREATE_TABLES_QUERIES, HASH_ALGORITHMS
-from .. import extrinsic_metadata
logger = logging.getLogger(__name__)
@@ -899,36 +897,12 @@
f"VALUES ({', '.join('?' for _ in _object_metadata_keys)})"
)
def object_metadata_add(
- self,
- object_type: str,
- id: str,
- authority_type,
- authority_url,
- discovery_date,
- fetcher_name,
- fetcher_version,
- format,
- metadata,
- context: Dict[str, Union[str, bytes, int]],
- *,
- statement,
+ self, statement, **kwargs,
):
- params = [
- object_type,
- id,
- authority_type,
- authority_url,
- discovery_date,
- fetcher_name,
- fetcher_version,
- format,
- metadata,
- ]
-
- params.extend(
- context.get(key) for key in extrinsic_metadata.CONTEXT_KEYS[object_type]
- )
-
+ assert set(kwargs) == set(
+ self._object_metadata_keys
+ ), f"Bad kwargs: {set(kwargs)}"
+ params = [kwargs[key] for key in self._object_metadata_keys]
return self._execute_with_retries(statement, params,)
@_prepared_statement(
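
For illustration, a standalone sketch of the kwargs guard added above; the key list is truncated here and build_params is a hypothetical stand-in for the method on the CQL runner, which feeds a prepared statement:

    # Hypothetical standalone version of the new kwargs-based parameter building.
    _object_metadata_keys = ["type", "id", "authority_type", "authority_url"]  # truncated

    def build_params(**kwargs):
        # refuse unknown or missing columns up front...
        assert set(kwargs) == set(_object_metadata_keys), f"Bad kwargs: {set(kwargs)}"
        # ...then order the values to match the '?' placeholders of the statement
        return [kwargs[key] for key in _object_metadata_keys]

    params = build_params(
        type="content",
        id="swh:1:cnt:0000000000000000000000000000000000000000",
        authority_type="deposit",
        authority_url="http://hal.inria.example.com/",
    )
    assert params[0] == "content"
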
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -14,6 +14,8 @@
from deprecated import deprecated
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import parse_swhid, SWHID
+from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.model.model import (
Revision,
Release,
@@ -25,14 +27,17 @@
OriginVisitStatus,
Snapshot,
Origin,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
-from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.storage.objstorage import ObjStorage
from swh.storage.writer import JournalWriter
-from swh.storage.utils import now
+from swh.storage.utils import map_optional, now
from ..exc import StorageArgumentException, HashCollision
-from ..extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS
from .common import TOKEN_BEGIN, TOKEN_END
from .converters import (
revision_to_db,
@@ -1009,128 +1014,65 @@
def refresh_stat_counters(self):
pass
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- self._object_metadata_add(
- "content",
- id,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
-
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit,
- )
-
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- if not isinstance(origin_url, str):
- raise StorageArgumentException(
- "origin_url must be str, not %r" % (origin_url,)
- )
-
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata]) -> None:
+ for metadata_entry in metadata:
+ if not self._cql_runner.metadata_authority_get(
+ metadata_entry.authority.type.value, metadata_entry.authority.url
+ ):
+ raise StorageArgumentException(
+ f"Unknown authority {metadata_entry.authority}"
+ )
+ if not self._cql_runner.metadata_fetcher_get(
+ metadata_entry.fetcher.name, metadata_entry.fetcher.version
+ ):
+ raise StorageArgumentException(
+ f"Unknown fetcher {metadata_entry.fetcher}"
+ )
- self._object_metadata_add(
- "origin",
- origin_url,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
+ try:
+ self._cql_runner.object_metadata_add(
+ type=metadata_entry.type.value,
+ id=str(metadata_entry.id),
+ authority_type=metadata_entry.authority.type.value,
+ authority_url=metadata_entry.authority.url,
+ discovery_date=metadata_entry.discovery_date,
+ fetcher_name=metadata_entry.fetcher.name,
+ fetcher_version=metadata_entry.fetcher.version,
+ format=metadata_entry.format,
+ metadata=metadata_entry.metadata,
+ origin=metadata_entry.origin,
+ visit=metadata_entry.visit,
+ snapshot=map_optional(str, metadata_entry.snapshot),
+ release=map_optional(str, metadata_entry.release),
+ revision=map_optional(str, metadata_entry.revision),
+ path=metadata_entry.path,
+ directory=map_optional(str, metadata_entry.directory),
+ )
+ except TypeError as e:
+ raise StorageArgumentException(*e.args)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- if not isinstance(origin_url, str):
- raise TypeError("origin_url must be str, not %r" % (origin_url,))
-
- res = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit
- )
- for result in res["results"]:
- result["origin_url"] = result.pop("id")
-
- return res
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- context: Dict[str, Union[str, bytes, int]],
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
-
- if not self._cql_runner.metadata_authority_get(**authority):
- raise StorageArgumentException(f"Unknown authority {authority}")
- if not self._cql_runner.metadata_fetcher_get(**fetcher):
- raise StorageArgumentException(f"Unknown fetcher {fetcher}")
-
- try:
- self._cql_runner.object_metadata_add(
- object_type,
- id,
- authority["type"],
- authority["url"],
- discovery_date,
- fetcher["name"],
- fetcher["version"],
- format,
- metadata,
- context,
- )
- except TypeError as e:
- raise StorageArgumentException(*e.args)
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
+ else:
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
if page_token is not None:
(after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
page_token
@@ -1140,20 +1082,20 @@
"page_token is inconsistent with the value of 'after'."
)
entries = self._cql_runner.object_metadata_get_after_date_and_fetcher(
- id,
- authority["type"],
- authority["url"],
+ str(id),
+ authority.type.value,
+ authority.url,
after_date,
after_fetcher_name,
after_fetcher_url,
)
elif after is not None:
entries = self._cql_runner.object_metadata_get_after_date(
- id, authority["type"], authority["url"], after
+ str(id), authority.type.value, authority.url, after
)
else:
entries = self._cql_runner.object_metadata_get(
- id, authority["type"], authority["url"]
+ str(id), authority.type.value, authority.url
)
if limit:
@@ -1163,28 +1105,29 @@
for entry in entries:
discovery_date = entry.discovery_date.replace(tzinfo=datetime.timezone.utc)
- result = {
- "id": entry.id,
- "authority": {
- "type": entry.authority_type,
- "url": entry.authority_url,
- },
- "fetcher": {
- "name": entry.fetcher_name,
- "version": entry.fetcher_version,
- },
- "discovery_date": discovery_date,
- "format": entry.format,
- "metadata": entry.metadata,
- }
-
- if CONTEXT_KEYS[object_type]:
- context = {}
- for key in CONTEXT_KEYS[object_type]:
- value = getattr(entry, key)
- if value is not None:
- context[key] = value
- result["context"] = context
+ assert str(id) == entry.id
+
+ result = RawExtrinsicMetadata(
+ type=MetadataTargetType(entry.type),
+ id=id,
+ authority=MetadataAuthority(
+ type=MetadataAuthorityType(entry.authority_type),
+ url=entry.authority_url,
+ ),
+ fetcher=MetadataFetcher(
+ name=entry.fetcher_name, version=entry.fetcher_version,
+ ),
+ discovery_date=discovery_date,
+ format=entry.format,
+ metadata=entry.metadata,
+ origin=entry.origin,
+ visit=entry.visit,
+ snapshot=map_optional(parse_swhid, entry.snapshot),
+ release=map_optional(parse_swhid, entry.release),
+ revision=map_optional(parse_swhid, entry.revision),
+ path=entry.path,
+ directory=map_optional(parse_swhid, entry.directory),
+ )
results.append(result)
@@ -1194,9 +1137,9 @@
last_result = results[-1]
next_page_token: Optional[bytes] = msgpack_dumps(
(
- last_result["discovery_date"],
- last_result["fetcher"]["name"],
- last_result["fetcher"]["version"],
+ last_result.discovery_date,
+ last_result.fetcher.name,
+ last_result.fetcher.version,
)
)
else:
@@ -1207,35 +1150,45 @@
"results": results,
}
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- self._cql_runner.metadata_fetcher_add(name, version, json.dumps(metadata))
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher]) -> None:
+ for fetcher in fetchers:
+ self._cql_runner.metadata_fetcher_add(
+ fetcher.name,
+ fetcher.version,
+ json.dumps(map_optional(dict, fetcher.metadata)),
+ )
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
fetcher = self._cql_runner.metadata_fetcher_get(name, version)
if fetcher:
- return {
- "name": fetcher.name,
- "version": fetcher.version,
- "metadata": json.loads(fetcher.metadata),
- }
+ return MetadataFetcher(
+ name=fetcher.name,
+ version=fetcher.version,
+ metadata=json.loads(fetcher.metadata),
+ )
else:
return None
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- self._cql_runner.metadata_authority_add(url, type, json.dumps(metadata))
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ for authority in authorities:
+ self._cql_runner.metadata_authority_add(
+ authority.url,
+ authority.type.value,
+ json.dumps(map_optional(dict, authority.metadata)),
+ )
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
- authority = self._cql_runner.metadata_authority_get(type, url)
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
+ authority = self._cql_runner.metadata_authority_get(type.value, url)
if authority:
- return {
- "type": authority.type,
- "url": authority.url,
- "metadata": json.loads(authority.metadata),
- }
+ return MetadataAuthority(
+ type=MetadataAuthorityType(authority.type),
+ url=authority.url,
+ metadata=json.loads(authority.metadata),
+ )
else:
return None
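
The map_optional helper imported above converts optional SWHID fields in both directions (str on write, parse_swhid on read). A minimal sketch, assuming its usual definition in swh.storage.utils:

    from typing import Callable, Optional, TypeVar

    T = TypeVar("T")
    U = TypeVar("U")

    def map_optional(f: Callable[[T], U], x: Optional[T]) -> Optional[U]:
        # apply f unless the value is None, so unset context fields pass through
        return f(x) if x is not None else None

    assert map_optional(str, None) is None
    assert map_optional(str, 42) == "42"
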
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -6,7 +6,7 @@
import datetime
import random
import select
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Optional, Tuple
from swh.core.db import BaseDb
from swh.core.db.db_utils import stored_procedure, jsonize as _jsonize
@@ -1119,7 +1119,8 @@
"""
object_metadata_get_cols = [
- "id",
+ "object_metadata.id",
+ "object_metadata.type",
"discovery_date",
"metadata_authority.type",
"metadata_authority.url",
@@ -1128,16 +1129,14 @@
"metadata_fetcher.version",
*_object_metadata_context_cols,
"format",
- "metadata",
+ "object_metadata.metadata",
]
"""List of columns of the object_metadata, metadata_authority,
and metadata_fetcher tables, used when reading object metadata."""
_object_metadata_select_query = f"""
SELECT
- object_metadata.id AS id,
- {', '.join(object_metadata_get_cols[1:-1])},
- object_metadata.metadata AS metadata
+ {', '.join(object_metadata_get_cols)}
FROM object_metadata
INNER JOIN metadata_authority
ON (metadata_authority.id=authority_id)
@@ -1149,12 +1148,18 @@
self,
object_type: str,
id: str,
- context: Dict[str, Union[str, bytes, int]],
discovery_date: datetime.datetime,
authority_id: int,
fetcher_id: int,
format: str,
metadata: bytes,
+ origin: Optional[str],
+ visit: Optional[int],
+ snapshot: Optional[str],
+ release: Optional[str],
+ revision: Optional[str],
+ path: Optional[bytes],
+ directory: Optional[str],
cur,
):
query = self._object_metadata_insert_query
@@ -1166,9 +1171,14 @@
discovery_date=discovery_date,
format=format,
metadata=metadata,
+ origin=origin,
+ visit=visit,
+ snapshot=snapshot,
+ release=release,
+ revision=revision,
+ path=path,
+ directory=directory,
)
- for col in self._object_metadata_context_cols:
- args[col] = context.get(col)
params = [args[col] for col in self._object_metadata_insert_cols]
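
A hedged sketch of why the column list above is now table-qualified: object_metadata, metadata_authority, and metadata_fetcher share column names (id, type, metadata), so the joined SELECT must disambiguate them. The list below is truncated for illustration:

    object_metadata_get_cols = [
        "object_metadata.id",
        "object_metadata.type",
        "discovery_date",
        "object_metadata.metadata",
    ]
    query = f"""
        SELECT {', '.join(object_metadata_get_cols)}
        FROM object_metadata
        INNER JOIN metadata_authority ON (metadata_authority.id=authority_id)
    """
    print(query)
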
diff --git a/swh/storage/extrinsic_metadata.py b/swh/storage/extrinsic_metadata.py
deleted file mode 100644
--- a/swh/storage/extrinsic_metadata.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) 2020 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-from typing import Any, cast, Dict
-
-from swh.model.identifiers import PersistentId, parse_persistent_identifier
-
-from .exc import StorageArgumentException
-
-CONTEXT_KEYS: Dict[str, Dict[str, type]] = {}
-CONTEXT_KEYS["origin"] = {}
-CONTEXT_KEYS["snapshot"] = {"origin": str, "visit": int}
-CONTEXT_KEYS["release"] = {**CONTEXT_KEYS["snapshot"], "snapshot": PersistentId}
-CONTEXT_KEYS["revision"] = {**CONTEXT_KEYS["release"], "release": PersistentId}
-CONTEXT_KEYS["directory"] = {
- **CONTEXT_KEYS["revision"],
- "revision": PersistentId,
- "path": bytes,
-}
-CONTEXT_KEYS["content"] = {**CONTEXT_KEYS["directory"], "directory": PersistentId}
-
-
-def check_extrinsic_metadata_context(object_type: str, context: Dict[str, Any]):
- key_types = CONTEXT_KEYS[object_type]
-
- extra_keys = set(context) - set(key_types)
- if extra_keys:
- raise StorageArgumentException(f"Unknown context keys: {', '.join(extra_keys)}")
-
- for (key, value) in context.items():
- expected_type = key_types[key]
- expected_type_str = str(expected_type) # for display
-
- # If an SWHID is expected and a string is given, parse it
- if expected_type is PersistentId and isinstance(value, str):
- value = parse_persistent_identifier(value)
- expected_type_str = "PersistentId or str"
-
- # Check the type of the context value
- if not isinstance(value, expected_type):
- raise StorageArgumentException(
- f"Context key {key} must have type {expected_type_str}, "
- f"but is {value!r}"
- )
-
- # If it is an SWHID, check it is also a core SWHID.
- if expected_type is PersistentId:
- value = cast(PersistentId, value)
- if value.metadata != {}:
- raise StorageArgumentException(
- f"Context key {key} must be a core SWHID, "
- f"but it has qualifiers {', '.join(value.metadata)}."
- )
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -33,6 +33,7 @@
from deprecated import deprecated
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import SWHID
from swh.model.model import (
BaseContent,
Content,
@@ -45,6 +46,11 @@
OriginVisitStatus,
Origin,
SHA1_SIZE,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
@@ -52,7 +58,6 @@
from .converters import origin_url_to_sha1
from .exc import StorageArgumentException, HashCollision
-from .extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS
from .utils import get_partition_bounds_bytes
from .writer import JournalWriter
@@ -138,21 +143,33 @@
self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {}
self._persons = {}
- # {origin_url: {authority: [metadata]}}
+ # {object_type: {id: {authority: [metadata]}}}
self._object_metadata: Dict[
- str,
+ MetadataTargetType,
Dict[
- Hashable,
- SortedList[Tuple[datetime.datetime, FetcherKey], Dict[str, Any]],
+ Union[str, SWHID],
+ Dict[
+ Hashable,
+ SortedList[
+ Tuple[datetime.datetime, FetcherKey], RawExtrinsicMetadata
+ ],
+ ],
],
] = defaultdict(
lambda: defaultdict(
- lambda: SortedList(key=lambda x: (x["discovery_date"], x["fetcher"]))
+ lambda: defaultdict(
+ lambda: SortedList(
+ key=lambda x: (
+ x.discovery_date,
+ self._metadata_fetcher_key(x.fetcher),
+ )
+ )
+ )
)
) # noqa
- self._metadata_fetchers: Dict[FetcherKey, Dict[str, Any]] = {}
- self._metadata_authorities: Dict[Hashable, Dict[str, Any]] = {}
+ self._metadata_fetchers: Dict[FetcherKey, MetadataFetcher] = {}
+ self._metadata_authorities: Dict[Hashable, MetadataAuthority] = {}
self._objects = defaultdict(list)
self._sorted_sha1s = SortedList[bytes, bytes]()
@@ -1019,144 +1036,58 @@
def refresh_stat_counters(self):
pass
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- self._object_metadata_add(
- "content",
- id,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
-
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit
- )
-
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- if not isinstance(origin_url, str):
- raise StorageArgumentException(
- "origin_url must be str, not %r" % (origin_url,)
- )
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ for metadata_entry in metadata:
+ authority_key = self._metadata_authority_key(metadata_entry.authority)
+ if authority_key not in self._metadata_authorities:
+ raise StorageArgumentException(
+ f"Unknown authority {metadata_entry.authority}"
+ )
+ fetcher_key = self._metadata_fetcher_key(metadata_entry.fetcher)
+ if fetcher_key not in self._metadata_fetchers:
+ raise StorageArgumentException(
+ f"Unknown fetcher {metadata_entry.fetcher}"
+ )
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
+ object_metadata_list = self._object_metadata[metadata_entry.type][
+ metadata_entry.id
+ ][authority_key]
- self._object_metadata_add(
- "origin",
- origin_url,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- context,
- )
+ for existing_object_metadata in object_metadata_list:
+ if (
+ self._metadata_fetcher_key(existing_object_metadata.fetcher)
+ == fetcher_key
+ and existing_object_metadata.discovery_date
+ == metadata_entry.discovery_date
+ ):
+ # Duplicate of an existing one; ignore it.
+ break
+ else:
+ object_metadata_list.add(metadata_entry)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- if not isinstance(origin_url, str):
- raise TypeError("origin_url must be str, not %r" % (origin_url,))
-
- res = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit
- )
- res["results"] = copy.deepcopy(res["results"])
- for result in res["results"]:
- result["origin_url"] = result.pop("id")
-
- return res
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- context: Dict[str, Union[str, bytes, int]],
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
- if not isinstance(metadata, bytes):
- raise StorageArgumentException(
- "metadata must be bytes, not %r" % (metadata,)
- )
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
authority_key = self._metadata_authority_key(authority)
- if authority_key not in self._metadata_authorities:
- raise StorageArgumentException(f"Unknown authority {authority}")
- fetcher_key = self._metadata_fetcher_key(fetcher)
- if fetcher_key not in self._metadata_fetchers:
- raise StorageArgumentException(f"Unknown fetcher {fetcher}")
-
- object_metadata_list = self._object_metadata[id][authority_key]
-
- object_metadata: Dict[str, Any] = {
- "id": id,
- "discovery_date": discovery_date,
- "authority": authority_key,
- "fetcher": fetcher_key,
- "format": format,
- "metadata": metadata,
- }
-
- if CONTEXT_KEYS[object_type]:
- object_metadata["context"] = context
- for existing_object_metadata in object_metadata_list:
- if (
- existing_object_metadata["fetcher"] == fetcher_key
- and existing_object_metadata["discovery_date"] == discovery_date
- ):
- # Duplicate of an existing one; replace it.
- existing_object_metadata.update(object_metadata)
- break
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
else:
- object_metadata_list.add(object_metadata)
-
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- authority_key = self._metadata_authority_key(authority)
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
if page_token is not None:
(after_time, after_fetcher) = msgpack_loads(page_token)
@@ -1165,33 +1096,36 @@
raise StorageArgumentException(
"page_token is inconsistent with the value of 'after'."
)
- entries = self._object_metadata[id][authority_key].iter_after(
+ entries = self._object_metadata[object_type][id][authority_key].iter_after(
(after_time, after_fetcher)
)
elif after is not None:
- entries = self._object_metadata[id][authority_key].iter_from((after,))
- entries = (entry for entry in entries if entry["discovery_date"] > after)
+ entries = self._object_metadata[object_type][id][authority_key].iter_from(
+ (after,)
+ )
+ entries = (entry for entry in entries if entry.discovery_date > after)
else:
- entries = iter(self._object_metadata[id][authority_key])
+ entries = iter(self._object_metadata[object_type][id][authority_key])
if limit:
entries = itertools.islice(entries, 0, limit + 1)
results = []
for entry in entries:
- authority = self._metadata_authorities[entry["authority"]]
- fetcher = self._metadata_fetchers[entry["fetcher"]]
+ entry_authority = self._metadata_authorities[
+ self._metadata_authority_key(entry.authority)
+ ]
+ entry_fetcher = self._metadata_fetchers[
+ self._metadata_fetcher_key(entry.fetcher)
+ ]
if after:
- assert entry["discovery_date"] > after
+ assert entry.discovery_date > after
results.append(
- {
- **entry,
- "authority": {"type": authority["type"], "url": authority["url"],},
- "fetcher": {
- "name": fetcher["name"],
- "version": fetcher["version"],
- },
- }
+ attr.evolve(
+ entry,
+ authority=attr.evolve(entry_authority, metadata=None),
+ fetcher=attr.evolve(entry_fetcher, metadata=None),
+ )
)
if len(results) > limit:
@@ -1200,8 +1134,8 @@
last_result = results[-1]
next_page_token: Optional[bytes] = msgpack_dumps(
(
- last_result["discovery_date"],
- self._metadata_fetcher_key(last_result["fetcher"]),
+ last_result.discovery_date,
+ self._metadata_fetcher_key(last_result.fetcher),
)
)
else:
@@ -1212,37 +1146,38 @@
"results": results,
}
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- fetcher = {
- "name": name,
- "version": version,
- "metadata": metadata,
- }
- key = self._metadata_fetcher_key(fetcher)
- if key not in self._metadata_fetchers:
- self._metadata_fetchers[key] = fetcher
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher]) -> None:
+ for fetcher in fetchers:
+ if fetcher.metadata is None:
+ raise StorageArgumentException(
+ "MetadataFetcher.metadata may not be None in metadata_fetcher_add."
+ )
+ key = self._metadata_fetcher_key(fetcher)
+ if key not in self._metadata_fetchers:
+ self._metadata_fetchers[key] = fetcher
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
return self._metadata_fetchers.get(
- self._metadata_fetcher_key({"name": name, "version": version})
+ self._metadata_fetcher_key(MetadataFetcher(name=name, version=version))
)
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- authority = {
- "type": type,
- "url": url,
- "metadata": metadata,
- }
- key = self._metadata_authority_key(authority)
- self._metadata_authorities[key] = authority
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ for authority in authorities:
+ if authority.metadata is None:
+ raise StorageArgumentException(
+ "MetadataAuthority.metadata may not be None in "
+ "metadata_authority_add."
+ )
+ key = self._metadata_authority_key(authority)
+ self._metadata_authorities[key] = authority
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
return self._metadata_authorities.get(
- self._metadata_authority_key({"type": type, "url": url})
+ self._metadata_authority_key(MetadataAuthority(type=type, url=url))
)
def _get_origin_url(self, origin):
@@ -1267,12 +1202,12 @@
return tuple((key, content.get(key)) for key in sorted(DEFAULT_ALGORITHMS))
@staticmethod
- def _metadata_fetcher_key(fetcher: Dict) -> FetcherKey:
- return (fetcher["name"], fetcher["version"])
+ def _metadata_fetcher_key(fetcher: MetadataFetcher) -> FetcherKey:
+ return (fetcher.name, fetcher.version)
@staticmethod
- def _metadata_authority_key(authority: Dict) -> Hashable:
- return (authority["type"], authority["url"])
+ def _metadata_authority_key(authority: MetadataAuthority) -> Hashable:
+ return (authority.type, authority.url)
def diff_directories(self, from_dir, to_dir, track_renaming=False):
raise NotImplementedError("InMemoryStorage.diff_directories")
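
A minimal sketch of the three-level index introduced above (target type, then id, then authority key), with SortedList replaced by a plain list and illustrative keys:

    from collections import defaultdict

    # The real structure sorts entries by (discovery_date, fetcher_key) so that
    # iter_after() can resume exactly where a page token left off.
    index = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))
    index["origin"]["https://example.org/repo.git"][
        ("deposit", "http://hal.inria.example.com/")
    ].append("metadata-entry")
    assert len(index["origin"]["https://example.org/repo.git"]) == 1
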
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -8,6 +8,7 @@
from typing import Any, Dict, Iterable, List, Optional, Union
from swh.core.api import remote_api_endpoint
+from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
@@ -18,6 +19,11 @@
Release,
Snapshot,
SkippedContent,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
@@ -1107,118 +1113,38 @@
"""Recomputes the statistics for `stat_counters`."""
...
- @remote_api_endpoint("content/metadata/add")
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- """Add a content_metadata for the content at discovery_date,
- obtained using the `fetcher` from the `authority`.
-
- The authority and fetcher must be known to the storage before
- using this endpoint.
-
- If there is already content metadata for the same content, authority,
- fetcher, and at the same date; the new one will be either dropped or
- will replace the existing one
- (it is unspecified which one of these two behaviors happens).
-
- Args:
- discovery_date: when the metadata was fetched.
- authority: a dict containing keys `type` and `url`.
- fetcher: a dict containing keys `name` and `version`.
- format: text field indicating the format of the content of the
- metadata: blob of raw metadata
- """
- ...
-
- @remote_api_endpoint("content/metadata/get")
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- ) -> Dict[str, Any]:
- """Retrieve list of all content_metadata entries for the id
-
- Args:
- id: the content's SWHID
- authority: a dict containing keys `type` and `url`.
- after: minimum discovery_date for a result to be returned
- page_token: opaque token, used to get the next page of results
- limit: maximum number of results to be returned
-
- Returns:
- dict with keys `next_page_token` and `results`.
- `next_page_token` is an opaque token that is used to get the
- next page of results, or `None` if there are no more results.
- `results` is a list of dicts in the format:
-
- .. code-block: python
-
- {
- 'authority': {'type': ..., 'url': ...},
- 'fetcher': {'name': ..., 'version': ...},
- 'discovery_date': ...,
- 'format': '...',
- 'metadata': b'...',
- 'context': { ... },
- }
-
- """
- ...
-
- @remote_api_endpoint("origin/metadata/add")
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- """Add an origin_metadata for the origin at discovery_date,
- obtained using the `fetcher` from the `authority`.
+ @remote_api_endpoint("object_metadata/add")
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ """Add extrinsic metadata on objects (contents, directories, ...).
The authority and fetcher must be known to the storage before
using this endpoint.
- If there is already origin metadata for the same origin, authority,
+ If there is already metadata for the same object, authority,
fetcher, and at the same date; the new one will be either dropped or
will replace the existing one
(it is unspecified which one of these two behaviors happens).
Args:
- discovery_date: when the metadata was fetched.
- authority: a dict containing keys `type` and `url`.
- fetcher: a dict containing keys `name` and `version`.
- format: text field indicating the format of the content of the
- metadata: blob of raw metadata
+ metadata: iterable of RawExtrinsicMetadata objects to be inserted.
"""
...
- @remote_api_endpoint("origin/metadata/get")
- def origin_metadata_get(
+ @remote_api_endpoint("object_metadata/get")
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
- ) -> Dict[str, Any]:
- """Retrieve list of all origin_metadata entries for the origin_url
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ """Retrieve list of all object_metadata entries for the id
Args:
- origin_url: the origin's URL
+ object_type: one of the values of swh.model.model.MetadataTargetType
+            id: a URL if object_type is 'origin', else a core SWHID
-            authority: a dict containing keys `type` and `url`.
+            authority: the MetadataAuthority to retrieve metadata from
after: minimum discovery_date for a result to be returned
page_token: opaque token, used to get the next page of results
@@ -1228,40 +1154,30 @@
dict with keys `next_page_token` and `results`.
`next_page_token` is an opaque token that is used to get the
next page of results, or `None` if there are no more results.
- `results` is a list of dicts in the format:
-
- .. code-block: python
-
- {
- 'authority': {'type': ..., 'url': ...},
- 'fetcher': {'name': ..., 'version': ...},
- 'discovery_date': ...,
- 'format': '...',
- 'metadata': b'...'
- }
+            `results` is a list of RawExtrinsicMetadata objects.
"""
...
- @remote_api_endpoint("fetcher/add")
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- """Add a new metadata fetcher to the storage.
+ @remote_api_endpoint("metadata_fetcher/add")
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None:
+ """Add new metadata fetchers to the storage.
- `name` and `version` together are a unique identifier of this
+        Their `name` and `version` together uniquely identify each
         fetcher; and `metadata` is an arbitrary dict of JSONable data
-        with information about this fetcher.
+        with information about each fetcher, which must not be `None`
+        (but may be empty).
Args:
- name: the name of the fetcher
- version: version of the fetcher
+ fetchers: iterable of MetadataFetcher to be inserted
"""
...
- @remote_api_endpoint("fetcher/get")
- def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]:
+ @remote_api_endpoint("metadata_fetcher/get")
+ def metadata_fetcher_get(
+ self, name: str, version: str
+ ) -> Optional[MetadataFetcher]:
"""Retrieve information about a fetcher
Args:
@@ -1269,27 +1185,30 @@
version: version of the fetcher
Returns:
- dictionary with keys `name`, `version`, and `metadata`; or None
- if the fetcher is not known
+ a MetadataFetcher object (with a non-None metadata field) if it is known,
+ else None.
"""
...
- @remote_api_endpoint("authority/add")
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- """Add a metadata authority
+ @remote_api_endpoint("metadata_authority/add")
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ """Add new metadata authorities to the storage.
+
+        Their `type` and `url` together uniquely identify each
+        authority; and `metadata` is an arbitrary dict of JSONable data
+        with information about each authority, which must not be `None`
+ (but may be empty).
Args:
- type: one of "deposit", "forge", or "registry"
- url: unique URI identifying the authority
- metadata: JSON-encodable object
+ authorities: iterable of MetadataAuthority to be inserted
"""
...
- @remote_api_endpoint("authority/get")
- def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]:
+ @remote_api_endpoint("metadata_authority/get")
+ def metadata_authority_get(
+ self, type: MetadataAuthorityType, url: str
+ ) -> Optional[MetadataAuthority]:
"""Retrieve information about an authority
Args:
@@ -1297,8 +1216,8 @@
url: unique URI identifying the authority
Returns:
- dictionary with keys `type`, `url`, and `metadata`; or None
- if the authority is not known
+ a MetadataAuthority object (with a non-None metadata field) if it is known,
+ else None.
"""
...
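
A usage sketch of the reworked interface against the in-memory backend; the URL and dates are illustrative, and the call pattern mirrors the test data later in this diff (assuming get_storage("memory") and the swh.model/swh.storage versions this diff targets):

    import datetime

    import attr

    from swh.model.model import (
        MetadataAuthority,
        MetadataAuthorityType,
        MetadataFetcher,
        MetadataTargetType,
        RawExtrinsicMetadata,
    )
    from swh.storage import get_storage

    storage = get_storage("memory")

    authority = MetadataAuthority(
        type=MetadataAuthorityType.DEPOSIT,
        url="http://hal.inria.example.com/",
        metadata={},
    )
    fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1", metadata={})
    storage.metadata_authority_add([authority])
    storage.metadata_fetcher_add([fetcher])

    storage.object_metadata_add(
        [
            RawExtrinsicMetadata(
                type=MetadataTargetType.ORIGIN,
                id="https://example.org/repo.git",
                authority=attr.evolve(authority, metadata=None),
                fetcher=attr.evolve(fetcher, metadata=None),
                discovery_date=datetime.datetime.now(tz=datetime.timezone.utc),
                format="json",
                metadata=b'{"foo": "bar"}',
            )
        ]
    )
    result = storage.object_metadata_get(
        MetadataTargetType.ORIGIN, "https://example.org/repo.git", authority
    )
    assert result["next_page_token"] is None
    assert len(result["results"]) == 1
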
diff --git a/swh/storage/retry.py b/swh/storage/retry.py
--- a/swh/storage/retry.py
+++ b/swh/storage/retry.py
@@ -6,8 +6,7 @@
import logging
import traceback
-from datetime import datetime
-from typing import Any, Dict, Iterable, Optional
+from typing import Dict, Iterable, Optional
from tenacity import (
retry,
@@ -24,6 +23,9 @@
Snapshot,
Origin,
OriginVisit,
+ MetadataAuthority,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
)
from swh.storage import get_storage
@@ -112,30 +114,16 @@
return self.storage.origin_visit_add(visits)
@swh_retry
- def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any]
- ) -> None:
- return self.storage.metadata_fetcher_add(name, version, metadata)
+ def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None:
+ return self.storage.metadata_fetcher_add(fetchers)
@swh_retry
- def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any]
- ) -> None:
- return self.storage.metadata_authority_add(type, url, metadata)
+ def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None:
+ return self.storage.metadata_authority_add(authorities)
@swh_retry
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- ) -> None:
- return self.storage.origin_metadata_add(
- origin_url, discovery_date, authority, fetcher, format, metadata
- )
+ def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None:
+ return self.storage.object_metadata_add(metadata)
@swh_retry
def directory_add(self, directories: Iterable[Directory]) -> Dict:
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -10,7 +10,15 @@
from collections import defaultdict
from contextlib import contextmanager
from deprecated import deprecated
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import (
+ Any,
+ Counter,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Union,
+)
import attr
import psycopg2
@@ -18,6 +26,7 @@
import psycopg2.errors
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
+from swh.model.identifiers import parse_swhid, SWHID
from swh.model.model import (
Content,
Directory,
@@ -29,6 +38,11 @@
SkippedContent,
Snapshot,
SHA1_SIZE,
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
@@ -36,16 +50,12 @@
from swh.storage.utils import now
from . import converters
-from .extrinsic_metadata import (
- check_extrinsic_metadata_context,
- CONTEXT_KEYS,
-)
from .common import db_transaction_generator, db_transaction
from .db import Db
from .exc import StorageArgumentException, StorageDBError, HashCollision
from .algos import diff
from .metrics import timed, send_metric, process_metrics
-from .utils import get_partition_bounds_bytes, extract_collision_hash
+from .utils import get_partition_bounds_bytes, extract_collision_hash, map_optional
from .writer import JournalWriter
@@ -1129,150 +1139,66 @@
for key in keys:
cur.execute("select * from swh_update_counter(%s)", (key,))
- @timed
@db_transaction()
- def content_metadata_add(
- self,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db=None,
- cur=None,
+ def object_metadata_add(
+ self, metadata: Iterable[RawExtrinsicMetadata], db, cur,
) -> None:
- self._object_metadata_add(
- "content",
- id,
- context,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- db,
- cur,
- )
+ counter = Counter[MetadataTargetType]()
+ for metadata_entry in metadata:
+ authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
+ fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
+
+ db.object_metadata_add(
+ object_type=metadata_entry.type.value,
+ id=str(metadata_entry.id),
+ discovery_date=metadata_entry.discovery_date,
+ authority_id=authority_id,
+ fetcher_id=fetcher_id,
+ format=metadata_entry.format,
+ metadata=metadata_entry.metadata,
+ origin=metadata_entry.origin,
+ visit=metadata_entry.visit,
+ snapshot=map_optional(str, metadata_entry.snapshot),
+ release=map_optional(str, metadata_entry.release),
+ revision=map_optional(str, metadata_entry.revision),
+ path=metadata_entry.path,
+ directory=map_optional(str, metadata_entry.directory),
+ cur=cur,
+ )
+ counter[metadata_entry.type] += 1
- @timed
- @db_transaction()
- def content_metadata_get(
- self,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime] = None,
- page_token: Optional[bytes] = None,
- limit: int = 1000,
- db=None,
- cur=None,
- ) -> Dict[str, Any]:
- return self._object_metadata_get(
- "content", id, authority, after, page_token, limit, db, cur
- )
+ for (object_type, count) in counter.items():
+ send_metric(
+ f"{object_type.value}_metadata:add",
+ count=count,
+ method_name=f"{object_type.value}_metadata_add",
+ )
- @timed
@db_transaction()
- def origin_metadata_add(
- self,
- origin_url: str,
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db=None,
- cur=None,
- ) -> None:
- context: Dict[str, Union[str, bytes, int]] = {} # origins have no context
-
- self._object_metadata_add(
- "origin",
- origin_url,
- context,
- discovery_date,
- authority,
- fetcher,
- format,
- metadata,
- db,
- cur,
- )
-
- @timed
- @db_transaction(statement_timeout=500)
- def origin_metadata_get(
+ def object_metadata_get(
self,
- origin_url: str,
- authority: Dict[str, str],
+ object_type: MetadataTargetType,
+ id: Union[str, SWHID],
+ authority: MetadataAuthority,
after: Optional[datetime.datetime] = None,
page_token: Optional[bytes] = None,
limit: int = 1000,
db=None,
cur=None,
- ) -> Dict[str, Any]:
- result = self._object_metadata_get(
- "origin", origin_url, authority, after, page_token, limit, db, cur
- )
-
- for res in result["results"]:
- res.pop("id")
- res["origin_url"] = origin_url
-
- return result
-
- def _object_metadata_add(
- self,
- object_type: str,
- id: str,
- context: Dict[str, Union[str, bytes, int]],
- discovery_date: datetime.datetime,
- authority: Dict[str, Any],
- fetcher: Dict[str, Any],
- format: str,
- metadata: bytes,
- db,
- cur,
- ) -> None:
- check_extrinsic_metadata_context(object_type, context)
-
- authority_id = self._get_authority_id(authority, db, cur)
- fetcher_id = self._get_fetcher_id(fetcher, db, cur)
- if not isinstance(metadata, bytes):
- raise StorageArgumentException(
- "metadata must be bytes, not %r" % (metadata,)
- )
-
- db.object_metadata_add(
- object_type,
- id,
- context,
- discovery_date,
- authority_id,
- fetcher_id,
- format,
- metadata,
- cur,
- )
-
- send_metric(
- f"{object_type}_metadata:add",
- count=1,
- method_name=f"{object_type}_metadata_add",
- )
+ ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]:
+ if object_type == MetadataTargetType.ORIGIN:
+ if isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type='origin', but "
+ f"provided id is an SWHID: {id!r}"
+ )
+ else:
+ if not isinstance(id, SWHID):
+ raise StorageArgumentException(
+ f"object_metadata_get called with object_type!='origin', but "
+ f"provided id is not an SWHID: {id!r}"
+ )
- def _object_metadata_get(
- self,
- object_type: str,
- id: str,
- authority: Dict[str, str],
- after: Optional[datetime.datetime],
- page_token: Optional[bytes],
- limit: int,
- db,
- cur,
- ) -> Dict[str, Any]:
if page_token:
(after_time, after_fetcher) = msgpack_loads(page_token)
if after and after_time < after:
@@ -1283,9 +1209,7 @@
after_time = after
after_fetcher = None
- authority_id = db.metadata_authority_get_id(
- authority["type"], authority["url"], cur
- )
+ authority_id = self._get_authority_id(authority, db, cur)
if not authority_id:
return {
"next_page_token": None,
@@ -1293,36 +1217,44 @@
}
rows = db.object_metadata_get(
- object_type, id, authority_id, after_time, after_fetcher, limit + 1, cur
+ object_type,
+ str(id),
+ authority_id,
+ after_time,
+ after_fetcher,
+ limit + 1,
+ cur,
)
rows = [dict(zip(db.object_metadata_get_cols, row)) for row in rows]
results = []
for row in rows:
row = row.copy()
row.pop("metadata_fetcher.id")
- context = {}
- for key in CONTEXT_KEYS[object_type]:
- value = row[key]
- if value is not None:
- context[key] = value
-
- result = {
- "id": row["id"],
- "authority": {
- "type": row.pop("metadata_authority.type"),
- "url": row.pop("metadata_authority.url"),
- },
- "fetcher": {
- "name": row.pop("metadata_fetcher.name"),
- "version": row.pop("metadata_fetcher.version"),
- },
- "discovery_date": row["discovery_date"],
- "format": row["format"],
- "metadata": row["metadata"],
- }
- if CONTEXT_KEYS[object_type]:
- result["context"] = context
+ assert str(id) == row["object_metadata.id"]
+
+ result = RawExtrinsicMetadata(
+ type=MetadataTargetType(row["object_metadata.type"]),
+ id=id,
+ authority=MetadataAuthority(
+ type=MetadataAuthorityType(row["metadata_authority.type"]),
+ url=row["metadata_authority.url"],
+ ),
+ fetcher=MetadataFetcher(
+ name=row["metadata_fetcher.name"],
+ version=row["metadata_fetcher.version"],
+ ),
+ discovery_date=row["discovery_date"],
+ format=row["format"],
+ metadata=row["object_metadata.metadata"],
+ origin=row["origin"],
+ visit=row["visit"],
+ snapshot=map_optional(parse_swhid, row["snapshot"]),
+ release=map_optional(parse_swhid, row["release"]),
+ revision=map_optional(parse_swhid, row["revision"]),
+ path=row["path"],
+ directory=map_optional(parse_swhid, row["directory"]),
+ )
results.append(result)
@@ -1347,38 +1279,55 @@
@timed
@db_transaction()
def metadata_fetcher_add(
- self, name: str, version: str, metadata: Dict[str, Any], db=None, cur=None
+ self, fetchers: Iterable[MetadataFetcher], db=None, cur=None
) -> None:
- db.metadata_fetcher_add(name, version, metadata)
- send_metric("metadata_fetcher:add", count=1, method_name="metadata_fetcher")
+ for (i, fetcher) in enumerate(fetchers):
+ if fetcher.metadata is None:
+ raise StorageArgumentException(
+ "MetadataFetcher.metadata may not be None in metadata_fetcher_add."
+ )
+ db.metadata_fetcher_add(
+ fetcher.name, fetcher.version, dict(fetcher.metadata), cur=cur
+ )
+ send_metric("metadata_fetcher:add", count=i + 1, method_name="metadata_fetcher")
@timed
@db_transaction(statement_timeout=500)
def metadata_fetcher_get(
self, name: str, version: str, db=None, cur=None
- ) -> Optional[Dict[str, Any]]:
+ ) -> Optional[MetadataFetcher]:
row = db.metadata_fetcher_get(name, version, cur=cur)
if not row:
return None
- return dict(zip(db.metadata_fetcher_cols, row))
+ return MetadataFetcher.from_dict(dict(zip(db.metadata_fetcher_cols, row)))
@timed
@db_transaction()
def metadata_authority_add(
- self, type: str, url: str, metadata: Dict[str, Any], db=None, cur=None
+ self, authorities: Iterable[MetadataAuthority], db=None, cur=None
) -> None:
- db.metadata_authority_add(type, url, metadata, cur)
- send_metric("metadata_authority:add", count=1, method_name="metadata_authority")
+ for (i, authority) in enumerate(authorities):
+ if authority.metadata is None:
+ raise StorageArgumentException(
+ "MetadataAuthority.metadata may not be None in "
+ "metadata_authority_add."
+ )
+ db.metadata_authority_add(
+ authority.type.value, authority.url, dict(authority.metadata), cur=cur
+ )
+ send_metric(
+ "metadata_authority:add", count=i + 1, method_name="metadata_authority"
+ )
@timed
@db_transaction()
def metadata_authority_get(
- self, type: str, url: str, db=None, cur=None
- ) -> Optional[Dict[str, Any]]:
- row = db.metadata_authority_get(type, url, cur=cur)
+ self, type: MetadataAuthorityType, url: str, db=None, cur=None
+ ) -> Optional[MetadataAuthority]:
+ row = db.metadata_authority_get(type.value, url, cur=cur)
if not row:
return None
- return dict(zip(db.metadata_authority_cols, row))
+ return MetadataAuthority.from_dict(dict(zip(db.metadata_authority_cols, row)))
@timed
def diff_directories(self, from_dir, to_dir, track_renaming=False):
@@ -1401,18 +1350,16 @@
def flush(self, object_types: Optional[Iterable[str]] = None) -> Dict:
return {}
- def _get_authority_id(self, authority: Dict[str, Any], db, cur):
+ def _get_authority_id(self, authority: MetadataAuthority, db, cur):
authority_id = db.metadata_authority_get_id(
- authority["type"], authority["url"], cur
+ authority.type.value, authority.url, cur
)
if not authority_id:
raise StorageArgumentException(f"Unknown authority {authority}")
return authority_id
- def _get_fetcher_id(self, fetcher: Dict[str, Any], db, cur):
- fetcher_id = db.metadata_fetcher_get_id(
- fetcher["name"], fetcher["version"], cur
- )
+ def _get_fetcher_id(self, fetcher: MetadataFetcher, db, cur):
+ fetcher_id = db.metadata_fetcher_get_id(fetcher.name, fetcher.version, cur)
if not fetcher_id:
raise StorageArgumentException(f"Unknown fetcher {fetcher}")
return fetcher_id
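
A stand-in sketch of the per-type metric counting introduced in object_metadata_add above, using print in place of send_metric; it preserves the old per-endpoint metric names while the endpoint itself is now batched:

    from typing import Counter

    counter = Counter[str]()
    for object_type in ["origin", "origin", "content"]:
        counter[object_type] += 1
    for object_type, count in counter.items():
        # one metric per target type, e.g. "origin_metadata:add count=2"
        print(f"{object_type}_metadata:add count={count}")
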
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -4,8 +4,19 @@
# See top-level LICENSE file for more information
import datetime
+
+import attr
+
from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.model import from_disk
+from swh.model.identifiers import parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ RawExtrinsicMetadata,
+ MetadataTargetType,
+)
class StorageData:
@@ -335,27 +346,21 @@
origins = (origin, origin2)
-metadata_authority = {
- "type": "deposit",
- "url": "http://hal.inria.example.com/",
- "metadata": {"location": "France"},
-}
-metadata_authority2 = {
- "type": "registry",
- "url": "http://wikidata.example.com/",
- "metadata": {},
-}
+metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT,
+ url="http://hal.inria.example.com/",
+ metadata={"location": "France"},
+)
+metadata_authority2 = MetadataAuthority(
+ type=MetadataAuthorityType.REGISTRY,
+ url="http://wikidata.example.com/",
+ metadata={},
+)
-metadata_fetcher = {
- "name": "swh-deposit",
- "version": "0.0.1",
- "metadata": {"sword_version": "2"},
-}
-metadata_fetcher2 = {
- "name": "swh-example",
- "version": "0.0.1",
- "metadata": {},
-}
+metadata_fetcher = MetadataFetcher(
+ name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"},
+)
+metadata_fetcher2 = MetadataFetcher(name="swh-example", version="0.0.1", metadata={},)
date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc)
type_visit1 = "git"
@@ -475,114 +480,82 @@
snapshots = (snapshot, empty_snapshot, complete_snapshot)
-content_metadata = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {"origin": origin["url"]},
- "discovery_date": datetime.datetime(
+content_metadata = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ origin=origin["url"],
+ discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "json",
- "metadata": b'{"foo": "bar"}',
-}
-content_metadata2 = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {"origin": origin2["url"]},
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="json",
+ metadata=b'{"foo": "bar"}',
+)
+content_metadata2 = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ origin=origin2["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-content_metadata3 = {
- "id": f"swh:1:cnt:{cont['sha1_git']}",
- "context": {
- "origin": origin["url"],
- "visit": 42,
- "snapshot": f"swh:1:snp:{hash_to_hex(snapshot['id'])}",
- "release": f"swh:1:rel:{hash_to_hex(release['id'])}",
- "revision": f"swh:1:rev:{hash_to_hex(revision['id'])}",
- "directory": f"swh:1:dir:{hash_to_hex(dir['id'])}",
- "path": b"/foo/bar",
- },
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
+content_metadata3 = RawExtrinsicMetadata(
+ type=MetadataTargetType.CONTENT,
+ id=parse_swhid(f"swh:1:cnt:{hash_to_hex(cont['sha1_git'])}"),
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority2["type"],
- "url": metadata_authority2["url"],
- },
- "fetcher": {
- "name": metadata_fetcher2["name"],
- "version": metadata_fetcher2["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-
-origin_metadata = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority2, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+ origin=origin["url"],
+ visit=42,
+ snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot['id'])}"),
+ release=parse_swhid(f"swh:1:rel:{hash_to_hex(release['id'])}"),
+ revision=parse_swhid(f"swh:1:rev:{hash_to_hex(revision['id'])}"),
+ directory=parse_swhid(f"swh:1:dir:{hash_to_hex(dir['id'])}"),
+ path=b"/foo/bar",
+)
+
+origin_metadata = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "json",
- "metadata": b'{"foo": "bar"}',
-}
-origin_metadata2 = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="json",
+ metadata=b'{"foo": "bar"}',
+)
+origin_metadata2 = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority["type"],
- "url": metadata_authority["url"],
- },
- "fetcher": {
- "name": metadata_fetcher["name"],
- "version": metadata_fetcher["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
-origin_metadata3 = {
- "origin_url": origin["url"],
- "discovery_date": datetime.datetime(
+ authority=attr.evolve(metadata_authority, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
+origin_metadata3 = RawExtrinsicMetadata(
+ type=MetadataTargetType.ORIGIN,
+ id=origin["url"],
+ discovery_date=datetime.datetime(
2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
),
- "authority": {
- "type": metadata_authority2["type"],
- "url": metadata_authority2["url"],
- },
- "fetcher": {
- "name": metadata_fetcher2["name"],
- "version": metadata_fetcher2["version"],
- },
- "format": "yaml",
- "metadata": b"foo: bar",
-}
+ authority=attr.evolve(metadata_authority2, metadata=None),
+ fetcher=attr.evolve(metadata_fetcher2, metadata=None),
+ format="yaml",
+ metadata=b"foo: bar",
+)
person = {
"name": b"John Doe",
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -17,6 +17,7 @@
SkippedContent,
Origin,
OriginVisit,
+ MetadataTargetType,
)
from swh.storage import get_storage
@@ -426,16 +427,12 @@
"""
fetcher = sample_data["fetcher"][0]
- metadata_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ metadata_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not metadata_fetcher
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert actual_fetcher == fetcher
@@ -458,19 +455,13 @@
[fetcher],
]
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not actual_fetcher
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
mock_memory.assert_has_calls(
- [
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- call(fetcher["name"], fetcher["version"], fetcher["metadata"]),
- ]
+ [call([fetcher]), call([fetcher]), call([fetcher]),]
)
@@ -489,13 +480,11 @@
fetcher = sample_data["fetcher"][0]
- actual_fetcher = swh_storage.metadata_fetcher_get(
- fetcher["name"], fetcher["version"]
- )
+ actual_fetcher = swh_storage.metadata_fetcher_get(fetcher.name, fetcher.version)
assert not actual_fetcher
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.metadata_fetcher_add(**fetcher)
+ swh_storage.metadata_fetcher_add([fetcher])
assert mock_memory.call_count == 1
@@ -506,13 +495,11 @@
"""
authority = sample_data["authority"][0]
- assert not swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ assert not swh_storage.metadata_authority_get(authority.type, authority.url)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
- actual_authority = swh_storage.metadata_authority_get(
- authority["type"], authority["url"]
- )
+ actual_authority = swh_storage.metadata_authority_get(authority.type, authority.url)
assert actual_authority == authority
@@ -536,14 +523,12 @@
None,
]
- assert not swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ assert not swh_storage.metadata_authority_get(authority.type, authority.url)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
- authority_arg_names = ("type", "url", "metadata")
- authority_args = [authority[key] for key in authority_arg_names]
mock_memory.assert_has_calls(
- [call(*authority_args), call(*authority_args), call(*authority_args),]
+ [call([authority]), call([authority]), call([authority])]
)
@@ -562,49 +547,49 @@
authority = sample_data["authority"][0]
- swh_storage.metadata_authority_get(authority["type"], authority["url"])
+ swh_storage.metadata_authority_get(authority.type, authority.url)
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_authority_add([authority])
assert mock_memory.call_count == 1
-def test_retrying_proxy_storage_origin_metadata_add(swh_storage, sample_data):
- """Standard origin_metadata_add works as before
+def test_retrying_proxy_storage_object_metadata_add(swh_storage, sample_data):
+ """Standard object_metadata_add works as before
"""
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
- swh_storage.metadata_authority_add(**sample_data["authority"][0])
- swh_storage.metadata_fetcher_add(**sample_data["fetcher"][0])
+ swh_storage.origin_add_one({"url": ori_meta.id})
+ swh_storage.metadata_authority_add([sample_data["authority"][0]])
+ swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
- origin_metadata = swh_storage.origin_metadata_get(
- ori_meta["origin_url"], ori_meta["authority"]
+ origin_metadata = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority
)
assert origin_metadata["next_page_token"] is None
assert not origin_metadata["results"]
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
- origin_metadata = swh_storage.origin_metadata_get(
- ori_meta["origin_url"], ori_meta["authority"]
+ origin_metadata = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority
)
assert origin_metadata
-def test_retrying_proxy_storage_origin_metadata_add_with_retry(
+def test_retrying_proxy_storage_object_metadata_add_with_retry(
monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision
):
"""Multiple retries for hash collision and psycopg2 error but finally ok
"""
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
- swh_storage.metadata_authority_add(**sample_data["authority"][0])
- swh_storage.metadata_fetcher_add(**sample_data["fetcher"][0])
+ swh_storage.origin_add_one({"url": ori_meta.id})
+ swh_storage.metadata_authority_add([sample_data["authority"][0]])
+ swh_storage.metadata_fetcher_add([sample_data["fetcher"][0]])
mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_metadata_add"
+ "swh.storage.in_memory.InMemoryStorage.object_metadata_add"
)
mock_memory.side_effect = [
@@ -617,54 +602,33 @@
]
# No exception raised as insertion finally came through
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
mock_memory.assert_has_calls(
[ # 3 calls, as long as error raised
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
- call(
- ori_meta["origin_url"],
- ori_meta["discovery_date"],
- ori_meta["authority"],
- ori_meta["fetcher"],
- ori_meta["format"],
- ori_meta["metadata"],
- ),
+ call([ori_meta]),
+ call([ori_meta]),
+ call([ori_meta]),
]
)
-def test_retrying_proxy_swh_storage_origin_metadata_add_failure(
+def test_retrying_proxy_swh_storage_object_metadata_add_failure(
swh_storage, sample_data, mocker
):
"""Unfiltered errors are raising without retry
"""
mock_memory = mocker.patch(
- "swh.storage.in_memory.InMemoryStorage.origin_metadata_add"
+ "swh.storage.in_memory.InMemoryStorage.object_metadata_add"
)
mock_memory.side_effect = StorageArgumentException("Refuse to add always!")
ori_meta = sample_data["origin_metadata"][0]
- swh_storage.origin_add_one({"url": ori_meta["origin_url"]})
+ swh_storage.origin_add_one({"url": ori_meta.id})
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.origin_metadata_add(**ori_meta)
+ swh_storage.object_metadata_add([ori_meta])
assert mock_memory.call_count == 1
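
The retry-proxy tests above track the storage API change: fetchers,
authorities, and metadata rows are now passed as lists of model objects
instead of **kwargs-splatted dicts, and origin_metadata_add/get are folded
into object_metadata_add/get. A sketch of the new calling convention, where
swh_storage stands for any swh.storage backend and fetcher, authority, and
origin_metadata are model objects as in the fixture sketch above:

    from swh.model.model import MetadataTargetType

    # was: swh_storage.metadata_fetcher_add(**fetcher_dict)
    swh_storage.metadata_fetcher_add([fetcher])
    swh_storage.metadata_authority_add([authority])

    # was: swh_storage.origin_metadata_add(**origin_metadata_dict)
    swh_storage.object_metadata_add([origin_metadata])

    result = swh_storage.object_metadata_get(
        MetadataTargetType.ORIGIN, origin_metadata.id, authority
    )
    assert origin_metadata in result["results"]
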
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -17,6 +17,7 @@
from datetime import timedelta
from unittest.mock import Mock
+import attr
import psycopg2
import pytest
@@ -26,6 +27,7 @@
from swh.model import from_disk, identifiers
from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import SWHID
from swh.model.model import (
Content,
Directory,
@@ -35,6 +37,7 @@
Release,
Revision,
Snapshot,
+ MetadataTargetType,
)
from swh.model.hypothesis_strategies import objects
from swh.model.hashutil import hash_to_hex
@@ -3221,50 +3224,51 @@
def test_metadata_fetcher_add_get(self, swh_storage):
actual_fetcher = swh_storage.metadata_fetcher_get(
- data.metadata_fetcher["name"], data.metadata_fetcher["version"]
+ data.metadata_fetcher.name, data.metadata_fetcher.version
)
assert actual_fetcher is None # does not exist
- swh_storage.metadata_fetcher_add(**data.metadata_fetcher)
+ swh_storage.metadata_fetcher_add([data.metadata_fetcher])
res = swh_storage.metadata_fetcher_get(
- data.metadata_fetcher["name"], data.metadata_fetcher["version"]
+ data.metadata_fetcher.name, data.metadata_fetcher.version
)
- assert res is not data.metadata_fetcher
assert res == data.metadata_fetcher
def test_metadata_authority_add_get(self, swh_storage):
actual_authority = swh_storage.metadata_authority_get(
- data.metadata_authority["type"], data.metadata_authority["url"]
+ data.metadata_authority.type, data.metadata_authority.url
)
assert actual_authority is None # does not exist
- swh_storage.metadata_authority_add(**data.metadata_authority)
+ swh_storage.metadata_authority_add([data.metadata_authority])
res = swh_storage.metadata_authority_get(
- data.metadata_authority["type"], data.metadata_authority["url"]
+ data.metadata_authority.type, data.metadata_authority.url
)
- assert res is not data.metadata_authority
assert res == data.metadata_authority
def test_content_metadata_add(self, swh_storage):
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
assert result["next_page_token"] is None
assert [data.content_metadata, data.content_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
def test_content_metadata_add_duplicate(self, swh_storage):
@@ -3272,81 +3276,81 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- new_content_metadata2 = {
- **data.content_metadata2,
- "format": "new-format",
- "metadata": b"new-metadata",
- }
+ new_content_metadata2 = attr.evolve(
+ data.content_metadata2, format="new-format", metadata=b"new-metadata",
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
- swh_storage.content_metadata_add(**new_content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
+ swh_storage.object_metadata_add([new_content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
assert result["next_page_token"] is None
expected_results1 = (data.content_metadata, new_content_metadata2)
expected_results2 = (data.content_metadata, data.content_metadata2)
- assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in (
+ assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in (
expected_results1, # cassandra
expected_results2, # postgresql
)
- def test_content_metadata_add_dict(self, swh_storage):
- fetcher = data.metadata_fetcher
- authority = data.metadata_authority
-
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
-
- kwargs = data.content_metadata.copy()
- kwargs["metadata"] = {"foo": "bar"}
-
- with pytest.raises(StorageArgumentException):
- swh_storage.content_metadata_add(**kwargs)
-
def test_content_metadata_get(self, swh_storage):
authority = data.metadata_authority
fetcher = data.metadata_fetcher
authority2 = data.metadata_authority2
fetcher2 = data.metadata_fetcher2
- content1_swhid = f"swh:1:cnt:{data.cont['sha1_git']}"
- content2_swhid = f"swh:1:cnt:{data.cont2['sha1_git']}"
+ content1_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(data.cont["sha1_git"])
+ )
+ content2_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(data.cont2["sha1_git"])
+ )
content1_metadata1 = data.content_metadata
content1_metadata2 = data.content_metadata2
content1_metadata3 = data.content_metadata3
- content2_metadata = {**data.content_metadata2, "id": content2_swhid}
+ content2_metadata = attr.evolve(data.content_metadata2, id=content2_swhid)
- swh_storage.metadata_authority_add(**authority)
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority2)
- swh_storage.metadata_fetcher_add(**fetcher2)
+ swh_storage.metadata_authority_add([authority, authority2])
+ swh_storage.metadata_fetcher_add([fetcher, fetcher2])
- swh_storage.content_metadata_add(**content1_metadata1)
- swh_storage.content_metadata_add(**content1_metadata2)
- swh_storage.content_metadata_add(**content1_metadata3)
- swh_storage.content_metadata_add(**content2_metadata)
+ swh_storage.object_metadata_add(
+ [
+ content1_metadata1,
+ content1_metadata2,
+ content1_metadata3,
+ content2_metadata,
+ ]
+ )
- result = swh_storage.content_metadata_get(content1_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content1_swhid, authority
+ )
assert result["next_page_token"] is None
assert [content1_metadata1, content1_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(content1_swhid, authority2)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content1_swhid, authority2
+ )
assert result["next_page_token"] is None
assert [content1_metadata3] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(content2_swhid, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content2_swhid, authority
+ )
assert result["next_page_token"] is None
assert [content2_metadata] == list(result["results"],)
@@ -3354,32 +3358,40 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- result = swh_storage.content_metadata_get(
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
content_swhid,
authority,
- after=data.content_metadata["discovery_date"] - timedelta(seconds=1),
+ after=data.content_metadata.discovery_date - timedelta(seconds=1),
)
assert result["next_page_token"] is None
assert [data.content_metadata, data.content_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.content_metadata_get(
- content_swhid, authority, after=data.content_metadata["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ after=data.content_metadata.discovery_date,
)
assert result["next_page_token"] is None
assert [data.content_metadata2] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, after=data.content_metadata2["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ after=data.content_metadata2.discovery_date,
)
assert result["next_page_token"] is None
assert [] == result["results"]
@@ -3388,22 +3400,31 @@
content = data.cont
fetcher = data.metadata_fetcher
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**data.content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
- swh_storage.content_metadata_get(content_swhid, authority)
+ swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority
+ )
- result = swh_storage.content_metadata_get(content_swhid, authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.content_metadata] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [data.content_metadata2] == result["results"]
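
The limit/page_token round-trips above follow the storage pagination
protocol: each call returns a "results" list and a "next_page_token" that is
None on the last page. An illustrative helper that drains all pages
(iter_metadata is not part of the API, just a sketch of the protocol):

    def iter_metadata(storage, target_type, target_id, authority, page_size=10):
        """Yield all metadata rows for one target, one page at a time."""
        page_token = None
        while True:
            result = storage.object_metadata_get(
                target_type,
                target_id,
                authority,
                limit=page_size,
                page_token=page_token,
            )
            yield from result["results"]
            page_token = result["next_page_token"]
            if page_token is None:
                break
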
@@ -3413,47 +3434,68 @@
fetcher1 = data.metadata_fetcher
fetcher2 = data.metadata_fetcher2
authority = data.metadata_authority
- content_swhid = f"swh:1:cnt:{content['sha1_git']}"
+ content_swhid = SWHID(
+ object_type="content", object_id=hash_to_bytes(content["sha1_git"])
+ )
- swh_storage.metadata_fetcher_add(**fetcher1)
- swh_storage.metadata_fetcher_add(**fetcher2)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
+ swh_storage.metadata_authority_add([authority])
- content_metadata2 = {
- **data.content_metadata2,
- "discovery_date": data.content_metadata2["discovery_date"],
- "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],},
- }
+ content_metadata2 = attr.evolve(
+ data.content_metadata2,
+ discovery_date=data.content_metadata2.discovery_date,
+ fetcher=attr.evolve(fetcher2, metadata=None),
+ )
- swh_storage.content_metadata_add(**data.content_metadata)
- swh_storage.content_metadata_add(**content_metadata2)
+ swh_storage.object_metadata_add([data.content_metadata, content_metadata2])
- result = swh_storage.content_metadata_get(content_swhid, authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.content_metadata] == result["results"]
- result = swh_storage.content_metadata_get(
- content_swhid, authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT,
+ content_swhid,
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [content_metadata2] == result["results"]
+ def test_content_metadata_get__invalid_id(self, swh_storage):
+ fetcher = data.metadata_fetcher
+ authority = data.metadata_authority
+
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
+
+ swh_storage.object_metadata_add([data.content_metadata, data.content_metadata2])
+
+ with pytest.raises(StorageArgumentException, match="SWHID"):
+ swh_storage.object_metadata_get(
+ MetadataTargetType.CONTENT, data.origin["url"], authority
+ )
+
def test_origin_metadata_add(self, swh_storage):
origin = data.origin
fetcher = data.metadata_fetcher
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
assert result["next_page_token"] is None
assert [data.origin_metadata, data.origin_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date)
)
def test_origin_metadata_add_duplicate(self, swh_storage):
@@ -3463,46 +3505,30 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- new_origin_metadata2 = {
- **data.origin_metadata2,
- "format": "new-format",
- "metadata": b"new-metadata",
- }
+ new_origin_metadata2 = attr.evolve(
+ data.origin_metadata2, format="new-format", metadata=b"new-metadata",
+ )
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
- swh_storage.origin_metadata_add(**new_origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
+ swh_storage.object_metadata_add([new_origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
assert result["next_page_token"] is None
# which of the two behavior happens is backend-specific.
expected_results1 = (data.origin_metadata, new_origin_metadata2)
expected_results2 = (data.origin_metadata, data.origin_metadata2)
- assert tuple(sorted(result["results"], key=lambda x: x["discovery_date"],)) in (
+ assert tuple(sorted(result["results"], key=lambda x: x.discovery_date,)) in (
expected_results1, # cassandra
expected_results2, # postgresql
)
- def test_origin_metadata_add_dict(self, swh_storage):
- origin = data.origin
- fetcher = data.metadata_fetcher
- authority = data.metadata_authority
- assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
-
- kwargs = data.origin_metadata.copy()
- kwargs["metadata"] = {"foo": "bar"}
-
- with pytest.raises(StorageArgumentException):
- swh_storage.origin_metadata_add(**kwargs)
-
def test_origin_metadata_get(self, swh_storage):
authority = data.metadata_authority
fetcher = data.metadata_fetcher
@@ -3515,31 +3541,34 @@
origin1_metadata1 = data.origin_metadata
origin1_metadata2 = data.origin_metadata2
origin1_metadata3 = data.origin_metadata3
- origin2_metadata = {**data.origin_metadata2, "origin_url": origin_url2}
+ origin2_metadata = attr.evolve(data.origin_metadata2, id=origin_url2)
- swh_storage.metadata_authority_add(**authority)
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority2)
- swh_storage.metadata_fetcher_add(**fetcher2)
+ swh_storage.metadata_authority_add([authority, authority2])
+ swh_storage.metadata_fetcher_add([fetcher, fetcher2])
- swh_storage.origin_metadata_add(**origin1_metadata1)
- swh_storage.origin_metadata_add(**origin1_metadata2)
- swh_storage.origin_metadata_add(**origin1_metadata3)
- swh_storage.origin_metadata_add(**origin2_metadata)
+ swh_storage.object_metadata_add(
+ [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
+ )
- result = swh_storage.origin_metadata_get(origin_url1, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url1, authority
+ )
assert result["next_page_token"] is None
assert [origin1_metadata1, origin1_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(origin_url1, authority2)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url1, authority2
+ )
assert result["next_page_token"] is None
assert [origin1_metadata3] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(origin_url2, authority)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin_url2, authority
+ )
assert result["next_page_token"] is None
assert [origin2_metadata] == list(result["results"],)
@@ -3549,30 +3578,36 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- result = swh_storage.origin_metadata_get(
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
origin["url"],
authority,
- after=data.origin_metadata["discovery_date"] - timedelta(seconds=1),
+ after=data.origin_metadata.discovery_date - timedelta(seconds=1),
)
assert result["next_page_token"] is None
assert [data.origin_metadata, data.origin_metadata2] == list(
- sorted(result["results"], key=lambda x: x["discovery_date"],)
+ sorted(result["results"], key=lambda x: x.discovery_date,)
)
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, after=data.origin_metadata["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ after=data.origin_metadata.discovery_date,
)
assert result["next_page_token"] is None
assert [data.origin_metadata2] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, after=data.origin_metadata2["discovery_date"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ after=data.origin_metadata2.discovery_date,
)
assert result["next_page_token"] is None
assert [] == result["results"]
@@ -3583,20 +3618,27 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**data.origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
- swh_storage.origin_metadata_get(origin["url"], authority)
+ swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority
+ )
- result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.origin_metadata] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [data.origin_metadata2] == result["results"]
@@ -3608,29 +3650,74 @@
authority = data.metadata_authority
assert swh_storage.origin_add([origin]) == {"origin:add": 1}
- swh_storage.metadata_fetcher_add(**fetcher1)
- swh_storage.metadata_fetcher_add(**fetcher2)
- swh_storage.metadata_authority_add(**authority)
+ swh_storage.metadata_fetcher_add([fetcher1])
+ swh_storage.metadata_fetcher_add([fetcher2])
+ swh_storage.metadata_authority_add([authority])
- origin_metadata2 = {
- **data.origin_metadata2,
- "discovery_date": data.origin_metadata2["discovery_date"],
- "fetcher": {"name": fetcher2["name"], "version": fetcher2["version"],},
- }
+ origin_metadata2 = attr.evolve(
+ data.origin_metadata2,
+ discovery_date=data.origin_metadata2.discovery_date,
+ fetcher=attr.evolve(fetcher2, metadata=None),
+ )
- swh_storage.origin_metadata_add(**data.origin_metadata)
- swh_storage.origin_metadata_add(**origin_metadata2)
+ swh_storage.object_metadata_add([data.origin_metadata, origin_metadata2])
- result = swh_storage.origin_metadata_get(origin["url"], authority, limit=1)
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, origin["url"], authority, limit=1
+ )
assert result["next_page_token"] is not None
assert [data.origin_metadata] == result["results"]
- result = swh_storage.origin_metadata_get(
- origin["url"], authority, limit=1, page_token=result["next_page_token"]
+ result = swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN,
+ origin["url"],
+ authority,
+ limit=1,
+ page_token=result["next_page_token"],
)
assert result["next_page_token"] is None
assert [origin_metadata2] == result["results"]
+ def test_origin_metadata_add_missing_authority(self, swh_storage):
+ origin = data.origin
+ fetcher = data.metadata_fetcher
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_fetcher_add([fetcher])
+
+ with pytest.raises(StorageArgumentException, match="authority"):
+ swh_storage.object_metadata_add(
+ [data.origin_metadata, data.origin_metadata2]
+ )
+
+ def test_origin_metadata_add_missing_fetcher(self, swh_storage):
+ origin = data.origin
+ authority = data.metadata_authority
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_authority_add([authority])
+
+ with pytest.raises(StorageArgumentException, match="fetcher"):
+ swh_storage.object_metadata_add(
+ [data.origin_metadata, data.origin_metadata2]
+ )
+
+ def test_origin_metadata_get__invalid_id_type(self, swh_storage):
+ origin = data.origin
+ fetcher = data.metadata_fetcher
+ authority = data.metadata_authority
+ assert swh_storage.origin_add([origin]) == {"origin:add": 1}
+
+ swh_storage.metadata_fetcher_add([fetcher])
+ swh_storage.metadata_authority_add([authority])
+
+ swh_storage.object_metadata_add([data.origin_metadata, data.origin_metadata2])
+
+ with pytest.raises(StorageArgumentException, match="SWHID"):
+ swh_storage.object_metadata_get(
+ MetadataTargetType.ORIGIN, data.content_metadata.id, authority,
+ )
+
class TestStorageGeneratedData:
def test_generate_content_get(self, swh_storage, swh_contents):
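
Throughout the test_storage.py changes, content targets switch from
interpolated f-strings to SWHID objects. A small sketch of that pattern,
assuming the swh.model.identifiers imports this diff adds (the hash below is
illustrative):

    from swh.model.hashutil import hash_to_bytes
    from swh.model.identifiers import SWHID, parse_swhid

    content_swhid = SWHID(
        object_type="content",
        object_id=hash_to_bytes("34973274ccef6ab4dfaaf86599792fa9c3fe4689"),
    )

    # SWHIDs round-trip through their canonical string form, which is what
    # the RPC serializers in this diff rely on:
    assert content_swhid == parse_swhid(str(content_swhid))
    assert str(content_swhid).startswith("swh:1:cnt:")
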
diff --git a/swh/storage/utils.py b/swh/storage/utils.py
--- a/swh/storage/utils.py
+++ b/swh/storage/utils.py
@@ -6,7 +6,7 @@
import re
from datetime import datetime, timezone
-from typing import Dict, Optional, Tuple
+from typing import Callable, Dict, Optional, Tuple, TypeVar
from swh.model.hashutil import hash_to_bytes, hash_to_hex, DEFAULT_ALGORITHMS
@@ -15,6 +15,17 @@
return datetime.now(tz=timezone.utc)
+T1 = TypeVar("T1")
+T2 = TypeVar("T2")
+
+
+def map_optional(f: Callable[[T1], T2], x: Optional[T1]) -> Optional[T2]:
+ if x is None:
+ return None
+ else:
+ return f(x)
+
+
def _is_power_of_two(n: int) -> bool:
return n > 0 and n & (n - 1) == 0
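
map_optional is a tiny Optional-aware helper: apply f unless the value is
None. A usage sketch; the parse_swhid line is hypothetical, in the spirit of
the optional SWHID columns handled elsewhere in this diff:

    from swh.model.identifiers import parse_swhid
    from swh.storage.utils import map_optional

    assert map_optional(int, "42") == 42
    assert map_optional(int, None) is None

    # hypothetical: turn a possibly-absent column value into a SWHID
    assert map_optional(parse_swhid, None) is None
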
