Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/extrinsic_metadata.py
- This file was added.
# Copyright (C) 2020 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
from typing import Any, cast, Dict | |||||
from swh.model.identifiers import PersistentId, parse_persistent_identifier | |||||
from .exc import StorageArgumentException | |||||
# For each object type, the keys allowed in its extrinsic-metadata context,
# mapped to the type required of the corresponding value.
# An "origin" takes no context key at all; every later object type accepts all
# the keys of the type defined just before it, plus the extra key(s) given here.
CONTEXT_KEYS: Dict[str, Dict[str, type]] = {
    "origin": {},
    "snapshot": {"origin": str, "visit": int},
}
CONTEXT_KEYS["release"] = dict(CONTEXT_KEYS["snapshot"], snapshot=PersistentId)
CONTEXT_KEYS["revision"] = dict(CONTEXT_KEYS["release"], release=PersistentId)
CONTEXT_KEYS["directory"] = dict(
    CONTEXT_KEYS["revision"], revision=PersistentId, path=bytes
)
CONTEXT_KEYS["content"] = dict(CONTEXT_KEYS["directory"], directory=PersistentId)
douardda: Not sure I find this way of writing the dict easier to read than writing it as a literal, but I… | |||||
Done (inline reply) — vlorentz: I'd rather keep it in swh-storage for now, and move it to swh-model later if need be.
# Every key that can appear in any context: the "content" entry of CONTEXT_KEYS
# is built on top of all the other entries, so its keys form the complete set.
ALL_CONTEXT_KEYS = {*CONTEXT_KEYS["content"]}
def check_extrinsic_metadata_context(object_type: str, context: Dict[str, Any]):
    """Validate the ``context`` dict attached to an extrinsic metadata object.

    The keys allowed for ``object_type``, and the type each value must have,
    are looked up in ``CONTEXT_KEYS``. Where a ``PersistentId`` is expected, a
    string is accepted as well and is parsed with
    ``parse_persistent_identifier``; in both cases the identifier must not
    carry any qualifier.

    Args:
        object_type: a key of ``CONTEXT_KEYS``.
        context: mapping from context key to value. It is only read, never
            modified (parsed identifiers are not written back).

    Raises:
        StorageArgumentException: if ``object_type`` is not a known object
            type, if ``context`` has a key that is not allowed for
            ``object_type``, if a value does not have the expected type, or
            if an identifier has qualifiers.

    NOTE(review): errors raised by ``parse_persistent_identifier`` on a
    malformed string are propagated unchanged -- confirm whether callers
    expect them to be wrapped in ``StorageArgumentException`` too.
    """
    try:
        key_types = CONTEXT_KEYS[object_type]
    except KeyError:
        # Report a bad object type like any other invalid argument, instead
        # of leaking the KeyError of the table lookup.
        raise StorageArgumentException(
            f"Unknown object type: {object_type!r}"
        ) from None

    extra_keys = set(context) - set(key_types)
    if extra_keys:
        # Sorted so the message is deterministic; str() so that a non-string
        # key is reported rather than making join() fail with a TypeError.
        extra_keys_str = ", ".join(sorted(str(key) for key in extra_keys))
        raise StorageArgumentException(f"Unknown context keys: {extra_keys_str}")

    for key, value in context.items():
        expected_type = key_types[key]
        swhid_expected = expected_type is PersistentId

        # For display only. Strings are accepted wherever an SWHID is
        # expected, so say so whenever the type check below fails.
        if swhid_expected:
            expected_type_str = "PersistentId or str"
        else:
            expected_type_str = str(expected_type)

        # If an SWHID is expected and a string is given, parse it
        if swhid_expected and isinstance(value, str):
            value = parse_persistent_identifier(value)

        # Check the type of the context value
        if not isinstance(value, expected_type):
            raise StorageArgumentException(
                f"Context key {key} must have type {expected_type_str}, "
                f"but is {value!r}"
            )

        # If it is an SWHID, check it is also a core SWHID.
        if swhid_expected:
            swhid = cast(PersistentId, value)
            if swhid.metadata:
                raise StorageArgumentException(
                    f"Context key {key} must be a core SWHID, "
                    f"but it has qualifiers {', '.join(swhid.metadata)}."
                )
I'm not sure this way of writing the dict is easier to read than writing it as a literal, but I am mainly concerned about why we need yet another model-like declaration here.
What does this carry that is not declared in the data model? Shouldn't it be put there (in the model) instead?
[edit] I just reread the spec (D3154) and now understand this part better. However, I wonder if this notion of context would not be better described in swh.model.