# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Fixers bringing legacy journal objects up to date with the storage schema."""

import logging
from typing import Any, Callable, Dict, List

from swh.model.model import Origin

logger = logging.getLogger(__name__)


def _fix_content(content: Dict[str, Any]) -> Dict[str, Any]:
    """Filters-out invalid 'perms' key that leaked from swh.model.from_disk to
    the journal.

    The input dict is left untouched; a shallow copy without the 'perms' key
    is returned.

    >>> _fix_content({'perms': 0o100644, 'sha1_git': b'foo'})
    {'sha1_git': b'foo'}
    >>> _fix_content({'sha1_git': b'bar'})
    {'sha1_git': b'bar'}

    """
    content = content.copy()
    # A default is given so that dicts without the key pass through silently.
    content.pop("perms", None)
    return content


def _fix_raw_extrinsic_metadata(obj_dict: Dict) -> Dict:
    """Fix legacy RawExtrinsicMetadata with type which is no longer part of
    the model.

    The 'type' key is dropped from a shallow copy of the input. When its value
    was 'origin', the 'target' (an origin URL) is replaced by the string form
    of the SWHID of that origin.

    >>> _fix_raw_extrinsic_metadata({
    ...     'type': 'directory',
    ...     'target': 'swh:1:dir:460a586d1c95d120811eaadb398d534e019b5243',
    ... })
    {'target': 'swh:1:dir:460a586d1c95d120811eaadb398d534e019b5243'}
    >>> _fix_raw_extrinsic_metadata({
    ...     'type': 'origin',
    ...     'target': 'https://inria.halpreprod.archives-ouvertes.fr/hal-01667309',
    ... })
    {'target': 'swh:1:ori:155291d5b9ada4570672510509f93fcfd9809882'}

    """
    o = obj_dict.copy()
    # 'type' is always removed; only the 'origin' value needs the target rewritten.
    if o.pop("type", None) == "origin":
        o["target"] = str(Origin(o["target"]).swhid())
    return o


# Maps a journal object type to the function fixing a single object dict of
# that type. Object types without an entry are returned as-is by fix_objects.
object_fixers: Dict[str, Callable[[Dict], Dict]] = {
    "content": _fix_content,
    "raw_extrinsic_metadata": _fix_raw_extrinsic_metadata,
}


def fix_objects(object_type: str, objects: List[Dict]) -> List[Dict]:
    """
    Fix legacy objects from the journal to bring them up to date with the
    latest storage schema.

    Args:
        object_type: type name used to look up a fixer in ``object_fixers``.
        objects: object dicts of that type.

    Returns:
        A new list holding the fixed dicts when a fixer is registered for
        ``object_type``; otherwise the ``objects`` list itself, unchanged.
    """
    # Single lookup instead of a membership test followed by indexing.
    fixer = object_fixers.get(object_type)
    if fixer is None:
        return objects
    return [fixer(obj) for obj in objects]