Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/fixer.py
# Copyright (C) 2020 The Software Heritage developers | # Copyright (C) 2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import logging | import logging | ||||
from typing import Any, Dict, List | from typing import Any, Callable, Dict, List | ||||
from swh.model.model import Origin | from swh.model.model import Origin | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
def _fix_content(content: Dict[str, Any]) -> Dict[str, Any]: | def _fix_content(content: Dict[str, Any]) -> Dict[str, Any]: | ||||
"""Filters-out invalid 'perms' key that leaked from swh.model.from_disk | """Filters-out invalid 'perms' key that leaked from swh.model.from_disk | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | def _fix_revision(revision: Dict[str, Any]) -> Dict: | ||||
... }) | ... }) | ||||
>>> pprint(rev1['metadata']['extra_headers']) | >>> pprint(rev1['metadata']['extra_headers']) | ||||
[['time_offset_seconds', b'-3600'], | [['time_offset_seconds', b'-3600'], | ||||
['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] | ['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] | ||||
""" # noqa | """ # noqa | ||||
rev = _fix_revision_pypi_empty_string(revision) | rev = _fix_revision_pypi_empty_string(revision) | ||||
rev = _fix_revision_transplant_source(rev) | rev = _fix_revision_transplant_source(rev) | ||||
rev.pop("metadata", None) # this has been dead for a while now | |||||
return rev | return rev | ||||
def _fix_origin(origin: Dict) -> Dict: | def _fix_origin(origin: Dict) -> Dict: | ||||
"""Fix legacy origin with type which is no longer part of the model. | """Fix legacy origin with type which is no longer part of the model. | ||||
>>> from pprint import pprint | >>> from pprint import pprint | ||||
>>> pprint(_fix_origin({ | >>> pprint(_fix_origin({ | ||||
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines | def _fix_raw_extrinsic_metadata(obj_dict: Dict) -> Dict: | ||||
""" | """ | ||||
o = obj_dict.copy() | o = obj_dict.copy() | ||||
if o.pop("type", None) == "origin": | if o.pop("type", None) == "origin": | ||||
o["target"] = str(Origin(o["target"]).swhid()) | o["target"] = str(Origin(o["target"]).swhid()) | ||||
return o | return o | ||||
object_fixers: Dict[str, Callable[[Dict], Dict]] = { | |||||
"content": _fix_content, | |||||
"revision": _fix_revision, | |||||
"origin": _fix_origin, | |||||
"origin_visit": _fix_origin_visit, | |||||
"raw_extrinsic_metadata": _fix_raw_extrinsic_metadata, | |||||
} | |||||
def fix_objects(object_type: str, objects: List[Dict]) -> List[Dict]: | def fix_objects(object_type: str, objects: List[Dict]) -> List[Dict]: | ||||
""" | """ | ||||
Fix legacy objects from the journal to bring them up to date with the | Fix legacy objects from the journal to bring them up to date with the | ||||
latest storage schema. | latest storage schema. | ||||
""" | """ | ||||
if object_type == "content": | if object_type in object_fixers: | ||||
return [_fix_content(v) for v in objects] | fixer = object_fixers[object_type] | ||||
elif object_type == "revision": | objects = [fixer(v) for v in objects] | ||||
revisions = [_fix_revision(v) for v in objects] | |||||
return [rev for rev in revisions if rev is not None] | |||||
elif object_type == "origin": | |||||
return [_fix_origin(v) for v in objects] | |||||
elif object_type == "origin_visit": | |||||
return [_fix_origin_visit(v) for v in objects] | |||||
elif object_type == "raw_extrinsic_metadata": | |||||
return [_fix_raw_extrinsic_metadata(v) for v in objects] | |||||
else: | |||||
return objects | return objects |