Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/fixer.py
Show First 20 Lines • Show All 211 Lines • ▼ Show 20 Lines | def _fix_origin_visit(visit: Dict) -> Dict: | ||||
>>> pprint(_fix_origin_visit({ | >>> pprint(_fix_origin_visit({ | ||||
... 'origin': {'url': 'http://foo'}, | ... 'origin': {'url': 'http://foo'}, | ||||
... 'date': date, | ... 'date': date, | ||||
... 'type': 'git', | ... 'type': 'git', | ||||
... 'status': 'ongoing', | ... 'status': 'ongoing', | ||||
... 'snapshot': None, | ... 'snapshot': None, | ||||
... })) | ... })) | ||||
{'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | {'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | ||||
'metadata': None, | |||||
'origin': 'http://foo', | 'origin': 'http://foo', | ||||
'snapshot': None, | |||||
'status': 'ongoing', | |||||
'type': 'git'} | 'type': 'git'} | ||||
`visit['type']` is missing, but `visit['origin']['type']` exists: | `visit['type']` is missing, but `visit['origin']['type']` exists: | ||||
>>> pprint(_fix_origin_visit( | >>> pprint(_fix_origin_visit( | ||||
... {'origin': {'type': 'hg', 'url': 'http://foo'}, | ... {'origin': {'type': 'hg', 'url': 'http://foo'}, | ||||
... 'date': date, | ... 'date': date, | ||||
... 'status': 'ongoing', | ... 'status': 'ongoing', | ||||
... 'snapshot': None, | ... 'snapshot': None, | ||||
... })) | ... })) | ||||
{'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | {'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | ||||
'metadata': None, | |||||
'origin': 'http://foo', | 'origin': 'http://foo', | ||||
'snapshot': None, | |||||
'status': 'ongoing', | |||||
'type': 'hg'} | 'type': 'hg'} | ||||
>>> pprint(_fix_origin_visit( | >>> pprint(_fix_origin_visit( | ||||
... {'origin': {'type': 'hg', 'url': 'http://foo'}, | ... {'origin': {'type': 'hg', 'url': 'http://foo'}, | ||||
... 'date': '2020-02-27 14:39:19+00:00', | ... 'date': '2020-02-27 14:39:19+00:00', | ||||
... 'status': 'ongoing', | ... 'status': 'ongoing', | ||||
... 'snapshot': None, | ... 'snapshot': None, | ||||
... })) | ... })) | ||||
{'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | {'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | ||||
'metadata': None, | |||||
'origin': 'http://foo', | 'origin': 'http://foo', | ||||
'snapshot': None, | |||||
'status': 'ongoing', | |||||
'type': 'hg'} | 'type': 'hg'} | ||||
Old visit format (origin_visit with no type) raises: | Old visit format (origin_visit with no type) raises: | ||||
>>> _fix_origin_visit({ | >>> _fix_origin_visit({ | ||||
... 'origin': {'url': 'http://foo'}, | ... 'origin': {'url': 'http://foo'}, | ||||
... 'date': date, | ... 'date': date, | ||||
... 'status': 'ongoing', | ... 'status': 'ongoing', | ||||
Show All 25 Lines | if "type" not in visit: | ||||
# We expect the journal's origin_visit topic to no longer reference | # We expect the journal's origin_visit topic to no longer reference | ||||
# such visits. If it does, the replayer must crash so we can fix | # such visits. If it does, the replayer must crash so we can fix | ||||
# the journal's topic. | # the journal's topic. | ||||
raise ValueError(f"Old origin visit format detected: {visit}") | raise ValueError(f"Old origin visit format detected: {visit}") | ||||
if isinstance(visit["origin"], dict): | if isinstance(visit["origin"], dict): | ||||
# Old version of the schema: visit['origin'] was a dict. | # Old version of the schema: visit['origin'] was a dict. | ||||
visit["origin"] = visit["origin"]["url"] | visit["origin"] = visit["origin"]["url"] | ||||
if "metadata" not in visit: | |||||
visit["metadata"] = None | |||||
date = visit["date"] | date = visit["date"] | ||||
if isinstance(date, str): | if isinstance(date, str): | ||||
visit["date"] = datetime.datetime.fromisoformat(date) | visit["date"] = datetime.datetime.fromisoformat(date) | ||||
# Those are no longer part of the model | |||||
for key in ["status", "snapshot", "metadata"]: | |||||
visit.pop(key, None) | |||||
return visit | return visit | ||||
def fix_objects(object_type: str, objects: List[Dict]) -> List[Dict]: | def fix_objects(object_type: str, objects: List[Dict]) -> List[Dict]: | ||||
""" | """ | ||||
Fix legacy objects from the journal to bring them up to date with the | Fix legacy objects from the journal to bring them up to date with the | ||||
latest storage schema. | latest storage schema. | ||||
""" | """ | ||||
Show All 11 Lines |