Changeset View
Changeset View
Standalone View
Standalone View
swh/journal/replay.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
from time import time | from time import time | ||||
import logging | import logging | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from swh.core.statsd import statsd | from swh.core.statsd import statsd | ||||
from swh.model.identifiers import normalize_timestamp | from swh.model.identifiers import normalize_timestamp | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import SHA1_SIZE | from swh.model.model import SHA1_SIZE | ||||
Show All 26 Lines | def _fix_revision_pypi_empty_string(rev): | ||||
if rev['committer'].get('email') == '': | if rev['committer'].get('email') == '': | ||||
rev['committer']['email'] = b'' | rev['committer']['email'] = b'' | ||||
if rev['committer'].get('name') == '': | if rev['committer'].get('name') == '': | ||||
rev['committer']['name'] = b'' | rev['committer']['name'] = b'' | ||||
return rev | return rev | ||||
def _fix_revision_transplant_source(rev): | def _fix_revision_transplant_source(rev): | ||||
if rev.get('extra_headers'): | if rev.get('metadata') and rev['metadata'].get('extra_headers'): | ||||
rev = rev.copy() | rev = copy.deepcopy(rev) | ||||
rev['extra_headers'] = [ | rev['metadata']['extra_headers'] = [ | ||||
[key, value.encode('ascii')] | [key, value.encode('ascii')] | ||||
if key == 'transplant_source' and isinstance(value, str) | if key == 'transplant_source' and isinstance(value, str) | ||||
else [key, value] | else [key, value] | ||||
for (key, value) in rev['extra_headers']] | for (key, value) in rev['metadata']['extra_headers']] | ||||
return rev | return rev | ||||
def _check_date(date): | def _check_date(date): | ||||
"""Returns whether the date can be represented in backends with sane | """Returns whether the date can be represented in backends with sane | ||||
limits on timestamps and timezones (resp. signed 64-bits and | limits on timestamps and timezones (resp. signed 64-bits and | ||||
signed 16 bits), and that microseconds is valid (ie. between 0 and 10^6). | signed 16 bits), and that microseconds is valid (ie. between 0 and 10^6). | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | def fix_objects(object_type, objects): | ||||
Fix type of 'transplant_source' extra headers: | Fix type of 'transplant_source' extra headers: | ||||
>>> revs = fix_objects('revision', [{ | >>> revs = fix_objects('revision', [{ | ||||
... 'author': {'email': '', 'fullname': b'', 'name': ''}, | ... 'author': {'email': '', 'fullname': b'', 'name': ''}, | ||||
... 'committer': {'email': '', 'fullname': b'', 'name': ''}, | ... 'committer': {'email': '', 'fullname': b'', 'name': ''}, | ||||
... 'date': date, | ... 'date': date, | ||||
... 'committer_date': date, | ... 'committer_date': date, | ||||
... 'metadata': { | |||||
... 'extra_headers': [ | ... 'extra_headers': [ | ||||
... ['time_offset_seconds', b'-3600'], | ... ['time_offset_seconds', b'-3600'], | ||||
... ['transplant_source', '29c154a012a70f49df983625090434587622b39e']] | ... ['transplant_source', '29c154a012a70f49df983625090434587622b39e'] | ||||
... ]} | |||||
... }]) | ... }]) | ||||
>>> pprint(revs[0]['extra_headers']) | >>> pprint(revs[0]['metadata']['extra_headers']) | ||||
[['time_offset_seconds', b'-3600'], | [['time_offset_seconds', b'-3600'], | ||||
['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] | ['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] | ||||
Filter out revisions with invalid dates: | Filter out revisions with invalid dates: | ||||
>>> from copy import deepcopy | >>> from copy import deepcopy | ||||
>>> invalid_date1 = deepcopy(date) | >>> invalid_date1 = deepcopy(date) | ||||
>>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6 | >>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6 | ||||
▲ Show 20 Lines • Show All 238 Lines • Show Last 20 Lines |