Changeset View
Changeset View
Standalone View
Standalone View
swh/journal/replay.py
Show All 38 Lines | if rev['author'].get('name') == '': | ||||
rev['author']['name'] = b'' | rev['author']['name'] = b'' | ||||
if rev['committer'].get('email') == '': | if rev['committer'].get('email') == '': | ||||
rev['committer']['email'] = b'' | rev['committer']['email'] = b'' | ||||
if rev['committer'].get('name') == '': | if rev['committer'].get('name') == '': | ||||
rev['committer']['name'] = b'' | rev['committer']['name'] = b'' | ||||
return rev | return rev | ||||
def _fix_revision_transplant_source(rev): | |||||
if rev.get('extra_headers'): | |||||
rev = rev.copy() | |||||
rev['extra_headers'] = [ | |||||
[key, value.encode('ascii')] | |||||
if key == 'transplant_source' and isinstance(value, str) | |||||
else [key, value] | |||||
for (key, value) in rev['extra_headers']] | |||||
return rev | |||||
def _check_date(date): | def _check_date(date): | ||||
"""Returns whether the date can be represented in backends with sane | """Returns whether the date can be represented in backends with sane | ||||
limits on timestamps and timezeones (resp. signed 64-bits and | limits on timestamps and timezeones (resp. signed 64-bits and | ||||
signed 16 bits), and that microseconds is valid (ie. between 0 and 10^6). | signed 16 bits), and that microseconds is valid (ie. between 0 and 10^6). | ||||
""" | """ | ||||
date = normalize_timestamp(date) | date = normalize_timestamp(date) | ||||
return (-2**63 <= date['timestamp']['seconds'] < 2**63) \ | return (-2**63 <= date['timestamp']['seconds'] < 2**63) \ | ||||
and (0 <= date['timestamp']['microseconds'] < 10**6) \ | and (0 <= date['timestamp']['microseconds'] < 10**6) \ | ||||
and (-2**15 <= date['offset'] < 2**15) | and (-2**15 <= date['offset'] < 2**15) | ||||
def _check_revision_date(rev):
    """Return whether both dates of a revision are valid.

    Applies _check_date to rev['date'] and rev['committer_date']; a false
    result lets the caller exclude the revision.

    See https://forge.softwareheritage.org/T1339

    Raises:
        KeyError: if 'date' or 'committer_date' is missing from ``rev``.
    """
    return _check_date(rev['date']) and _check_date(rev['committer_date'])
def _fix_revisions(revisions): | def _fix_revisions(revisions): | ||||
good_revisions = [] | good_revisions = [] | ||||
for rev in revisions: | for rev in revisions: | ||||
rev = _fix_revision_pypi_empty_string(rev) | rev = _fix_revision_pypi_empty_string(rev) | ||||
rev = _fix_revision_transplant_source(rev) | |||||
if not _check_revision_date(rev): | if not _check_revision_date(rev): | ||||
logging.warning('Excluding revision (invalid date): %r', rev) | logging.warning('Excluding revision (invalid date): %r', rev) | ||||
continue | continue | ||||
good_revisions.append(rev) | good_revisions.append(rev) | ||||
return good_revisions | return good_revisions | ||||
def _fix_origin_visits(visits): | def _fix_origin_visits(visits): | ||||
Show All 38 Lines | def fix_objects(object_type, objects): | ||||
... }])) | ... }])) | ||||
[{'author': {'email': b'', 'fullname': b'', 'name': b''}, | [{'author': {'email': b'', 'fullname': b'', 'name': b''}, | ||||
'committer': {'email': b'', 'fullname': b'', 'name': b''}, | 'committer': {'email': b'', 'fullname': b'', 'name': b''}, | ||||
'committer_date': {'offset': 0, | 'committer_date': {'offset': 0, | ||||
'timestamp': {'microseconds': 0, 'seconds': 1565096932}}, | 'timestamp': {'microseconds': 0, 'seconds': 1565096932}}, | ||||
'date': {'offset': 0, | 'date': {'offset': 0, | ||||
'timestamp': {'microseconds': 0, 'seconds': 1565096932}}}] | 'timestamp': {'microseconds': 0, 'seconds': 1565096932}}}] | ||||
Fix type of 'transplant_source' extra headers: | |||||
>>> revs = fix_objects('revision', [{ | |||||
... 'author': {'email': '', 'fullname': b'', 'name': ''}, | |||||
... 'committer': {'email': '', 'fullname': b'', 'name': ''}, | |||||
... 'date': date, | |||||
... 'committer_date': date, | |||||
... 'extra_headers': [ | |||||
... ['time_offset_seconds', b'-3600'], | |||||
... ['transplant_source', '29c154a012a70f49df983625090434587622b39e']] | |||||
... }]) | |||||
>>> pprint(revs[0]['extra_headers']) | |||||
[['time_offset_seconds', b'-3600'], | |||||
['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] | |||||
Filter out revisions with invalid dates: | Filter out revisions with invalid dates: | ||||
>>> from copy import deepcopy | >>> from copy import deepcopy | ||||
>>> invalid_date1 = deepcopy(date) | >>> invalid_date1 = deepcopy(date) | ||||
>>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6 | >>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6 | ||||
>>> fix_objects('revision', [{ | >>> fix_objects('revision', [{ | ||||
... 'author': {'email': '', 'fullname': b'', 'name': b''}, | ... 'author': {'email': '', 'fullname': b'', 'name': b''}, | ||||
... 'committer': {'email': '', 'fullname': b'', 'name': b''}, | ... 'committer': {'email': '', 'fullname': b'', 'name': b''}, | ||||
▲ Show 20 Lines • Show All 235 Lines • Show Last 20 Lines |