diff --git a/swh/storage/fixer.py b/swh/storage/fixer.py --- a/swh/storage/fixer.py +++ b/swh/storage/fixer.py @@ -6,9 +6,9 @@ import copy import datetime import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List -from swh.model.model import Origin, TimestampWithTimezone +from swh.model.model import Origin logger = logging.getLogger(__name__) @@ -62,28 +62,7 @@ return rev -def _check_date(date): - """Returns whether the date can be represented in backends with sane - limits on timestamps and timezones (resp. signed 64-bits and - signed 16 bits), and that microseconds is valid (ie. between 0 and 10^6). - """ - if date is None: - return True - try: - TimestampWithTimezone.from_dict(date) - except ValueError: - return False - else: - return True - - -def _check_revision_date(rev): - """Exclude revisions with invalid dates. - See https://forge.softwareheritage.org/T1339""" - return _check_date(rev["date"]) and _check_date(rev["committer_date"]) - - -def _fix_revision(revision: Dict[str, Any]) -> Optional[Dict]: +def _fix_revision(revision: Dict[str, Any]) -> Dict: """Fix various legacy revision issues. Fix author/committer person: @@ -134,50 +113,9 @@ [['time_offset_seconds', b'-3600'], ['transplant_source', b'29c154a012a70f49df983625090434587622b39e']] - Revision with invalid date are filtered: - - >>> from copy import deepcopy - >>> invalid_date1 = deepcopy(date) - >>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6 - >>> rev = _fix_revision({ - ... 'author': {'fullname': b'', 'name': '', 'email': ''}, - ... 'committer': {'fullname': b'', 'name': '', 'email': ''}, - ... 'date': invalid_date1, - ... 'committer_date': date, - ... }) - >>> rev is None - True - - >>> invalid_date2 = deepcopy(date) - >>> invalid_date2['timestamp']['seconds'] = 2**70 # > 10^63 - >>> rev = _fix_revision({ - ... 'author': {'fullname': b'', 'name': '', 'email': ''}, - ... 'committer': {'fullname': b'', 'name': '', 'email': ''}, - ... 'date': invalid_date2, - ... 'committer_date': date, - ... }) - >>> rev is None - True - - >>> invalid_date3 = deepcopy(date) - >>> invalid_date3['offset'] = 2**20 # > 10^15 - >>> rev = _fix_revision({ - ... 'author': {'fullname': b'', 'name': '', 'email': ''}, - ... 'committer': {'fullname': b'', 'name': '', 'email': ''}, - ... 'date': date, - ... 'committer_date': invalid_date3, - ... }) - >>> rev is None - True - """ # noqa rev = _fix_revision_pypi_empty_string(revision) rev = _fix_revision_transplant_source(rev) - if not _check_revision_date(rev): - logger.warning( - "Invalid revision date detected: %(revision)s", {"revision": rev} - ) - return None return rev