Page MenuHomeSoftware Heritage

D2838.id10097.diff
No OneTemporary

D2838.id10097.diff

diff --git a/swh/journal/replay.py b/swh/journal/replay.py
--- a/swh/journal/replay.py
+++ b/swh/journal/replay.py
@@ -136,8 +136,11 @@
return _check_date(rev['date']) and _check_date(rev['committer_date'])
-def _fix_revisions(revisions: List[Dict]) -> List[Dict]:
- """Adapt revisions into a list of (current) storage compatible dicts.
+def _fix_revision(revision: Dict[str, Any]) -> Optional[Revision]:
+ """Adapt revision into an swh revision model object (current storage
+ compatible).
+
+ Fix author/committer person:
>>> from pprint import pprint
>>> date = {
@@ -147,22 +150,26 @@
... },
... 'offset': 0,
... }
- >>> pprint(_fix_revisions([{
+ >>> rev0 = _fix_revision({
+ ... 'id': b'rev-id',
... 'author': {'email': '', 'fullname': b'', 'name': ''},
... 'committer': {'email': '', 'fullname': b'', 'name': ''},
... 'date': date,
... 'committer_date': date,
- ... }]))
- [{'author': {'email': b'', 'fullname': b'', 'name': b''},
- 'committer': {'email': b'', 'fullname': b'', 'name': b''},
- 'committer_date': {'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1565096932}},
- 'date': {'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1565096932}}}]
+ ... 'type': 'git',
+ ... 'message': '',
+ ... 'directory': b'dir-id',
+ ... 'synthetic': False,
+ ... }).to_dict()
+ >>> rev0['author']
+ {'fullname': b'', 'name': b'', 'email': b''}
+ >>> rev0['committer']
+ {'fullname': b'', 'name': b'', 'email': b''}
Fix type of 'transplant_source' extra headers:
- >>> revs = _fix_revisions([{
+ >>> rev1 = _fix_revision({
+ ... 'id': b'rev-id',
... 'author': {'email': '', 'fullname': b'', 'name': ''},
... 'committer': {'email': '', 'fullname': b'', 'name': ''},
... 'date': date,
@@ -170,57 +177,62 @@
... 'metadata': {
... 'extra_headers': [
... ['time_offset_seconds', b'-3600'],
- ... ['transplant_source', '29c154a012a70f49df983625090434587622b39e'] # noqa
- ... ]}
- ... }])
- >>> pprint(revs[0]['metadata']['extra_headers'])
+ ... ['transplant_source', '29c154a012a70f49df983625090434587622b39e']
+ ... ]},
+ ... 'type': 'git',
+ ... 'message': '',
+ ... 'directory': b'dir-id',
+ ... 'synthetic': False,
+ ... })
+ >>> pprint(rev1.metadata['extra_headers'])
[['time_offset_seconds', b'-3600'],
['transplant_source', b'29c154a012a70f49df983625090434587622b39e']]
- Filter out revisions with invalid dates:
+ Revision with invalid date are filtered:
>>> from copy import deepcopy
>>> invalid_date1 = deepcopy(date)
>>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': invalid_date1,
... 'committer_date': date,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
>>> invalid_date2 = deepcopy(date)
>>> invalid_date2['timestamp']['seconds'] = 2**70 # > 10^63
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': invalid_date2,
... 'committer_date': date,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
>>> invalid_date3 = deepcopy(date)
>>> invalid_date3['offset'] = 2**20 # > 10^15
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': date,
... 'committer_date': invalid_date3,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
- """
- good_revisions: List = []
- for rev in revisions:
- rev = _fix_revision_pypi_empty_string(rev)
- rev = _fix_revision_transplant_source(rev)
- if not _check_revision_date(rev):
- logging.warning('Excluding revision (invalid date): %r', rev)
- continue
- if rev not in good_revisions:
- good_revisions.append(rev)
- return good_revisions
+ """ # noqa
+ rev = _fix_revision_pypi_empty_string(revision)
+ rev = _fix_revision_transplant_source(rev)
+ if not _check_revision_date(rev):
+ logger.warning('Invalid revision date detected: %(revision)s', {
+ 'revision': rev
+ })
+ return None
+ return Revision.from_dict(rev)
def _fix_origin_visit(visit: Dict) -> OriginVisit:
@@ -338,9 +350,12 @@
collision_aware_content_add(
storage.content_add_metadata, contents)
elif object_type == 'revision':
- storage.revision_add(
- Revision.from_dict(r) for r in _fix_revisions(objects)
- )
+ revisions: List[Revision] = []
+ for revision in objects:
+ rev = _fix_revision(revision)
+ if rev:
+ revisions.append(rev)
+ storage.revision_add(revisions)
elif object_type == 'origin_visit':
visits = [_fix_origin_visit(v) for v in objects]
storage.origin_add(Origin(url=v.origin) for v in visits)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 2:44 PM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215760

Event Timeline