Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066534
D2838.id10097.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D2838.id10097.diff
View Options
diff --git a/swh/journal/replay.py b/swh/journal/replay.py
--- a/swh/journal/replay.py
+++ b/swh/journal/replay.py
@@ -136,8 +136,11 @@
return _check_date(rev['date']) and _check_date(rev['committer_date'])
-def _fix_revisions(revisions: List[Dict]) -> List[Dict]:
- """Adapt revisions into a list of (current) storage compatible dicts.
+def _fix_revision(revision: Dict[str, Any]) -> Optional[Revision]:
+ """Adapt revision into an swh revision model object (current storage
+ compatible).
+
+ Fix author/committer person:
>>> from pprint import pprint
>>> date = {
@@ -147,22 +150,26 @@
... },
... 'offset': 0,
... }
- >>> pprint(_fix_revisions([{
+ >>> rev0 = _fix_revision({
+ ... 'id': b'rev-id',
... 'author': {'email': '', 'fullname': b'', 'name': ''},
... 'committer': {'email': '', 'fullname': b'', 'name': ''},
... 'date': date,
... 'committer_date': date,
- ... }]))
- [{'author': {'email': b'', 'fullname': b'', 'name': b''},
- 'committer': {'email': b'', 'fullname': b'', 'name': b''},
- 'committer_date': {'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1565096932}},
- 'date': {'offset': 0,
- 'timestamp': {'microseconds': 0, 'seconds': 1565096932}}}]
+ ... 'type': 'git',
+ ... 'message': '',
+ ... 'directory': b'dir-id',
+ ... 'synthetic': False,
+ ... }).to_dict()
+ >>> rev0['author']
+ {'fullname': b'', 'name': b'', 'email': b''}
+ >>> rev0['committer']
+ {'fullname': b'', 'name': b'', 'email': b''}
Fix type of 'transplant_source' extra headers:
- >>> revs = _fix_revisions([{
+ >>> rev1 = _fix_revision({
+ ... 'id': b'rev-id',
... 'author': {'email': '', 'fullname': b'', 'name': ''},
... 'committer': {'email': '', 'fullname': b'', 'name': ''},
... 'date': date,
@@ -170,57 +177,62 @@
... 'metadata': {
... 'extra_headers': [
... ['time_offset_seconds', b'-3600'],
- ... ['transplant_source', '29c154a012a70f49df983625090434587622b39e'] # noqa
- ... ]}
- ... }])
- >>> pprint(revs[0]['metadata']['extra_headers'])
+ ... ['transplant_source', '29c154a012a70f49df983625090434587622b39e']
+ ... ]},
+ ... 'type': 'git',
+ ... 'message': '',
+ ... 'directory': b'dir-id',
+ ... 'synthetic': False,
+ ... })
+ >>> pprint(rev1.metadata['extra_headers'])
[['time_offset_seconds', b'-3600'],
['transplant_source', b'29c154a012a70f49df983625090434587622b39e']]
- Filter out revisions with invalid dates:
+ Revisions with invalid dates are filtered:
>>> from copy import deepcopy
>>> invalid_date1 = deepcopy(date)
>>> invalid_date1['timestamp']['microseconds'] = 1000000000 # > 10^6
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': invalid_date1,
... 'committer_date': date,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
>>> invalid_date2 = deepcopy(date)
>>> invalid_date2['timestamp']['seconds'] = 2**70 # > 2^63
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': invalid_date2,
... 'committer_date': date,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
>>> invalid_date3 = deepcopy(date)
>>> invalid_date3['offset'] = 2**20 # > 2^15
- >>> _fix_revisions([{
+ >>> rev = _fix_revision({
... 'author': {'email': '', 'fullname': b'', 'name': b''},
... 'committer': {'email': '', 'fullname': b'', 'name': b''},
... 'date': date,
... 'committer_date': invalid_date3,
- ... }])
- []
+ ... })
+ >>> rev is None
+ True
- """
- good_revisions: List = []
- for rev in revisions:
- rev = _fix_revision_pypi_empty_string(rev)
- rev = _fix_revision_transplant_source(rev)
- if not _check_revision_date(rev):
- logging.warning('Excluding revision (invalid date): %r', rev)
- continue
- if rev not in good_revisions:
- good_revisions.append(rev)
- return good_revisions
+ """ # noqa
+ rev = _fix_revision_pypi_empty_string(revision)
+ rev = _fix_revision_transplant_source(rev)
+ if not _check_revision_date(rev):
+ logger.warning('Invalid revision date detected: %(revision)s', {
+ 'revision': rev
+ })
+ return None
+ return Revision.from_dict(rev)
def _fix_origin_visit(visit: Dict) -> OriginVisit:
@@ -338,9 +350,12 @@
collision_aware_content_add(
storage.content_add_metadata, contents)
elif object_type == 'revision':
- storage.revision_add(
- Revision.from_dict(r) for r in _fix_revisions(objects)
- )
+ revisions: List[Revision] = []
+ for revision in objects:
+ rev = _fix_revision(revision)
+ if rev:
+ revisions.append(rev)
+ storage.revision_add(revisions)
elif object_type == 'origin_visit':
visits = [_fix_origin_visit(v) for v in objects]
storage.origin_add(Origin(url=v.origin) for v in visits)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 2:44 PM (12 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215760
Attached To
D2838: journal.replay: Align fix revision behavior to other fix methods
Event Timeline
Log In to Comment