Changeset View
Standalone View
swh/journal/replay.py
Show First 20 Lines • Show All 229 Lines • ▼ Show 20 Lines | if not _check_revision_date(rev): | ||||
logger.warning('Invalid revision date detected: %(revision)s', { | logger.warning('Invalid revision date detected: %(revision)s', { | ||||
'revision': rev | 'revision': rev | ||||
}) | }) | ||||
return None | return None | ||||
return Revision.from_dict(rev) | return Revision.from_dict(rev) | ||||
def _fix_origin_visit(visit: Dict) -> OriginVisit: | def _fix_origin_visit(visit: Dict) -> OriginVisit: | ||||
"""Adapt origin visits into a list of current storage compatible | """Adapt origin visit into current storage compatible OriginVisit. | ||||
OriginVisits. | |||||
`visit['origin']` is a dict instead of an URL: | `visit['origin']` is a dict instead of an URL: | ||||
>>> from datetime import datetime, timezone | >>> from datetime import datetime, timezone | ||||
>>> from pprint import pprint | >>> from pprint import pprint | ||||
>>> date = datetime(2020, 2, 27, 14, 39, 19, tzinfo=timezone.utc) | >>> date = datetime(2020, 2, 27, 14, 39, 19, tzinfo=timezone.utc) | ||||
>>> pprint(_fix_origin_visit({ | >>> pprint(_fix_origin_visit({ | ||||
... 'origin': {'url': 'http://foo'}, | ... 'origin': {'url': 'http://foo'}, | ||||
Show All 19 Lines | def _fix_origin_visit(visit: Dict) -> OriginVisit: | ||||
... }).to_dict()) | ... }).to_dict()) | ||||
{'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | {'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), | ||||
'metadata': None, | 'metadata': None, | ||||
'origin': 'http://foo', | 'origin': 'http://foo', | ||||
'snapshot': None, | 'snapshot': None, | ||||
'status': 'ongoing', | 'status': 'ongoing', | ||||
'type': 'hg'} | 'type': 'hg'} | ||||
Old visit format (origin_visit with no type) raises: | |||||
>>> _fix_origin_visit({ | |||||
... 'origin': {'url': 'http://foo'}, | |||||
... 'date': date, | |||||
... 'status': 'ongoing', | |||||
... 'snapshot': None | |||||
... }) | |||||
Traceback (most recent call last): | |||||
... | |||||
ValueError: Old origin visit format detected... | |||||
>>> _fix_origin_visit({ | |||||
... 'origin': 'http://foo', | |||||
... 'date': date, | |||||
... 'status': 'ongoing', | |||||
... 'snapshot': None | |||||
vlorentz: You don't need pytest in doctests, see https://docs.python.org/3/library/doctest.html#what… | |||||
Done Inline ActionsOk, is there a doctest configuration file where i could put because i don't want to leak the traceback which could change over time (i see 3.7 in my current setup). 276 >>> _fix_origin_visit({ UNEXPECTED EXCEPTION: ValueError("Old origin visit format detected: {'origin': {'url': 'http://foo'}, 'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), 'status': 'ongoing', 'snapshot': None}") Traceback (most recent call last): File "/usr/lib/python3.7/doctest.py", line 1329, in __run compileflags, 1), test.globs) File "<doctest swh.journal.replay._fix_origin_visit[5]>", line 5, in <module> File "/home/tony/work/inria/repo/swh/swh-environment/swh-journal/.tox/py3/lib/python3.7/site-packages/swh/journal/replay.py", line 305, in _fix_origin_visit raise ValueError(f'Old origin visit format detected: {visit}') ValueError: Old origin visit format detected: {'origin': {'url': 'http://foo'}, 'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), 'status': 'ongoing', 'snapshot': None} I'd like to only set the following as output: ValueError: Old origin visit format detected: {'origin': {'url': 'http://foo'}, 'date': datetime.datetime(2020, 2, 27, 14, 39, 19, tzinfo=datetime.timezone.utc), 'status': 'ongoing', 'snapshot': None} ardumont: Ok, is there a doctest configuration file where i could put
https://docs.python. | |||||
Done Inline Actions ah but the documentation I target seems to make it explicit already... -o IGNORE_EXCEPTION_DETAIL let's check ;) ardumont: ah but the documentation I target seems to make it explicit already...
```
-o… | |||||
Not Done Inline Actions What do you mean? doctest always ignores the traceback, e.g. https://forge.softwareheritage.org/source/swh-scheduler/browse/master/swh/scheduler/cli/utils.py$79-82 vlorentz: What do you mean? doctest always ignores the traceback, e.g. https://forge.softwareheritage. | |||||
Done Inline Actions
it did not work... or i misused it... tox.ini (tryout): commands = pytest --cov={envsitepackagesdir}/swh/journal \ {envsitepackagesdir}/swh/journal \ --cov-branch \ --doctest-modules -o doctest.IGNORE_EXCEPTION_DETAIL=1 {posargs} meh, i'll do like in your sample, thx
I must be missing the Traceback... that the output starts with in your sample. Well, i finally made it work... >>> _fix_origin_visit({ ... 'origin': 'http://foo', ... 'date': date, ... 'status': 'ongoing', ... 'snapshot': None ... }) Traceback (most recent call last): ... ValueError: Old origin visit format detected... There is a lot of noise though... Anyway, thx a lot! ardumont: > let's check
it did not work... or i misused it...
tox.ini (tryout):
```
commands =… | |||||
... }) | |||||
Traceback (most recent call last): | |||||
... | |||||
ValueError: Old origin visit format detected... | |||||
""" # noqa | """ # noqa | ||||
visit = visit.copy() | visit = visit.copy() | ||||
if 'type' not in visit: | if 'type' not in visit: | ||||
if isinstance(visit['origin'], dict) and 'type' in visit['origin']: | if isinstance(visit['origin'], dict) and 'type' in visit['origin']: | ||||
# Very old version of the schema: visits did not have a type, | # Very old version of the schema: visits did not have a type, | ||||
# but their 'origin' field was a dict with a 'type' key. | # but their 'origin' field was a dict with a 'type' key. | ||||
visit['type'] = visit['origin']['type'] | visit['type'] = visit['origin']['type'] | ||||
else: | else: | ||||
# Very very old version of the schema: 'type' is missing, | # Very old schema version: 'type' is missing, stop early | ||||
# so there is nothing we can do to fix it. | |||||
raise ValueError('Got an origin_visit too old to be replayed.') | # We expect the journal's origin_visit topic to no longer reference | ||||
# such visits. If it does, the replayer must crash so we can fix | |||||
# the journal's topic. | |||||
raise ValueError(f'Old origin visit format detected: {visit}') | |||||
if isinstance(visit['origin'], dict): | if isinstance(visit['origin'], dict): | ||||
# Old version of the schema: visit['origin'] was a dict. | # Old version of the schema: visit['origin'] was a dict. | ||||
visit['origin'] = visit['origin']['url'] | visit['origin'] = visit['origin']['url'] | ||||
if 'metadata' not in visit: | if 'metadata' not in visit: | ||||
visit['metadata'] = None | visit['metadata'] = None | ||||
return OriginVisit.from_dict(visit) | return OriginVisit.from_dict(visit) | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | def _insert_objects(object_type: str, objects: List[Dict], storage) -> None: | ||||
elif object_type == 'revision': | elif object_type == 'revision': | ||||
revisions: List[Revision] = [] | revisions: List[Revision] = [] | ||||
for revision in objects: | for revision in objects: | ||||
rev = _fix_revision(revision) | rev = _fix_revision(revision) | ||||
if rev: | if rev: | ||||
revisions.append(rev) | revisions.append(rev) | ||||
storage.revision_add(revisions) | storage.revision_add(revisions) | ||||
elif object_type == 'origin_visit': | elif object_type == 'origin_visit': | ||||
visits = [_fix_origin_visit(v) for v in objects] | visits: List[OriginVisit] = [] | ||||
storage.origin_add(Origin(url=v.origin) for v in visits) | origins: List[Origin] = [] | ||||
for obj in objects: | |||||
visit = _fix_origin_visit(obj) | |||||
visits.append(visit) | |||||
origins.append(Origin(url=visit.origin)) | |||||
Done Inline Actions Is that what you think is wrong? Just send all origins from the origin_visit and not only the ones that are converted correctly? ardumont: Is that what you think is wrong?
Just send all origins from the origin_visit and not only the… | |||||
storage.origin_add(origins) | |||||
storage.origin_visit_upsert(visits) | storage.origin_visit_upsert(visits) | ||||
elif object_type in ('directory', 'release', 'snapshot', 'origin'): | elif object_type in ('directory', 'release', 'snapshot', 'origin'): | ||||
method = getattr(storage, object_type + '_add') | method = getattr(storage, object_type + '_add') | ||||
method(object_converter_fn[object_type](o) for o in objects) | method(object_converter_fn[object_type](o) for o in objects) | ||||
else: | else: | ||||
logger.warning('Received a series of %s, this should not happen', | logger.warning('Received a series of %s, this should not happen', | ||||
object_type) | object_type) | ||||
▲ Show 20 Lines • Show All 249 Lines • Show Last 20 Lines |
You don't need pytest in doctests, see https://docs.python.org/3/library/doctest.html#what-about-exceptions
(And it makes sense not to use pytest, as doctests are also documentation examples, not just tests.)