diff --git a/swh/journal/replay.py b/swh/journal/replay.py
--- a/swh/journal/replay.py
+++ b/swh/journal/replay.py
@@ -65,10 +65,8 @@
             method = getattr(storage, object_type + '_add')
             method([object_])
         elif object_type == 'origin_visit':
-            origin_id = storage.origin_add_one(object_.pop('origin'))
-            visit = storage.origin_visit_add(
-                origin=origin_id, date=object_.pop('date'))
-            storage.origin_visit_update(
-                origin_id, visit['visit'], **object_)
+            storage.origin_visit_upsert([{
+                **object_,
+                'origin': storage.origin_add_one(object_['origin'])}])
         else:
             assert False
diff --git a/swh/journal/tests/test_write_replay.py b/swh/journal/tests/test_write_replay.py
--- a/swh/journal/tests/test_write_replay.py
+++ b/swh/journal/tests/test_write_replay.py
@@ -4,9 +4,13 @@
 # See top-level LICENSE file for more information
 
 from collections import namedtuple
+import datetime
+import functools
+from typing import Tuple, List
 
+import attr
 from hypothesis import given
-from hypothesis.strategies import lists, one_of, composite
+from hypothesis.strategies import lists, one_of, composite, builds, integers
 
 from swh.model.hashutil import MultiHash
 from swh.storage.in_memory import Storage
@@ -31,6 +35,148 @@
         self._object_types = object_types
 
 
+Sha1Git = bytes
+
+
+@attr.s
+class Person:
+    name = attr.ib(type=bytes)
+    email = attr.ib(type=bytes)
+    fullname = attr.ib(type=bytes)
+
+
+@attr.s
+class Timestamp:
+    seconds = attr.ib(type=int)
+    microseconds = attr.ib(type=int)
+
+    @seconds.validator
+    def check_seconds(self, attribute, value):
+        """Check that seconds fit in a 64-bits signed integer."""
+        if not (-2**63 <= value < 2**63):
+            raise ValueError('Seconds must be a signed 64-bits integer.')
+
+    @microseconds.validator
+    def check_microseconds(self, attribute, value):
+        """Checks that microseconds are positive and < 1000000."""
+        if not (0 <= value < 10**6):
+            raise ValueError('Microseconds must be in [0, 1000000[.')
+
+
+timestamps = functools.partial(
+    builds, Timestamp,
+    seconds=integers(-2**63, 2**63-1),
+    microseconds=integers(0, 10**6-1))
+
+
+@attr.s
+class Date:
+    timestamp = attr.ib(type=Timestamp)
+    offset = attr.ib(type=int)
+    negative_utc = attr.ib(type=bool)
+
+    def to_model(self):
+        return attr.asdict(self)
+
+
+dates = functools.partial(
+    builds, Date,
+    timestamp=timestamps(),
+    offset=integers(-2**16, 2**16-1))
+
+
+@attr.s
+class OriginVisit:
+    origin = attr.ib(type=dict)
+    date = attr.ib(type=datetime.datetime)
+
+    def to_model(self):
+        ov = attr.asdict(self)
+        ov['date'] = str(self.date)
+        return ov
+
+
+origin_visits = functools.partial(
+    builds, OriginVisit,
+    origin=origins())
+
+
+@attr.s
+class Release:
+    id = attr.ib(type=Sha1Git)
+    name = attr.ib(type=bytes)
+    message = attr.ib(type=bytes)
+    date = attr.ib(type=Date)
+    author = attr.ib(type=Person)
+    target = attr.ib(type=Sha1Git)
+    target_type = attr.ib(type=str)
+    synthetic = attr.ib(type=bool)
+
+    def to_model(self):
+        rel = attr.asdict(self)
+        rel['date'] = self.date.to_model()
+        return rel
+
+
+releases = functools.partial(
+    builds, Release,
+    date=dates())
+
+
+@attr.s
+class Revision:
+    id = attr.ib(type=Sha1Git)
+    message = attr.ib(type=bytes)
+    author = attr.ib(type=Person)
+    committer = attr.ib(type=Person)
+    date = attr.ib(type=Date)
+    committer_date = attr.ib(type=Date)
+    parents = attr.ib(type=Tuple[Sha1Git])
+    type = attr.ib(type=str)
+    directory = attr.ib(type=Sha1Git)
+    metadata = attr.ib(type=str)
+    synthetic = attr.ib(type=bool)
+
+    def to_model(self):
+        rev = attr.asdict(self)
+        rev['date'] = self.date.to_model()
+        rev['committer_date'] = self.committer_date.to_model()
+        return rev
+
+
+revisions = functools.partial(
+    builds, Revision,
+    date=dates(), committer_date=dates())
+
+
+@attr.s
+class DirectoryEntry:
+    name = attr.ib(type=bytes)
+    type = attr.ib(type=str,
+                   validator=attr.validators.in_(['file', 'dir', 'rev']))
+    target = attr.ib(type=Sha1Git)
+    perms = attr.ib(type=int,
+                    validator=attr.validators.in_(range(0, 512)))
+
+    def to_model(self):
+        return attr.asdict(self)
+
+
+@attr.s
+class Directory:
+    id = attr.ib(type=Sha1Git)
+    entries = attr.ib(type=List[DirectoryEntry])
+
+    def to_model(self):
+        dir_ = attr.asdict(self)
+        dir_['entries'] = [entry.to_model() for entry in self.entries]
+        return dir_
+
+
+directories = functools.partial(
+    builds, Directory)
+
+
 @composite
 def contents(draw):
     """Generate valid and consistent content.
@@ -55,7 +201,11 @@
 
 objects = lists(one_of(
     origins().map(lambda x: ('origin', x)),
+    origin_visits().map(lambda x: ('origin_visit', x.to_model())),
     snapshots().map(lambda x: ('snapshot', x)),
+    releases().map(lambda x: ('release', x.to_model())),
+    revisions().map(lambda x: ('revision', x.to_model())),
+    directories().map(lambda x: ('directory', x.to_model())),
     contents().map(lambda x: ('content', x)),
 ))
 
@@ -77,14 +227,20 @@
     storage1.journal_writer.send = send
 
     for (obj_type, obj) in objects:
-        method = getattr(storage1, obj_type + '_add')
-        method([obj])
+        obj = obj.copy()
+        if obj_type == 'origin_visit':
+            origin_id = storage1.origin_add_one(obj.pop('origin'))
+            storage1.origin_visit_add(origin_id, **obj)
+        else:
+            method = getattr(storage1, obj_type + '_add')
+            method([obj])
 
     storage2 = Storage()
    replayer = MockedStorageReplayer()
     replayer.poll = poll
     replayer.fill(storage2)
 
-    for attr in ('_contents', '_directories', '_revisions', '_releases',
-                 '_snapshots', '_origin_visits', '_origins'):
-        assert getattr(storage1, attr) == getattr(storage2, attr), attr
+    for attr_name in ('_contents', '_directories', '_revisions', '_releases',
+                      '_snapshots', '_origin_visits', '_origins'):
+        assert getattr(storage1, attr_name) == getattr(storage2, attr_name), \
+            attr_name
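
For context, the new strategies pair an attrs class with hypothesis's builds(), wrapped in functools.partial so each one is called like the pre-existing origins() and snapshots() strategies, and a to_model() method converts generated instances into the plain dicts the storage API expects. Below is a minimal, self-contained sketch of that pattern, not part of the patch: only the Person field shape is taken from the diff above; the persons strategy, its to_model() method, and the test name are hypothetical illustrations.

# Sketch of the attrs + hypothesis `builds` pattern used in the patch.
# `persons` and `test_person_to_model` are hypothetical names.
import functools

import attr
from hypothesis import given
from hypothesis.strategies import binary, builds


@attr.s
class Person:
    # Same field shape as the Person class added by the patch.
    name = attr.ib(type=bytes)
    email = attr.ib(type=bytes)
    fullname = attr.ib(type=bytes)

    def to_model(self):
        # Plain-dict form, as the storage API expects.
        return attr.asdict(self)


# functools.partial(builds, ...) yields a callable strategy factory, so
# persons() is used the same way as origins() or snapshots() above.
persons = functools.partial(
    builds, Person,
    name=binary(), email=binary(), fullname=binary())


@given(persons())
def test_person_to_model(person):
    d = person.to_model()
    assert set(d) == {'name', 'email', 'fullname'}
    assert all(isinstance(v, bytes) for v in d.values())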