diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,3 +1,3 @@ swh.core[db,http] >= 0.0.65 -swh.model >= 0.0.50 +swh.model >= 0.0.51 swh.objstorage >= 0.0.17 diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -1246,7 +1246,6 @@ ('origin_visit', None)) if self.journal_writer: - visit = attr.evolve(visit, origin=origin) self.journal_writer.write_addition('origin_visit', visit) return visit_ret @@ -1267,6 +1266,8 @@ None """ + if not isinstance(origin, str): + raise TypeError('origin must be a string, not %r' % (origin,)) origin_url = self._get_origin_url(origin) if origin_url is None: raise ValueError('Unknown origin.') @@ -1288,16 +1289,10 @@ visit = attr.evolve(visit, **updates) if self.journal_writer: - origin = self._origins[origin_url] - journal_visit = attr.evolve(visit, origin=origin) - self.journal_writer.write_update('origin_visit', journal_visit) + self.journal_writer.write_update('origin_visit', visit) self._origin_visits[origin_url][visit_id-1] = visit - if origin_url not in self._origin_visits or \ - visit_id > len(self._origin_visits[origin_url]): - return - def origin_visit_upsert(self, visits): """Add a origin_visits with a specific id and with all its data. If there is already an origin_visit with the same @@ -1306,7 +1301,7 @@ Args: visits: iterable of dicts with keys: - origin: dict with keys either `id` or `url` + origin: origin url visit: origin visit id type: type of loader used for the visit date: timestamp of such visit @@ -1315,18 +1310,19 @@ snapshot (sha1_git): identifier of the snapshot to add to the visit """ + for visit in visits: + if not isinstance(visit['origin'], str): + raise TypeError("visit['origin'] must be a string, not %r" + % (visit['origin'],)) visits = [OriginVisit.from_dict(d) for d in visits] if self.journal_writer: for visit in visits: - visit = attr.evolve( - visit, - origin=self._origins[visit.origin.url]) self.journal_writer.write_addition('origin_visit', visit) for visit in visits: visit_id = visit.visit - origin_url = visit.origin.url + origin_url = visit.origin visit = attr.evolve(visit, origin=origin_url) diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1227,7 +1227,6 @@ """ origin_url = origin - origin = self.origin_get({'url': origin_url}, db=db, cur=cur) if isinstance(date, str): # FIXME: Converge on iso8601 at some point @@ -1239,7 +1238,7 @@ # We can write to the journal only after inserting to the # DB, because we want the id of the visit self.journal_writer.write_addition('origin_visit', { - 'origin': origin, 'date': date, 'type': type, + 'origin': origin_url, 'date': date, 'type': type, 'visit': visit_id, 'status': 'ongoing', 'metadata': None, 'snapshot': None}) @@ -1266,6 +1265,8 @@ None """ + if not isinstance(origin, str): + raise TypeError('origin must be a string, not %r' % (origin,)) origin_url = origin visit = db.origin_visit_get(origin_url, visit_id, cur=cur) @@ -1284,10 +1285,8 @@ if updates: if self.journal_writer: - origin = self.origin_get( - [{'url': origin_url}], db=db, cur=cur)[0] self.journal_writer.write_update('origin_visit', { - **visit, **updates, 'origin': origin}) + **visit, **updates}) db.origin_visit_update(origin_url, visit_id, updates, cur) @@ -1312,18 +1311,15 @@ for visit in visits: if isinstance(visit['date'], str): visit['date'] = dateutil.parser.parse(visit['date']) - visit['origin'] = \ - self.origin_get([visit['origin']], db=db, cur=cur)[0] + if not isinstance(visit['origin'], str): + raise TypeError("visit['origin'] must be a string, not %r" + % (visit['origin'],)) if self.journal_writer: for visit in visits: - visit = copy.deepcopy(visit) - if visit.get('type') is None: - visit['type'] = visit['origin']['type'] self.journal_writer.write_addition('origin_visit', visit) for visit in visits: - visit['origin'] = visit['origin']['url'] # TODO: upsert them all in a single query db.origin_visit_upsert(**visit, cur=cur) diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1071,9 +1071,8 @@ 'snapshot': None, } in actual_origin_visits - expected_origin = data.origin2 origin_visit = { - 'origin': expected_origin, + 'origin': origin_url, 'date': date_visit, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1082,7 +1081,7 @@ 'snapshot': None, } objects = list(swh_storage.journal_writer.objects) - assert ('origin', expected_origin) in objects + assert ('origin', data.origin2) in objects assert ('origin_visit', origin_visit) in objects def test_origin_visit_get__unknown_origin(self, swh_storage): @@ -1140,7 +1139,6 @@ assert ('origin', data.origin2) in objects for visit in expected_visits: - visit['origin'] = data.origin2 assert ('origin_visit', visit) in objects def test_origin_visit_add_validation(self, swh_storage): @@ -1197,7 +1195,7 @@ actual_origin_visits = list(swh_storage.origin_visit_get( origin_url)) expected_visits = [{ - 'origin': origin_visit2['origin'], + 'origin': origin_url, 'date': date_visit, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1205,7 +1203,7 @@ 'metadata': visit1_metadata, 'snapshot': None, }, { - 'origin': origin_visit2['origin'], + 'origin': origin_url, 'date': date_visit2, 'visit': origin_visit2['visit'], 'type': data.type_visit2, @@ -1221,7 +1219,7 @@ limit=1)) assert actual_origin_visits_bis == [ { - 'origin': origin_visit2['origin'], + 'origin': origin_url, 'date': date_visit, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1235,7 +1233,7 @@ last_visit=origin_visit1['visit'])) assert actual_origin_visits_ter == [ { - 'origin': origin_visit2['origin'], + 'origin': origin_url, 'date': date_visit2, 'visit': origin_visit2['visit'], 'type': data.type_visit2, @@ -1248,7 +1246,7 @@ origin_url2)) assert actual_origin_visits2 == [ { - 'origin': origin_visit3['origin'], + 'origin': origin_url2, 'date': date_visit2, 'visit': origin_visit3['visit'], 'type': data.type_visit3, @@ -1257,10 +1255,8 @@ 'snapshot': None, }] - expected_origin = data.origin.copy() - expected_origin2 = data.origin2.copy() data1 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': date_visit, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1269,7 +1265,7 @@ 'snapshot': None, } data2 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': date_visit2, 'visit': origin_visit2['visit'], 'type': data.type_visit2, @@ -1278,7 +1274,7 @@ 'snapshot': None, } data3 = { - 'origin': expected_origin2, + 'origin': origin_url2, 'date': date_visit2, 'visit': origin_visit3['visit'], 'type': data.type_visit3, @@ -1287,7 +1283,7 @@ 'snapshot': None, } data4 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': date_visit, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1296,7 +1292,7 @@ 'snapshot': None, } data5 = { - 'origin': expected_origin2, + 'origin': origin_url2, 'date': date_visit2, 'visit': origin_visit3['visit'], 'type': data.type_visit3, @@ -1305,8 +1301,8 @@ 'snapshot': None, } objects = list(swh_storage.journal_writer.objects) - assert ('origin', expected_origin) in objects - assert ('origin', expected_origin2) in objects + assert ('origin', data.origin) in objects + assert ('origin', data.origin2) in objects assert ('origin_visit', data1) in objects assert ('origin_visit', data2) in objects assert ('origin_visit', data3) in objects @@ -1465,7 +1461,7 @@ # when swh_storage.origin_visit_upsert([ { - 'origin': data.origin2, + 'origin': origin_url, 'date': data.date_visit2, 'visit': 123, 'type': data.type_visit2, @@ -1474,7 +1470,7 @@ 'snapshot': None, }, { - 'origin': data.origin2, + 'origin': origin_url, 'date': '2018-01-01 23:00:00+00', 'visit': 1234, 'type': data.type_visit2, @@ -1508,9 +1504,8 @@ }, ] - expected_origin = data.origin2 data1 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit2, 'visit': 123, 'type': data.type_visit2, @@ -1519,7 +1514,7 @@ 'snapshot': None, } data2 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit3, 'visit': 1234, 'type': data.type_visit2, @@ -1528,7 +1523,7 @@ 'snapshot': None, } assert list(swh_storage.journal_writer.objects) == [ - ('origin', expected_origin), + ('origin', data.origin2), ('origin_visit', data1), ('origin_visit', data2)] @@ -1544,7 +1539,7 @@ type=data.type_visit1, ) swh_storage.origin_visit_upsert([{ - 'origin': data.origin2, + 'origin': origin_url, 'date': data.date_visit2, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1570,9 +1565,8 @@ 'snapshot': None, }] - expected_origin = data.origin2 data1 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit2, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1581,7 +1575,7 @@ 'snapshot': None, } data2 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit2, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1590,7 +1584,7 @@ 'snapshot': None, } assert list(swh_storage.journal_writer.objects) == [ - ('origin', expected_origin), + ('origin', data.origin2), ('origin_visit', data1), ('origin_visit', data2)] @@ -1748,9 +1742,8 @@ by_ov = swh_storage.snapshot_get_by_origin_visit(origin_url, visit_id) assert by_ov == {**data.empty_snapshot, 'next_branch': None} - expected_origin = data.origin.copy() data1 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit1, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1759,7 +1752,7 @@ 'snapshot': None, } data2 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit1, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -1768,7 +1761,7 @@ 'snapshot': data.empty_snapshot['id'], } assert list(swh_storage.journal_writer.objects) == \ - [('origin', expected_origin), + [('origin', data.origin), ('origin_visit', data1), ('snapshot', data.empty_snapshot), ('origin_visit', data2)] @@ -2108,9 +2101,8 @@ origin_url, visit2_id) assert by_ov2 == {**data.snapshot, 'next_branch': None} - expected_origin = data.origin.copy() data1 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit1, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -2119,7 +2111,7 @@ 'snapshot': None, } data2 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit1, 'visit': origin_visit1['visit'], 'type': data.type_visit1, @@ -2128,7 +2120,7 @@ 'snapshot': data.snapshot['id'], } data3 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit2, 'visit': origin_visit2['visit'], 'type': data.type_visit2, @@ -2137,7 +2129,7 @@ 'snapshot': None, } data4 = { - 'origin': expected_origin, + 'origin': origin_url, 'date': data.date_visit2, 'visit': origin_visit2['visit'], 'type': data.type_visit2, @@ -2146,7 +2138,7 @@ 'snapshot': data.snapshot['id'], } assert list(swh_storage.journal_writer.objects) \ - == [('origin', expected_origin), + == [('origin', data.origin), ('origin_visit', data1), ('snapshot', data.snapshot), ('origin_visit', data2), @@ -3213,11 +3205,11 @@ obj = obj.to_dict() if obj_type == 'origin_visit': origin = obj.pop('origin') - swh_storage.origin_add_one(origin) + swh_storage.origin_add_one({'url': origin}) if 'visit' in obj: del obj['visit'] swh_storage.origin_visit_add( - origin['url'], obj['date'], obj['type']) + origin, obj['date'], obj['type']) else: method = getattr(swh_storage, obj_type + '_add') try: