diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -14,8 +14,8 @@ import dateutil from swh.model.model import ( - Revision, Release, Directory, DirectoryEntry, Content, SkippedContent, - OriginVisit, Snapshot, Origin + Revision, Release, Directory, DirectoryEntry, Content, + SkippedContent, OriginVisit, Snapshot, Origin ) from swh.model.hashutil import DEFAULT_ALGORITHMS from swh.storage.objstorage import ObjStorage @@ -668,7 +668,8 @@ else: return results - def origin_get_one(self, origin): + def origin_get_one(self, origin: Dict[str, Any]) -> Optional[ + Dict[str, Any]]: if 'id' in origin: raise StorageArgumentException('Origin ids are not supported.') if 'url' not in origin: @@ -757,17 +758,21 @@ return origin_url - def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin # TODO: rename the argument + def origin_visit_add(self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> OriginVisit: + origin_url = origin.url if isinstance(date, str): + # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) + elif not isinstance(date, datetime.datetime): + raise StorageArgumentException( + 'Date must be a datetime or a string') - origin = self.origin_get_one({'url': origin_url}) - - if not origin: - return None + if not self.origin_get_one({'url': origin.url}): + raise StorageArgumentException( + 'Unknown origin %s', origin_url) visit_id = self._cql_runner.origin_generate_unique_visit_id(origin_url) @@ -784,23 +789,19 @@ except (KeyError, TypeError, ValueError) as e: raise StorageArgumentException(*e.args) self.journal_writer.origin_visit_add(visit) - self._cql_runner.origin_visit_add_one(visit) - - return { - 'origin': origin_url, - 'visit': visit_id, - } + return visit def origin_visit_update( - self, origin: str, visit_id: int, status: Optional[str] = None, - metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None): - origin_url = origin # TODO: rename the argument - + self, origin: Origin, visit: OriginVisit, + status: Optional[str] = None, metadata: Optional[Dict] = None, + snapshot: Optional[bytes] = None): # Get the existing data of the visit - row = self._cql_runner.origin_visit_get_one(origin_url, visit_id) + row = self._cql_runner.origin_visit_get_one(origin.url, visit.visit) if not row: - raise StorageArgumentException('This origin visit does not exist.') + raise StorageArgumentException( + 'origin visit %s for origin %s does not exist.', + visit.visit, origin.url) try: visit = OriginVisit.from_dict(self._format_origin_visit_row(row)) except (KeyError, TypeError, ValueError) as e: @@ -821,7 +822,7 @@ self.journal_writer.origin_visit_update(visit) - self._cql_runner.origin_visit_update(origin_url, visit_id, updates) + self._cql_runner.origin_visit_update(origin.url, visit.visit, updates) def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: self.journal_writer.origin_visit_upsert(visits) diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -6,6 +6,8 @@ import random import select +from typing import Any, Dict + from swh.core.db import BaseDb from swh.core.db.db_utils import stored_procedure, jsonize from swh.core.db.db_utils import execute_values_generator @@ -351,7 +353,9 @@ (origin, ts, type)) return cur.fetchone()[0] - def origin_visit_update(self, origin_id, visit_id, updates, cur=None): + def origin_visit_update( + self, origin_url: str, visit_id: int, + updates: Dict[str, Any], cur=None): """Update origin_visit's status.""" cur = self._cursor(cur) update_cols = [] @@ -359,7 +363,7 @@ where = ['origin.id = origin_visit.origin', 'origin.url=%s', 'visit=%s'] - where_values = [origin_id, visit_id] + where_values = [origin_url, visit_id] if 'status' in updates: update_cols.append('status=%s') values.append(updates.pop('status')) diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -19,8 +19,9 @@ import attr from swh.model.model import ( - BaseContent, Content, SkippedContent, Directory, Revision, Release, - Snapshot, OriginVisit, Origin, SHA1_SIZE) + BaseContent, Content, SkippedContent, Directory, Revision, + Release, Snapshot, OriginVisit, Origin, SHA1_SIZE +) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.storage.objstorage import ObjStorage @@ -702,20 +703,23 @@ return origin.url - def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin - if origin_url is None: - raise StorageArgumentException('Unknown origin.') + def origin_visit_add(self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> OriginVisit: + origin_url = origin.url if isinstance(date, str): # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) elif not isinstance(date, datetime.datetime): raise StorageArgumentException( - 'date must be a datetime or a string.') + 'Date must be a datetime or a string') + + origin = self.origin_get({'url': origin_url}) + if not origin: # Cannot add a visit without an origin + raise StorageArgumentException( + 'Unknown origin %s', origin_url) - visit_ret = None if origin_url in self._origins: origin = self._origins[origin_url] # visit ids are in the range [1, +inf[ @@ -731,30 +735,31 @@ visit=visit_id, ) self._origin_visits[origin_url].append(visit) - visit_ret = { - 'origin': origin.url, - 'visit': visit_id, - } + visit = visit - self._objects[(origin_url, visit_id)].append( + self._objects[(origin.url, visit.visit)].append( ('origin_visit', None)) self.journal_writer.origin_visit_add(visit) - return visit_ret + # return last visit + return visit def origin_visit_update( - self, origin: str, visit_id: int, status: Optional[str] = None, - metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None): - origin_url = self._get_origin_url(origin) - if origin_url is None: - raise StorageArgumentException('Unknown origin.') - + self, origin: Origin, visit: OriginVisit, + status: Optional[str] = None, metadata: Optional[Dict] = None, + snapshot: Optional[bytes] = None): + visit_id = visit.visit if visit.visit else -1 + if visit_id == -1: + raise StorageArgumentException( + 'origin visit %s for origin %s does not exist.', + visit.visit, origin.url) from None try: - visit = self._origin_visits[origin_url][visit_id-1] + visit = self._origin_visits[origin.url][visit_id - 1] except IndexError: raise StorageArgumentException( - 'Unknown visit_id for this origin') from None + 'origin visit %s for origin %s does not exist.', + visit.visit, origin.url) from None updates: Dict[str, Any] = {} if status: @@ -770,8 +775,7 @@ raise StorageArgumentException(*e.args) self.journal_writer.origin_visit_update(visit) - - self._origin_visits[origin_url][visit_id-1] = visit + self._origin_visits[origin.url][visit_id - 1] = visit def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: self.journal_writer.origin_visit_upsert(visits) diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import datetime + from typing import Any, Dict, Iterable, List, Optional, Union from swh.core.api import remote_api_endpoint @@ -768,27 +770,33 @@ @remote_api_endpoint('origin/visit/add') def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: + self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> OriginVisit: """Add an origin_visit for the origin at ts with status 'ongoing'. Args: - origin (str): visited origin's identifier or URL - date (Union[str,datetime]): timestamp of such visit - type (str): the type of loader used for the visit (hg, git, ...) + origin: visited origin's identifier or URL + date: timestamp of such visit + type: the type of loader used for the visit (hg, git, ...) + + Raises: + StorageArgumentException if the date is mistyped, or the origin + is unknown. Returns: dict: dictionary with keys origin and visit where: - - - origin: origin identifier - - visit: the visit identifier for the new visit occurrence + - origin: origin object + - visit: the visit object for the new visit occurrence """ ... @remote_api_endpoint('origin/visit/update') def origin_visit_update( - self, origin: str, visit_id: int, status: Optional[str] = None, - metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None): + self, origin: Origin, visit: OriginVisit, + status: Optional[str] = None, metadata: Optional[Dict] = None, + snapshot: Optional[bytes] = None): """Update an origin_visit's status. Args: diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -804,20 +804,28 @@ @timed @db_transaction() def origin_visit_add( - self, origin, date, type, db=None, cur=None - ) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin + self, origin: Origin, date: Union[str, datetime.datetime], + type: str, db=None, cur=None) -> OriginVisit: + origin_url = origin.url if isinstance(date, str): # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) + elif not isinstance(date, datetime.datetime): + raise StorageArgumentException( + 'Date must be a datetime or a string') + + origin = self.origin_get({'url': origin_url}) + if not origin: # Cannot add a visit without an origin + raise StorageArgumentException( + 'Unknown origin %s', origin_url) with convert_validation_exceptions(): - visit_id = db.origin_visit_add(origin_url, date, type, cur) + visit_id = db.origin_visit_add(origin_url, date, type, cur=cur) # We can write to the journal only after inserting to the # DB, because we want the id of the visit - visit = { + visit = OriginVisit.from_dict({ 'origin': origin_url, 'date': date, 'type': type, @@ -825,47 +833,48 @@ 'status': 'ongoing', 'metadata': None, 'snapshot': None - } + }) self.journal_writer.origin_visit_add(visit) send_metric('origin_visit:add', count=1, method_name='origin_visit') - return { - 'origin': origin_url, - 'visit': visit_id, - } + return visit @timed @db_transaction() - def origin_visit_update(self, origin: str, visit_id: int, + def origin_visit_update(self, origin: Origin, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None, db=None, cur=None): - if not isinstance(origin, str): + visit_row = db.origin_visit_get( + origin.url, visit.visit, cur=cur) + if not visit_row: raise StorageArgumentException( - 'origin must be a string, not %r' % (origin,)) - origin_url = origin - visit = db.origin_visit_get(origin_url, visit_id, cur=cur) + 'origin visit %s for origin %s does not exist.', + visit.visit, origin.url) - if not visit: - raise StorageArgumentException('Invalid visit_id for this origin.') - - visit = dict(zip(db.origin_visit_get_cols, visit)) + visit_to_update = OriginVisit.from_dict( + dict(zip(db.origin_visit_get_cols, visit_row))) updates: Dict[str, Any] = {} - if status and status != visit['status']: + if status and status != visit.status: updates['status'] = status - if metadata and metadata != visit['metadata']: + if metadata and metadata != visit.metadata: updates['metadata'] = metadata - if snapshot and snapshot != visit['snapshot']: + if snapshot and snapshot != visit.snapshot: updates['snapshot'] = snapshot if updates: - updated_visit = {**visit, **updates} - self.journal_writer.origin_visit_update(updated_visit) + try: + visit_to_update = attr.evolve(visit, **updates) + except (KeyError, TypeError, ValueError) as e: + raise StorageArgumentException(*e.args) + + self.journal_writer.origin_visit_update(visit_to_update) with convert_validation_exceptions(): - db.origin_visit_update(origin_url, visit_id, updates, cur) + db.origin_visit_update( + origin.url, visit.visit, updates, cur=cur) @timed @db_transaction() diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -261,10 +261,11 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin + origin = Origin(url=origin_url) origin_visit = swh_storage.origin_visit_add( - origin_url, '2020-01-01', 'hg') - assert origin_visit['origin'] == origin_url - assert isinstance(origin_visit['visit'], int) + origin, '2020-01-01', 'hg') + assert origin_visit.origin == origin_url + assert isinstance(origin_visit.visit, int) origin_visit = next(swh_storage.origin_visit_get(origin_url)) assert origin_visit['origin'] == origin_url @@ -297,13 +298,14 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin - r = swh_storage.origin_visit_add(sample_origin, '2020-01-01', 'git') + origin = Origin(url=sample_origin) + r = swh_storage.origin_visit_add(origin, '2020-01-01', 'git') assert r == {'origin': origin_url, 'visit': 1} mock_memory.assert_has_calls([ - call(sample_origin, '2020-01-01', 'git'), - call(sample_origin, '2020-01-01', 'git'), - call(sample_origin, '2020-01-01', 'git') + call(origin, '2020-01-01', 'git'), + call(origin, '2020-01-01', 'git'), + call(origin, '2020-01-01', 'git') ]) assert mock_sleep.call_count == 2 @@ -323,11 +325,13 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin + origin = Origin(url=origin_url) + with pytest.raises(StorageArgumentException, match='Refuse to add'): - swh_storage.origin_visit_add(origin_url, '2020-01-31', 'svn') + swh_storage.origin_visit_add(origin, '2020-01-31', 'svn') mock_memory.assert_has_calls([ - call(origin_url, '2020-01-31', 'svn'), + call(origin, '2020-01-31', 'svn'), ]) @@ -597,21 +601,22 @@ sample_origin = sample_data['origin'][0] swh_storage.origin_add_one(sample_origin) origin_url = sample_origin['url'] + origin = Origin(url=origin_url) origin_visit = swh_storage.origin_visit_add( - origin_url, '2020-01-01', 'hg') + origin, '2020-01-01', 'hg') ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url - assert ov['visit'] == origin_visit['visit'] + assert ov['visit'] == origin_visit.visit assert ov['status'] == 'ongoing' assert ov['snapshot'] is None assert ov['metadata'] is None - swh_storage.origin_visit_update(origin_url, ov['visit'], status='full') + swh_storage.origin_visit_update(origin, origin_visit, status='full') ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url - assert ov['visit'] == origin_visit['visit'] + assert ov['visit'] == origin_visit.visit assert ov['status'] == 'full' assert ov['snapshot'] is None assert ov['metadata'] is None diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -26,7 +26,9 @@ from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes -from swh.model.model import Content, OriginVisit, Release, Revision +from swh.model.model import ( + Content, Origin, OriginVisit, Release, Revision +) from swh.model.hypothesis_strategies import objects from swh.storage import HashCollision, get_storage from swh.storage.converters import origin_url_to_sha1 as sha1 @@ -1389,11 +1391,11 @@ # Add visits to those origins for origin in data.origins: + origin = Origin.from_dict(origin) for date_visit in visits: visit = swh_storage.origin_visit_add( - origin['url'], date=date_visit, type=visit_type) - swh_storage.origin_visit_update( - origin['url'], visit_id=visit['visit'], status='full') + origin, date=date_visit, type=visit_type) + swh_storage.origin_visit_update(origin, visit, status='full') swh_storage.refresh_stat_counters() @@ -1414,11 +1416,12 @@ # will be found by the random selection visits = self._generate_random_visits(nb_visits=3, start=13, end=24) for origin in data.origins: + origin = Origin.from_dict(origin) for date_visit in visits: visit = swh_storage.origin_visit_add( - origin['url'], date=date_visit, type=visit_type) + origin, date=date_visit, type=visit_type) swh_storage.origin_visit_update( - origin['url'], visit_id=visit['visit'], status='full') + origin, visit=visit, status='full') random_origin_visit = swh_storage.origin_visit_get_random(visit_type) assert random_origin_visit is None @@ -1556,7 +1559,7 @@ # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) date_visit = datetime.datetime.now(datetime.timezone.utc) # Round to milliseconds before insertion, so equality doesn't fail @@ -1566,31 +1569,24 @@ # when origin_visit1 = swh_storage.origin_visit_add( - origin_url, + origin, type=data.type_visit1, date=date_visit) actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) - assert { - 'origin': origin_url, - 'date': date_visit, - 'visit': origin_visit1['visit'], - 'type': data.type_visit1, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - } in actual_origin_visits - + origin.url)) origin_visit = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } + + assert origin_visit in actual_origin_visits + objects = list(swh_storage.journal_writer.journal.objects) assert ('origin', data.origin2) in objects assert ('origin_visit', origin_visit) in objects @@ -1601,7 +1597,7 @@ def test_origin_visit_add_default_type(self, swh_storage): # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) # when date_visit = datetime.datetime.now(datetime.timezone.utc) @@ -1615,42 +1611,42 @@ microsecond=round(date_visit2.microsecond, -3)) origin_visit1 = swh_storage.origin_visit_add( - origin_url, + origin, date=date_visit, type=data.type_visit1, ) origin_visit2 = swh_storage.origin_visit_add( - origin_url, + origin, date=date_visit2, type=data.type_visit2, ) # then - assert origin_visit1['origin'] == origin_url - assert origin_visit1['visit'] is not None + assert origin_visit1.origin == origin.url + assert origin_visit1.visit is not None actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) expected_visits = [ - { - 'origin': origin_url, - 'date': date_visit, - 'visit': origin_visit1['visit'], - 'type': data.type_visit1, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - { - 'origin': origin_url, - 'date': date_visit2, - 'visit': origin_visit2['visit'], - 'type': data.type_visit2, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - ] + { + 'origin': origin.url, + 'date': date_visit, + 'visit': origin_visit1.visit, + 'type': data.type_visit1, + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + { + 'origin': origin.url, + 'date': date_visit2, + 'visit': origin_visit2.visit, + 'type': data.type_visit2, + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + ] for visit in expected_visits: assert visit in actual_origin_visits @@ -1661,10 +1657,10 @@ assert ('origin_visit', visit) in objects def test_origin_visit_add_validation(self, swh_storage): - origin_url = swh_storage.origin_add_one(data.origin2) + origin = Origin(url=swh_storage.origin_add_one(data.origin2)) with pytest.raises(StorageArgumentException) as cm: - swh_storage.origin_visit_add(origin_url, date=[b'foo'], type='git') + swh_storage.origin_visit_add(origin, date=[b'foo'], type='git') if type(cm.value) == psycopg2.ProgrammingError: assert cm.value.pgcode \ @@ -1673,7 +1669,7 @@ def test_origin_visit_update(self, swh_storage): # given swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] + origin = Origin(url=data.origin['url']) date_visit = datetime.datetime.now(datetime.timezone.utc) date_visit2 = date_visit + datetime.timedelta(minutes=1) @@ -1686,21 +1682,21 @@ microsecond=round(date_visit2.microsecond, -3)) origin_visit1 = swh_storage.origin_visit_add( - origin_url, + origin, date=date_visit, type=data.type_visit1, ) origin_visit2 = swh_storage.origin_visit_add( - origin_url, + origin, date=date_visit2, type=data.type_visit2 ) swh_storage.origin_add_one(data.origin2) - origin_url2 = data.origin2['url'] + origin2 = Origin(url=data.origin2['url']) origin_visit3 = swh_storage.origin_visit_add( - origin_url2, + origin2, date=date_visit2, type=data.type_visit3 ) @@ -1711,28 +1707,26 @@ 'directories': 22, } swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], status='full', + origin, origin_visit1, status='full', metadata=visit1_metadata) swh_storage.origin_visit_update( - origin_url2, - origin_visit3['visit'], status='partial') + origin2, origin_visit3, status='partial') # then actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) expected_visits = [{ - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': visit1_metadata, 'snapshot': None, }, { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -1742,13 +1736,12 @@ assert visit in actual_origin_visits actual_origin_visits_bis = list(swh_storage.origin_visit_get( - origin_url, - limit=1)) + origin.url, limit=1)) assert actual_origin_visits_bis == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': visit1_metadata, @@ -1756,13 +1749,12 @@ }] actual_origin_visits_ter = list(swh_storage.origin_visit_get( - origin_url, - last_visit=origin_visit1['visit'])) + origin.url, last_visit=origin_visit1.visit)) assert actual_origin_visits_ter == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -1770,12 +1762,12 @@ }] actual_origin_visits2 = list(swh_storage.origin_visit_get( - origin_url2)) + origin2.url)) assert actual_origin_visits2 == [ { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'partial', 'metadata': None, @@ -1783,45 +1775,45 @@ }] data1 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data3 = { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data4 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'metadata': visit1_metadata, 'status': 'full', 'snapshot': None, } data5 = { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'partial', 'metadata': None, @@ -1837,17 +1829,17 @@ assert ('origin_visit', data5) in objects def test_origin_visit_update_validation(self, swh_storage): - origin_url = data.origin['url'] + origin = Origin(url=data.origin['url']) swh_storage.origin_add_one(data.origin) - visit = swh_storage.origin_visit_add( - origin_url, + origin_visit = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) with pytest.raises(StorageArgumentException, match='status') as cm: swh_storage.origin_visit_update( - origin_url, visit['visit'], status='foobar') + origin, origin_visit, status='foobar') if type(cm.value) == psycopg2.DataError: assert cm.value.pgcode == \ @@ -1855,60 +1847,60 @@ def test_origin_visit_find_by_date(self, swh_storage): # given - swh_storage.origin_add_one(data.origin) + origin_url = swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) swh_storage.origin_visit_add( - data.origin['url'], + origin, date=data.date_visit2, type=data.type_visit1, ) origin_visit2 = swh_storage.origin_visit_add( - data.origin['url'], + origin, date=data.date_visit3, type=data.type_visit2, ) origin_visit3 = swh_storage.origin_visit_add( - data.origin['url'], + origin, date=data.date_visit2, type=data.type_visit3, ) # Simple case visit = swh_storage.origin_visit_find_by_date( - data.origin['url'], data.date_visit3) - assert visit['visit'] == origin_visit2['visit'] + origin.url, data.date_visit3) + assert visit['visit'] == origin_visit2.visit # There are two visits at the same date, the latest must be returned visit = swh_storage.origin_visit_find_by_date( - data.origin['url'], data.date_visit2) - assert visit['visit'] == origin_visit3['visit'] + origin.url, data.date_visit2) + assert visit['visit'] == origin_visit3.visit def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): swh_storage.origin_visit_find_by_date('foo', data.date_visit2) def test_origin_visit_update_missing_snapshot(self, swh_storage): # given - swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] + origin_url = swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit = swh_storage.origin_visit_add( - origin_url, + Origin(url=origin_url), date=data.date_visit1, type=data.type_visit1, ) # when swh_storage.origin_visit_update( - origin_url, - origin_visit['visit'], + origin, + origin_visit, snapshot=data.snapshot['id']) # then actual_origin_visit = swh_storage.origin_visit_get_by( - origin_url, - origin_visit['visit']) + origin.url, origin_visit.visit) assert actual_origin_visit['snapshot'] == data.snapshot['id'] # when @@ -1919,29 +1911,29 @@ swh_storage.origin_add_one(data.origin) swh_storage.origin_add_one(data.origin2) - origin_url = data.origin['url'] - origin2_url = data.origin2['url'] + origin = Origin(url=data.origin['url']) + origin2 = Origin(url=data.origin2['url']) origin_visit1 = swh_storage.origin_visit_add( - origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], + origin, + origin_visit1, snapshot=data.snapshot['id']) # Add some other {origin, visit} entries swh_storage.origin_visit_add( - origin_url, + origin, date=data.date_visit3, type=data.type_visit3, ) swh_storage.origin_visit_add( - origin2_url, + origin2, date=data.date_visit3, type=data.type_visit3, ) @@ -1953,14 +1945,13 @@ } swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], status='full', - metadata=visit1_metadata) + origin, origin_visit1, + status='full', metadata=visit1_metadata) - expected_origin_visit = origin_visit1.copy() + expected_origin_visit = origin_visit1.to_dict() expected_origin_visit.update({ - 'origin': origin_url, - 'visit': origin_visit1['visit'], + 'origin': origin.url, + 'visit': origin_visit1.visit, 'date': data.date_visit2, 'type': data.type_visit2, 'metadata': visit1_metadata, @@ -1970,8 +1961,8 @@ # when actual_origin_visit1 = swh_storage.origin_visit_get_by( - origin_url, - origin_visit1['visit']) + origin.url, + origin_visit1.visit) # then assert actual_origin_visit1 == expected_origin_visit @@ -2056,18 +2047,19 @@ def test_origin_visit_upsert_existing(self, swh_storage): # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) # when origin_visit1 = swh_storage.origin_visit_add( - origin_url, + origin, date=data.date_visit2, type=data.type_visit1, ) + swh_storage.origin_visit_upsert([OriginVisit.from_dict({ - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2075,16 +2067,16 @@ })]) # then - assert origin_visit1['origin'] == origin_url - assert origin_visit1['visit'] is not None + assert origin_visit1.origin == origin.url + assert origin_visit1.visit is not None actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) assert actual_origin_visits == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2092,18 +2084,18 @@ }] data1 = { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2122,108 +2114,108 @@ def test_origin_visit_get_latest(self, swh_storage): swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=data.origin['url']) + ov1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] - origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + + ov2 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = swh_storage.origin_visit_add( - origin=origin_url, + ov3 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) - visit3_id = origin_visit3['visit'] - origin_visit1 = swh_storage.origin_visit_get_by(origin_url, visit1_id) - origin_visit2 = swh_storage.origin_visit_get_by(origin_url, visit2_id) - origin_visit3 = swh_storage.origin_visit_get_by(origin_url, visit3_id) + origin_visit1 = swh_storage.origin_visit_get_by( + origin.url, ov1.visit) + origin_visit2 = swh_storage.origin_visit_get_by( + origin.url, ov2.visit) + origin_visit3 = swh_storage.origin_visit_get_by( + origin.url, ov3.visit) # Two visits, both with no snapshot - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) assert swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) is None + origin.url, require_snapshot=True) is None # Add snapshot to visit1; require_snapshot=True makes it return # visit1 and require_snapshot=False still returns visit2 swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, - snapshot=data.complete_snapshot['id']) - assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ + origin, ov1, snapshot=data.complete_snapshot['id']) + + assert {**origin_visit1, + 'snapshot': data.complete_snapshot['id']} \ == swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Status filter: all three visits are status=ongoing, so no visit # returned assert swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) is None + origin.url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again swh_storage.origin_visit_update( - origin_url, - visit1_id, status='full') + origin, ov1, status='full') assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, - snapshot=data.empty_snapshot['id']) + origin, ov2, snapshot=data.empty_snapshot['id']) assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Check that the status filter is still working assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) # Add snapshot to visit3 (same date as visit2) swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit3_id, snapshot=data.complete_snapshot['id']) + origin, ov3, snapshot=data.complete_snapshot['id']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full'], require_snapshot=True) + origin.url, allowed_statuses=['full'], require_snapshot=True) assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] - } == swh_storage.origin_visit_get_latest(origin_url) + } == swh_storage.origin_visit_get_latest(origin.url) assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] } == swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) def test_person_fullname_unicity(self, swh_storage): # given (person injection through revisions for example) @@ -2249,18 +2241,20 @@ def test_snapshot_add_get_empty(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) + + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.empty_snapshot]) assert actual_result == {'snapshot:add': 1} swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.empty_snapshot['id']) + origin, origin_visit1, snapshot=data.empty_snapshot['id']) by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) assert by_id == {**data.empty_snapshot, 'next_branch': None} @@ -2271,7 +2265,7 @@ data1 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2280,7 +2274,7 @@ data2 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2295,16 +2289,17 @@ def test_snapshot_add_get_complete(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.complete_snapshot['id']) + origin, origin_visit1, snapshot=data.complete_snapshot['id']) assert actual_result == {'snapshot:add': 1} by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) @@ -2453,16 +2448,16 @@ def test_snapshot_add_get_filtered(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.complete_snapshot['id']) + origin, origin_visit1, snapshot=data.complete_snapshot['id']) snp_id = data.complete_snapshot['id'] branches = data.complete_snapshot['branches'] @@ -2572,16 +2567,17 @@ def test_snapshot_add_get(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + visit_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.snapshot['id']) + origin, origin_visit1, snapshot=data.snapshot['id']) by_id = swh_storage.snapshot_get(data.snapshot['id']) assert by_id == {**data.snapshot, 'next_branch': None} @@ -2596,7 +2592,16 @@ def test_snapshot_add_nonexistent_visit(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - visit_id = 54164461156 + origin = Origin(url=origin_url) + # unknown visit + visit = OriginVisit( + origin=origin, + visit=54164461156, + date=data.date_visit1, + status='ongoing', + type='git', + snapshot=None + ) swh_storage.journal_writer.journal.objects[:] = [] @@ -2604,7 +2609,7 @@ with pytest.raises(StorageArgumentException): swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.snapshot['id']) + origin, visit, snapshot=data.snapshot['id']) assert list(swh_storage.journal_writer.journal.objects) == [ ('snapshot', data.snapshot)] @@ -2612,30 +2617,31 @@ def test_snapshot_add_twice__by_origin_visit(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] + visit1_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, snapshot=data.snapshot['id']) + origin, origin_visit1, snapshot=data.snapshot['id']) by_ov1 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit1_id) assert by_ov1 == {**data.snapshot, 'next_branch': None} origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] + visit2_id = origin_visit2.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, snapshot=data.snapshot['id']) + origin, origin_visit2, snapshot=data.snapshot['id']) by_ov2 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit2_id) @@ -2644,7 +2650,7 @@ data1 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2653,7 +2659,7 @@ data2 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2662,7 +2668,7 @@ data3 = { 'origin': origin_url, 'date': data.date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -2671,7 +2677,7 @@ data4 = { 'origin': origin_url, 'date': data.date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -2689,26 +2695,25 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin_url = data.origin['url'] + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] + origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one origin_visit3 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit3, ) - visit3_id = origin_visit3['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2716,18 +2721,19 @@ # Add snapshot to visit1, latest snapshot = visit 1 snapshot swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, snapshot=data.complete_snapshot['id']) + origin, origin_visit1, snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) # Status filter: all three visits are status=ongoing, so no snapshot # returned assert swh_storage.snapshot_get_latest( - origin_url, - allowed_statuses=['full']) is None + origin_url, + allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin_url, visit1_id, status='full') + swh_storage.origin_visit_update( + origin, origin_visit1, status='full') assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest( origin_url, @@ -2736,7 +2742,7 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, snapshot=data.empty_snapshot['id']) + origin, origin_visit2, snapshot=data.empty_snapshot['id']) assert {**data.empty_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2750,7 +2756,7 @@ # the new snapshot is returned swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit3_id, snapshot=data.complete_snapshot['id']) + origin, origin_visit3, snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2760,18 +2766,18 @@ assert swh_storage.snapshot_get_latest(origin_url) is None swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] + origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2779,8 +2785,7 @@ # Add unknown snapshot to visit1, check that the inconsistency is # detected swh_storage.origin_visit_update( - origin_url, - visit1_id, snapshot=data.complete_snapshot['id']) + origin, origin_visit1, snapshot=data.complete_snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest(origin_url) @@ -2788,13 +2793,11 @@ # Status filter: both visits are status=ongoing, so no snapshot # returned assert swh_storage.snapshot_get_latest( - origin_url, - allowed_statuses=['full']) is None + origin_url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again swh_storage.origin_visit_update( - origin_url, - visit1_id, status='full') + origin, origin_visit1, status='full') with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2809,8 +2812,7 @@ # Add unknown snapshot to visit2 and check that the inconsistency # is detected swh_storage.origin_visit_update( - origin_url, - visit2_id, snapshot=data.snapshot['id']) + origin, origin_visit2, snapshot=data.snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2867,15 +2869,16 @@ # Add other objects. Check their counter increased as well. swh_storage.origin_add_one(data.origin2) + origin = Origin(url=data.origin2['url']) origin_visit1 = swh_storage.origin_visit_add( - origin=data.origin2['url'], + origin, date=data.date_visit2, type=data.type_visit2, ) + swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - data.origin2['url'], origin_visit1['visit'], - snapshot=data.snapshot['id']) + origin, origin_visit1, snapshot=data.snapshot['id']) swh_storage.directory_add([data.dir]) swh_storage.revision_add([data.revision]) swh_storage.release_add([data.release]) @@ -3737,8 +3740,9 @@ swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) now = datetime.datetime.now(tz=datetime.timezone.utc) + origin = Origin(url='https://github.com/user1/repo1') swh_storage.origin_visit_add( - origin='https://github.com/user1/repo1', date=now, type='git') + origin, date=now, type='git') assert swh_storage.origin_count('github', with_visit=False) == 3 # it has a visit, but no snapshot, so with_visit=True => 0 @@ -3761,11 +3765,11 @@ now = datetime.datetime.now(tz=datetime.timezone.utc) swh_storage.snapshot_add([data.snapshot]) + origin = Origin(url='https://github.com/user1/repo1') visit = swh_storage.origin_visit_add( - origin='https://github.com/user1/repo1', date=now, type='git') + origin, date=now, type='git') swh_storage.origin_visit_update( - origin='https://github.com/user1/repo1', visit_id=visit['visit'], - snapshot=data.snapshot['id']) + origin, visit, snapshot=data.snapshot['id']) assert swh_storage.origin_count('github', with_visit=False) == 3 # github/user1 has a visit and a snapshot, so with_visit=True => 1 @@ -3784,8 +3788,9 @@ for (obj_type, obj) in objects: obj = obj.to_dict() if obj_type == 'origin_visit': - origin = obj.pop('origin') - swh_storage.origin_add_one({'url': origin}) + origin_url = obj.pop('origin') + swh_storage.origin_add_one({'url': origin_url}) + origin = Origin(url=origin_url) if 'visit' in obj: del obj['visit'] swh_storage.origin_visit_add(