diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -14,8 +14,8 @@ import dateutil from swh.model.model import ( - Revision, Release, Directory, DirectoryEntry, Content, SkippedContent, - OriginVisit, Snapshot, Origin + BaseModel, Revision, Release, Directory, DirectoryEntry, Content, + SkippedContent, OriginVisit, Snapshot, Origin ) from swh.model.hashutil import DEFAULT_ALGORITHMS from swh.storage.objstorage import ObjStorage @@ -757,17 +757,22 @@ return origin_url - def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin # TODO: rename the argument + def origin_visit_add(self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> Dict[str, BaseModel]: + origin_url = origin.url if isinstance(date, str): + # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) + elif not isinstance(date, datetime.datetime): + raise StorageArgumentException( + 'Date must be a datetime or a string') origin = self.origin_get_one({'url': origin_url}) - - if not origin: - return None + if not origin: # Cannot add a visit without an origin + raise StorageArgumentException( + 'Unknown origin %s', origin_url) visit_id = self._cql_runner.origin_generate_unique_visit_id(origin_url) @@ -788,9 +793,9 @@ self._cql_runner.origin_visit_add_one(visit) return { - 'origin': origin_url, - 'visit': visit_id, - } + 'origin': origin, + 'visit': visit, + } def origin_visit_update( self, origin: str, visit_id: int, status: Optional[str] = None, diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -19,8 +19,9 @@ import attr from swh.model.model import ( - BaseContent, Content, SkippedContent, Directory, Revision, Release, - Snapshot, OriginVisit, Origin, SHA1_SIZE) + BaseContent, BaseModel, Content, SkippedContent, Directory, Revision, + Release, Snapshot, OriginVisit, Origin, SHA1_SIZE +) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex from swh.storage.objstorage import ObjStorage @@ -702,20 +703,26 @@ return origin.url - def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin - if origin_url is None: - raise StorageArgumentException('Unknown origin.') + def origin_visit_add(self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> Dict[str, BaseModel]: + origin_url = origin.url if isinstance(date, str): # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) elif not isinstance(date, datetime.datetime): raise StorageArgumentException( - 'date must be a datetime or a string.') + 'Date must be a datetime or a string') + + origin = self.origin_get({'url': origin_url}) + if not origin: # Cannot add a visit without an origin + raise StorageArgumentException( + 'Unknown origin %s', origin_url) - visit_ret = None + visit_ret: Dict[str, BaseModel] = { + 'origin': origin, + } if origin_url in self._origins: origin = self._origins[origin_url] # visit ids are in the range [1, +inf[ @@ -731,16 +738,15 @@ visit=visit_id, ) self._origin_visits[origin_url].append(visit) - visit_ret = { - 'origin': origin.url, - 'visit': visit_id, - } + visit = visit - self._objects[(origin_url, visit_id)].append( + self._objects[(origin.url, visit.visit)].append( ('origin_visit', None)) self.journal_writer.origin_visit_add(visit) + visit_ret['visit'] = visit + return visit_ret def origin_visit_update( diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -3,11 +3,13 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import datetime + from typing import Any, Dict, Iterable, List, Optional, Union from swh.core.api import remote_api_endpoint from swh.model.model import ( - Content, Directory, Origin, OriginVisit, Revision, Release, + BaseModel, Content, Directory, Origin, OriginVisit, Revision, Release, Snapshot, SkippedContent ) @@ -768,19 +770,24 @@ @remote_api_endpoint('origin/visit/add') def origin_visit_add( - self, origin, date, type) -> Optional[Dict[str, Union[str, int]]]: + self, origin: Origin, + date: Union[str, datetime.datetime], + type: str) -> Dict[str, BaseModel]: """Add an origin_visit for the origin at ts with status 'ongoing'. Args: - origin (str): visited origin's identifier or URL - date (Union[str,datetime]): timestamp of such visit - type (str): the type of loader used for the visit (hg, git, ...) + origin: visited origin's identifier or URL + date: timestamp of such visit + type: the type of loader used for the visit (hg, git, ...) + + Raises: + StorageArgumentException if the date is mistyped, or the origin + is unknown. Returns: dict: dictionary with keys origin and visit where: - - - origin: origin identifier - - visit: the visit identifier for the new visit occurrence + - origin: origin object + - visit: the visit object for the new visit occurrence """ ... diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -20,7 +20,7 @@ import psycopg2.errors from swh.model.model import ( - Content, Directory, Origin, OriginVisit, + BaseModel, Content, Directory, Origin, OriginVisit, Revision, Release, SkippedContent, Snapshot, SHA1_SIZE ) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex @@ -804,20 +804,28 @@ @timed @db_transaction() def origin_visit_add( - self, origin, date, type, db=None, cur=None - ) -> Optional[Dict[str, Union[str, int]]]: - origin_url = origin + self, origin: Origin, date: Union[str, datetime.datetime], + type: str, db=None, cur=None) -> Dict[str, BaseModel]: + origin_url = origin.url if isinstance(date, str): # FIXME: Converge on iso8601 at some point date = dateutil.parser.parse(date) + elif not isinstance(date, datetime.datetime): + raise StorageArgumentException( + 'Date must be a datetime or a string') + + origin = self.origin_get({'url': origin_url}) + if not origin: # Cannot add a visit without an origin + raise StorageArgumentException( + 'Unknown origin %s', origin_url) with convert_validation_exceptions(): - visit_id = db.origin_visit_add(origin_url, date, type, cur) + visit_id = db.origin_visit_add(origin_url, date, type, cur=cur) # We can write to the journal only after inserting to the # DB, because we want the id of the visit - visit = { + visit = OriginVisit.from_dict({ 'origin': origin_url, 'date': date, 'type': type, @@ -825,13 +833,13 @@ 'status': 'ongoing', 'metadata': None, 'snapshot': None - } + }) self.journal_writer.origin_visit_add(visit) send_metric('origin_visit:add', count=1, method_name='origin_visit') return { - 'origin': origin_url, - 'visit': visit_id, + 'origin': origin, + 'visit': visit, } @timed diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -261,10 +261,12 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin - origin_visit = swh_storage.origin_visit_add( - origin_url, '2020-01-01', 'hg') - assert origin_visit['origin'] == origin_url - assert isinstance(origin_visit['visit'], int) + origin = Origin(url=origin_url) + result = swh_storage.origin_visit_add( + origin, '2020-01-01', 'hg') + origin_visit = result['visit'] + assert origin_visit.origin == origin_url + assert isinstance(origin_visit.visit, int) origin_visit = next(swh_storage.origin_visit_get(origin_url)) assert origin_visit['origin'] == origin_url @@ -297,13 +299,14 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin - r = swh_storage.origin_visit_add(sample_origin, '2020-01-01', 'git') + origin = Origin(url=sample_origin) + r = swh_storage.origin_visit_add(origin, '2020-01-01', 'git') assert r == {'origin': origin_url, 'visit': 1} mock_memory.assert_has_calls([ - call(sample_origin, '2020-01-01', 'git'), - call(sample_origin, '2020-01-01', 'git'), - call(sample_origin, '2020-01-01', 'git') + call(origin, '2020-01-01', 'git'), + call(origin, '2020-01-01', 'git'), + call(origin, '2020-01-01', 'git') ]) assert mock_sleep.call_count == 2 @@ -323,11 +326,13 @@ origin = list(swh_storage.origin_visit_get(origin_url)) assert not origin + origin = Origin(url=origin_url) + with pytest.raises(StorageArgumentException, match='Refuse to add'): - swh_storage.origin_visit_add(origin_url, '2020-01-31', 'svn') + swh_storage.origin_visit_add(origin, '2020-01-31', 'svn') mock_memory.assert_has_calls([ - call(origin_url, '2020-01-31', 'svn'), + call(origin, '2020-01-31', 'svn'), ]) @@ -597,21 +602,24 @@ sample_origin = sample_data['origin'][0] swh_storage.origin_add_one(sample_origin) origin_url = sample_origin['url'] - origin_visit = swh_storage.origin_visit_add( - origin_url, '2020-01-01', 'hg') + origin = Origin(url=origin_url) + result = swh_storage.origin_visit_add( + origin, '2020-01-01', 'hg') + origin_visit = result['visit'] ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url - assert ov['visit'] == origin_visit['visit'] + assert ov['visit'] == origin_visit.visit assert ov['status'] == 'ongoing' assert ov['snapshot'] is None assert ov['metadata'] is None - swh_storage.origin_visit_update(origin_url, ov['visit'], status='full') + swh_storage.origin_visit_update( + origin.url, origin_visit.visit, status='full') ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url - assert ov['visit'] == origin_visit['visit'] + assert ov['visit'] == origin_visit.visit assert ov['status'] == 'full' assert ov['snapshot'] is None assert ov['metadata'] is None diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -26,7 +26,9 @@ from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes -from swh.model.model import Content, OriginVisit, Release, Revision +from swh.model.model import ( + Content, Origin, OriginVisit, Release, Revision +) from swh.model.hypothesis_strategies import objects from swh.storage import HashCollision, get_storage from swh.storage.converters import origin_url_to_sha1 as sha1 @@ -1389,11 +1391,13 @@ # Add visits to those origins for origin in data.origins: + origin = Origin.from_dict(origin) for date_visit in visits: - visit = swh_storage.origin_visit_add( - origin['url'], date=date_visit, type=visit_type) + result = swh_storage.origin_visit_add( + origin, date=date_visit, type=visit_type) + visit = result['visit'] swh_storage.origin_visit_update( - origin['url'], visit_id=visit['visit'], status='full') + origin.url, visit_id=visit.visit, status='full') swh_storage.refresh_stat_counters() @@ -1414,11 +1418,13 @@ # will be found by the random selection visits = self._generate_random_visits(nb_visits=3, start=13, end=24) for origin in data.origins: + origin = Origin.from_dict(origin) for date_visit in visits: - visit = swh_storage.origin_visit_add( - origin['url'], date=date_visit, type=visit_type) + result = swh_storage.origin_visit_add( + origin, date=date_visit, type=visit_type) + visit = result['visit'] swh_storage.origin_visit_update( - origin['url'], visit_id=visit['visit'], status='full') + origin.url, visit.visit, status='full') random_origin_visit = swh_storage.origin_visit_get_random(visit_type) assert random_origin_visit is None @@ -1556,7 +1562,7 @@ # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) date_visit = datetime.datetime.now(datetime.timezone.utc) # Round to milliseconds before insertion, so equality doesn't fail @@ -1565,32 +1571,26 @@ microsecond=round(date_visit.microsecond, -3)) # when - origin_visit1 = swh_storage.origin_visit_add( - origin_url, + result = swh_storage.origin_visit_add( + origin, type=data.type_visit1, date=date_visit) + origin_visit1 = result['visit'] actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) - assert { - 'origin': origin_url, - 'date': date_visit, - 'visit': origin_visit1['visit'], - 'type': data.type_visit1, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - } in actual_origin_visits - + origin.url)) origin_visit = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } + + assert origin_visit in actual_origin_visits + objects = list(swh_storage.journal_writer.journal.objects) assert ('origin', data.origin2) in objects assert ('origin_visit', origin_visit) in objects @@ -1601,7 +1601,7 @@ def test_origin_visit_add_default_type(self, swh_storage): # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) # when date_visit = datetime.datetime.now(datetime.timezone.utc) @@ -1614,43 +1614,45 @@ date_visit2 = date_visit2.replace( microsecond=round(date_visit2.microsecond, -3)) - origin_visit1 = swh_storage.origin_visit_add( - origin_url, + result1 = swh_storage.origin_visit_add( + origin, date=date_visit, type=data.type_visit1, ) - origin_visit2 = swh_storage.origin_visit_add( - origin_url, + origin_visit1 = result1['visit'] + result2 = swh_storage.origin_visit_add( + origin, date=date_visit2, type=data.type_visit2, ) + origin_visit2 = result2['visit'] # then - assert origin_visit1['origin'] == origin_url - assert origin_visit1['visit'] is not None + assert origin_visit1.origin == origin.url + assert origin_visit1.visit is not None actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) expected_visits = [ - { - 'origin': origin_url, - 'date': date_visit, - 'visit': origin_visit1['visit'], - 'type': data.type_visit1, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - { - 'origin': origin_url, - 'date': date_visit2, - 'visit': origin_visit2['visit'], - 'type': data.type_visit2, - 'status': 'ongoing', - 'metadata': None, - 'snapshot': None, - }, - ] + { + 'origin': origin.url, + 'date': date_visit, + 'visit': origin_visit1.visit, + 'type': data.type_visit1, + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + { + 'origin': origin.url, + 'date': date_visit2, + 'visit': origin_visit2.visit, + 'type': data.type_visit2, + 'status': 'ongoing', + 'metadata': None, + 'snapshot': None, + }, + ] for visit in expected_visits: assert visit in actual_origin_visits @@ -1661,10 +1663,10 @@ assert ('origin_visit', visit) in objects def test_origin_visit_add_validation(self, swh_storage): - origin_url = swh_storage.origin_add_one(data.origin2) + origin = Origin(url=swh_storage.origin_add_one(data.origin2)) with pytest.raises(StorageArgumentException) as cm: - swh_storage.origin_visit_add(origin_url, date=[b'foo'], type='git') + swh_storage.origin_visit_add(origin, date=[b'foo'], type='git') if type(cm.value) == psycopg2.ProgrammingError: assert cm.value.pgcode \ @@ -1673,7 +1675,7 @@ def test_origin_visit_update(self, swh_storage): # given swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] + origin = Origin(url=data.origin['url']) date_visit = datetime.datetime.now(datetime.timezone.utc) date_visit2 = date_visit + datetime.timedelta(minutes=1) @@ -1685,25 +1687,28 @@ date_visit2 = date_visit2.replace( microsecond=round(date_visit2.microsecond, -3)) - origin_visit1 = swh_storage.origin_visit_add( - origin_url, + result1 = swh_storage.origin_visit_add( + origin, date=date_visit, type=data.type_visit1, ) + origin_visit1 = result1['visit'] - origin_visit2 = swh_storage.origin_visit_add( - origin_url, + result2 = swh_storage.origin_visit_add( + origin, date=date_visit2, type=data.type_visit2 ) + origin_visit2 = result2['visit'] swh_storage.origin_add_one(data.origin2) - origin_url2 = data.origin2['url'] - origin_visit3 = swh_storage.origin_visit_add( - origin_url2, + origin2 = Origin(url=data.origin2['url']) + result3 = swh_storage.origin_visit_add( + origin2, date=date_visit2, type=data.type_visit3 ) + origin_visit3 = result3['visit'] # when visit1_metadata = { @@ -1711,28 +1716,26 @@ 'directories': 22, } swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], status='full', + origin.url, origin_visit1.visit, status='full', metadata=visit1_metadata) swh_storage.origin_visit_update( - origin_url2, - origin_visit3['visit'], status='partial') + origin2.url, origin_visit3.visit, status='partial') # then actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) expected_visits = [{ - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': visit1_metadata, 'snapshot': None, }, { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -1742,13 +1745,12 @@ assert visit in actual_origin_visits actual_origin_visits_bis = list(swh_storage.origin_visit_get( - origin_url, - limit=1)) + origin.url, limit=1)) assert actual_origin_visits_bis == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': visit1_metadata, @@ -1756,13 +1758,12 @@ }] actual_origin_visits_ter = list(swh_storage.origin_visit_get( - origin_url, - last_visit=origin_visit1['visit'])) + origin.url, last_visit=origin_visit1.visit)) assert actual_origin_visits_ter == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -1770,12 +1771,12 @@ }] actual_origin_visits2 = list(swh_storage.origin_visit_get( - origin_url2)) + origin2.url)) assert actual_origin_visits2 == [ { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'partial', 'metadata': None, @@ -1783,45 +1784,45 @@ }] data1 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data3 = { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data4 = { - 'origin': origin_url, + 'origin': origin.url, 'date': date_visit, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'metadata': visit1_metadata, 'status': 'full', 'snapshot': None, } data5 = { - 'origin': origin_url2, + 'origin': origin2.url, 'date': date_visit2, - 'visit': origin_visit3['visit'], + 'visit': origin_visit3.visit, 'type': data.type_visit3, 'status': 'partial', 'metadata': None, @@ -1837,17 +1838,18 @@ assert ('origin_visit', data5) in objects def test_origin_visit_update_validation(self, swh_storage): - origin_url = data.origin['url'] + origin = Origin(url=data.origin['url']) swh_storage.origin_add_one(data.origin) - visit = swh_storage.origin_visit_add( - origin_url, + result = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) + visit = result['visit'] with pytest.raises(StorageArgumentException, match='status') as cm: swh_storage.origin_visit_update( - origin_url, visit['visit'], status='foobar') + origin.url, visit.visit, status='foobar') if type(cm.value) == psycopg2.DataError: assert cm.value.pgcode == \ @@ -1855,60 +1857,64 @@ def test_origin_visit_find_by_date(self, swh_storage): # given - swh_storage.origin_add_one(data.origin) + origin_url = swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) swh_storage.origin_visit_add( - data.origin['url'], + origin, date=data.date_visit2, type=data.type_visit1, ) - origin_visit2 = swh_storage.origin_visit_add( - data.origin['url'], + result2 = swh_storage.origin_visit_add( + origin, date=data.date_visit3, type=data.type_visit2, ) + origin_visit2 = result2['visit'] - origin_visit3 = swh_storage.origin_visit_add( - data.origin['url'], + result3 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit3, ) + origin_visit3 = result3['visit'] # Simple case visit = swh_storage.origin_visit_find_by_date( - data.origin['url'], data.date_visit3) - assert visit['visit'] == origin_visit2['visit'] + origin.url, data.date_visit3) + assert visit['visit'] == origin_visit2.visit # There are two visits at the same date, the latest must be returned visit = swh_storage.origin_visit_find_by_date( - data.origin['url'], data.date_visit2) - assert visit['visit'] == origin_visit3['visit'] + origin.url, data.date_visit2) + assert visit['visit'] == origin_visit3.visit def test_origin_visit_find_by_date__unknown_origin(self, swh_storage): swh_storage.origin_visit_find_by_date('foo', data.date_visit2) def test_origin_visit_update_missing_snapshot(self, swh_storage): # given - swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] + origin_url = swh_storage.origin_add_one(data.origin) + origin = Origin(url=origin_url) - origin_visit = swh_storage.origin_visit_add( - origin_url, + result = swh_storage.origin_visit_add( + Origin(url=origin_url), date=data.date_visit1, type=data.type_visit1, ) + origin_visit = result['visit'] + # when swh_storage.origin_visit_update( - origin_url, - origin_visit['visit'], + origin.url, + origin_visit.visit, snapshot=data.snapshot['id']) # then actual_origin_visit = swh_storage.origin_visit_get_by( - origin_url, - origin_visit['visit']) + origin.url, origin_visit.visit) assert actual_origin_visit['snapshot'] == data.snapshot['id'] # when @@ -1919,29 +1925,30 @@ swh_storage.origin_add_one(data.origin) swh_storage.origin_add_one(data.origin2) - origin_url = data.origin['url'] - origin2_url = data.origin2['url'] + origin = Origin(url=data.origin['url']) + origin2 = Origin(url=data.origin2['url']) - origin_visit1 = swh_storage.origin_visit_add( - origin_url, + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) + origin_visit1 = result1['visit'] swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], + origin.url, + origin_visit1.visit, snapshot=data.snapshot['id']) # Add some other {origin, visit} entries swh_storage.origin_visit_add( - origin_url, + origin, date=data.date_visit3, type=data.type_visit3, ) swh_storage.origin_visit_add( - origin2_url, + origin2, date=data.date_visit3, type=data.type_visit3, ) @@ -1953,14 +1960,13 @@ } swh_storage.origin_visit_update( - origin_url, - origin_visit1['visit'], status='full', - metadata=visit1_metadata) + origin.url, origin_visit1.visit, + status='full', metadata=visit1_metadata) - expected_origin_visit = origin_visit1.copy() + expected_origin_visit = origin_visit1.to_dict() expected_origin_visit.update({ - 'origin': origin_url, - 'visit': origin_visit1['visit'], + 'origin': origin.url, + 'visit': origin_visit1.visit, 'date': data.date_visit2, 'type': data.type_visit2, 'metadata': visit1_metadata, @@ -1970,8 +1976,8 @@ # when actual_origin_visit1 = swh_storage.origin_visit_get_by( - origin_url, - origin_visit1['visit']) + origin.url, + origin_visit1.visit) # then assert actual_origin_visit1 == expected_origin_visit @@ -2056,18 +2062,20 @@ def test_origin_visit_upsert_existing(self, swh_storage): # given swh_storage.origin_add_one(data.origin2) - origin_url = data.origin2['url'] + origin = Origin(url=data.origin2['url']) # when - origin_visit1 = swh_storage.origin_visit_add( - origin_url, + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit1, ) + origin_visit1 = result1['visit'] + swh_storage.origin_visit_upsert([OriginVisit.from_dict({ - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2075,16 +2083,16 @@ })]) # then - assert origin_visit1['origin'] == origin_url - assert origin_visit1['visit'] is not None + assert origin_visit1.origin == origin.url + assert origin_visit1.visit is not None actual_origin_visits = list(swh_storage.origin_visit_get( - origin_url)) + origin.url)) assert actual_origin_visits == [ { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2092,18 +2100,18 @@ }] data1 = { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, 'snapshot': None, } data2 = { - 'origin': origin_url, + 'origin': origin.url, 'date': data.date_visit2, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'full', 'metadata': None, @@ -2122,15 +2130,15 @@ def test_origin_visit_get_latest(self, swh_storage): swh_storage.origin_add_one(data.origin) - origin_url = data.origin['url'] + origin = Origin(url=data.origin['url']) origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit1, type=data.type_visit1, ) visit1_id = origin_visit1['visit'] origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) @@ -2138,92 +2146,94 @@ # Add a visit with the same date as the previous one origin_visit3 = swh_storage.origin_visit_add( - origin=origin_url, + origin, date=data.date_visit2, type=data.type_visit2, ) visit3_id = origin_visit3['visit'] - origin_visit1 = swh_storage.origin_visit_get_by(origin_url, visit1_id) - origin_visit2 = swh_storage.origin_visit_get_by(origin_url, visit2_id) - origin_visit3 = swh_storage.origin_visit_get_by(origin_url, visit3_id) + origin_visit1 = swh_storage.origin_visit_get_by( + origin.url, visit1_id.visit) + origin_visit2 = swh_storage.origin_visit_get_by( + origin.url, visit2_id.visit) + origin_visit3 = swh_storage.origin_visit_get_by( + origin.url, visit3_id.visit) # Two visits, both with no snapshot - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) assert swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) is None + origin.url, require_snapshot=True) is None # Add snapshot to visit1; require_snapshot=True makes it return # visit1 and require_snapshot=False still returns visit2 swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, - snapshot=data.complete_snapshot['id']) - assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ + origin.url, visit1_id.visit, snapshot=data.complete_snapshot['id']) + + assert {**origin_visit1, + 'snapshot': data.complete_snapshot['id']} \ == swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Status filter: all three visits are status=ongoing, so no visit # returned assert swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) is None + origin.url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again swh_storage.origin_visit_update( - origin_url, - visit1_id, status='full') + origin.url, visit1_id.visit, status='full') assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, - snapshot=data.empty_snapshot['id']) + origin.url, visit2_id.visit, snapshot=data.empty_snapshot['id']) assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) - assert origin_visit3 == swh_storage.origin_visit_get_latest(origin_url) + assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) # Check that the status filter is still working assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) # Add snapshot to visit3 (same date as visit2) swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit3_id, snapshot=data.complete_snapshot['id']) + origin.url, visit3_id.visit, snapshot=data.complete_snapshot['id']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) + origin.url, allowed_statuses=['full']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], 'status': 'full'} == swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full'], require_snapshot=True) + origin.url, allowed_statuses=['full'], require_snapshot=True) assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] - } == swh_storage.origin_visit_get_latest(origin_url) + } == swh_storage.origin_visit_get_latest(origin.url) assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] } == swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + origin.url, require_snapshot=True) def test_person_fullname_unicity(self, swh_storage): # given (person injection through revisions for example) @@ -2249,18 +2259,22 @@ def test_snapshot_add_get_empty(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + + origin = Origin(url=origin_url) + result = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + origin_visit1 = result['visit'] + visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.empty_snapshot]) assert actual_result == {'snapshot:add': 1} swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.empty_snapshot['id']) + origin.url, origin_visit1.visit, + snapshot=data.empty_snapshot['id']) by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) assert by_id == {**data.empty_snapshot, 'next_branch': None} @@ -2271,7 +2285,7 @@ data1 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2280,7 +2294,7 @@ data2 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2295,16 +2309,19 @@ def test_snapshot_add_get_complete(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + origin_visit1 = result1['visit'] + visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.complete_snapshot['id']) + origin.url, origin_visit1.visit, + snapshot=data.complete_snapshot['id']) assert actual_result == {'snapshot:add': 1} by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) @@ -2453,16 +2470,18 @@ def test_snapshot_add_get_filtered(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + origin_visit1 = result1['visit'] swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.complete_snapshot['id']) + origin.url, origin_visit1.visit, + snapshot=data.complete_snapshot['id']) snp_id = data.complete_snapshot['id'] branches = data.complete_snapshot['branches'] @@ -2572,16 +2591,18 @@ def test_snapshot_add_get(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit_id = origin_visit1['visit'] + origin_visit1 = result1['visit'] + visit_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.snapshot['id']) + origin.url, origin_visit1.visit, snapshot=data.snapshot['id']) by_id = swh_storage.snapshot_get(data.snapshot['id']) assert by_id == {**data.snapshot, 'next_branch': None} @@ -2596,7 +2617,16 @@ def test_snapshot_add_nonexistent_visit(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - visit_id = 54164461156 + origin = Origin(url=origin_url) + # unknown visit + visit = OriginVisit( + origin=origin, + visit=54164461156, + date=data.date_visit1, + status='ongoing', + type='git', + snapshot=None + ) swh_storage.journal_writer.journal.objects[:] = [] @@ -2604,7 +2634,7 @@ with pytest.raises(StorageArgumentException): swh_storage.origin_visit_update( - origin_url, visit_id, snapshot=data.snapshot['id']) + origin.url, visit.visit, snapshot=data.snapshot['id']) assert list(swh_storage.journal_writer.journal.objects) == [ ('snapshot', data.snapshot)] @@ -2612,30 +2642,33 @@ def test_snapshot_add_twice__by_origin_visit(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] + origin_visit1 = result1['visit'] + visit1_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, snapshot=data.snapshot['id']) + origin.url, origin_visit1.visit, snapshot=data.snapshot['id']) by_ov1 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit1_id) assert by_ov1 == {**data.snapshot, 'next_branch': None} - origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + result2 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] + origin_visit2 = result2['visit'] + visit2_id = origin_visit2.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, snapshot=data.snapshot['id']) + origin.url, origin_visit2.visit, snapshot=data.snapshot['id']) by_ov2 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit2_id) @@ -2644,7 +2677,7 @@ data1 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2653,7 +2686,7 @@ data2 = { 'origin': origin_url, 'date': data.date_visit1, - 'visit': origin_visit1['visit'], + 'visit': origin_visit1.visit, 'type': data.type_visit1, 'status': 'ongoing', 'metadata': None, @@ -2662,7 +2695,7 @@ data3 = { 'origin': origin_url, 'date': data.date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -2671,7 +2704,7 @@ data4 = { 'origin': origin_url, 'date': data.date_visit2, - 'visit': origin_visit2['visit'], + 'visit': origin_visit2.visit, 'type': data.type_visit2, 'status': 'ongoing', 'metadata': None, @@ -2689,26 +2722,28 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin_url = data.origin['url'] - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] - origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin_visit1 = result1['visit'] + + result2 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] + origin_visit2 = result2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = swh_storage.origin_visit_add( - origin=origin_url, + result3 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit3, ) - visit3_id = origin_visit3['visit'] + origin_visit3 = result3['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2716,18 +2751,20 @@ # Add snapshot to visit1, latest snapshot = visit 1 snapshot swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit1_id, snapshot=data.complete_snapshot['id']) + origin.url, origin_visit1.visit, + snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) # Status filter: all three visits are status=ongoing, so no snapshot # returned assert swh_storage.snapshot_get_latest( - origin_url, - allowed_statuses=['full']) is None + origin_url, + allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin_url, visit1_id, status='full') + swh_storage.origin_visit_update( + origin.url, origin_visit1.visit, status='full') assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest( origin_url, @@ -2736,7 +2773,8 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin_url, visit2_id, snapshot=data.empty_snapshot['id']) + origin.url, origin_visit2.visit, + snapshot=data.empty_snapshot['id']) assert {**data.empty_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2750,7 +2788,8 @@ # the new snapshot is returned swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin_url, visit3_id, snapshot=data.complete_snapshot['id']) + origin.url, origin_visit3.visit, + snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2760,18 +2799,20 @@ assert swh_storage.snapshot_get_latest(origin_url) is None swh_storage.origin_add_one(data.origin) - origin_visit1 = swh_storage.origin_visit_add( - origin=origin_url, + origin = Origin(url=origin_url) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] - origin_visit2 = swh_storage.origin_visit_add( - origin=origin_url, + origin_visit1 = result1['visit'] + + result2 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] + origin_visit2 = result2['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2779,8 +2820,8 @@ # Add unknown snapshot to visit1, check that the inconsistency is # detected swh_storage.origin_visit_update( - origin_url, - visit1_id, snapshot=data.complete_snapshot['id']) + origin.url, origin_visit1.visit, + snapshot=data.complete_snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest(origin_url) @@ -2788,13 +2829,11 @@ # Status filter: both visits are status=ongoing, so no snapshot # returned assert swh_storage.snapshot_get_latest( - origin_url, - allowed_statuses=['full']) is None + origin_url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again swh_storage.origin_visit_update( - origin_url, - visit1_id, status='full') + origin.url, origin_visit1.visit, status='full') with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2809,8 +2848,7 @@ # Add unknown snapshot to visit2 and check that the inconsistency # is detected swh_storage.origin_visit_update( - origin_url, - visit2_id, snapshot=data.snapshot['id']) + origin.url, origin_visit2.visit, snapshot=data.snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2867,15 +2905,17 @@ # Add other objects. Check their counter increased as well. swh_storage.origin_add_one(data.origin2) - origin_visit1 = swh_storage.origin_visit_add( - origin=data.origin2['url'], + origin = Origin(url=data.origin2['url']) + result1 = swh_storage.origin_visit_add( + origin, date=data.date_visit2, type=data.type_visit2, ) + origin_visit1 = result1['visit'] + swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - data.origin2['url'], origin_visit1['visit'], - snapshot=data.snapshot['id']) + origin.url, origin_visit1.visit, snapshot=data.snapshot['id']) swh_storage.directory_add([data.dir]) swh_storage.revision_add([data.revision]) swh_storage.release_add([data.release]) @@ -3737,8 +3777,9 @@ swh_storage.origin_add([{'url': url} for url in self.ORIGINS]) now = datetime.datetime.now(tz=datetime.timezone.utc) + origin = Origin(url='https://github.com/user1/repo1') swh_storage.origin_visit_add( - origin='https://github.com/user1/repo1', date=now, type='git') + origin, date=now, type='git') assert swh_storage.origin_count('github', with_visit=False) == 3 # it has a visit, but no snapshot, so with_visit=True => 0 @@ -3761,11 +3802,12 @@ now = datetime.datetime.now(tz=datetime.timezone.utc) swh_storage.snapshot_add([data.snapshot]) - visit = swh_storage.origin_visit_add( - origin='https://github.com/user1/repo1', date=now, type='git') + origin = Origin(url='https://github.com/user1/repo1') + result = swh_storage.origin_visit_add( + origin, date=now, type='git') + visit = result['visit'] swh_storage.origin_visit_update( - origin='https://github.com/user1/repo1', visit_id=visit['visit'], - snapshot=data.snapshot['id']) + origin.url, visit.visit, snapshot=data.snapshot['id']) assert swh_storage.origin_count('github', with_visit=False) == 3 # github/user1 has a visit and a snapshot, so with_visit=True => 1 @@ -3784,8 +3826,9 @@ for (obj_type, obj) in objects: obj = obj.to_dict() if obj_type == 'origin_visit': - origin = obj.pop('origin') - swh_storage.origin_add_one({'url': origin}) + origin_url = obj.pop('origin') + swh_storage.origin_add_one({'url': origin_url}) + origin = Origin(url=origin_url) if 'visit' in obj: del obj['visit'] swh_storage.origin_visit_add(