diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -14,7 +14,7 @@ import dateutil from swh.model.model import ( - BaseModel, Revision, Release, Directory, DirectoryEntry, Content, + Revision, Release, Directory, DirectoryEntry, Content, SkippedContent, OriginVisit, Snapshot, Origin ) from swh.model.hashutil import DEFAULT_ALGORITHMS @@ -668,7 +668,8 @@ else: return results - def origin_get_one(self, origin): + def origin_get_one(self, origin: Dict[str, Any]) -> Optional[ + Dict[str, Any]]: if 'id' in origin: raise StorageArgumentException('Origin ids are not supported.') if 'url' not in origin: @@ -759,7 +760,7 @@ def origin_visit_add(self, origin: Origin, date: Union[str, datetime.datetime], - type: str) -> Dict[str, BaseModel]: + type: str) -> OriginVisit: origin_url = origin.url if isinstance(date, str): @@ -769,8 +770,7 @@ raise StorageArgumentException( 'Date must be a datetime or a string') - origin = self.origin_get_one({'url': origin_url}) - if not origin: # Cannot add a visit without an origin + if not self.origin_get_one({'url': origin.url}): raise StorageArgumentException( 'Unknown origin %s', origin_url) @@ -789,13 +789,8 @@ except (KeyError, TypeError, ValueError) as e: raise StorageArgumentException(*e.args) self.journal_writer.origin_visit_add(visit) - self._cql_runner.origin_visit_add_one(visit) - - return { - 'origin': origin, - 'visit': visit, - } + return visit def origin_visit_update( self, origin: Origin, visit: OriginVisit, diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -19,7 +19,7 @@ import attr from swh.model.model import ( - BaseContent, BaseModel, Content, SkippedContent, Directory, Revision, + BaseContent, Content, SkippedContent, Directory, Revision, Release, Snapshot, OriginVisit, Origin, SHA1_SIZE ) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex @@ -705,7 +705,7 @@ def origin_visit_add(self, origin: Origin, date: Union[str, datetime.datetime], - type: str) -> Dict[str, BaseModel]: + type: str) -> OriginVisit: origin_url = origin.url if isinstance(date, str): @@ -720,9 +720,6 @@ raise StorageArgumentException( 'Unknown origin %s', origin_url) - visit_ret: Dict[str, BaseModel] = { - 'origin': origin, - } if origin_url in self._origins: origin = self._origins[origin_url] # visit ids are in the range [1, +inf[ @@ -745,9 +742,8 @@ self.journal_writer.origin_visit_add(visit) - visit_ret['visit'] = visit - - return visit_ret + # return last visit + return visit def origin_visit_update( self, origin: Origin, visit: OriginVisit, diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -9,7 +9,7 @@ from swh.core.api import remote_api_endpoint from swh.model.model import ( - BaseModel, Content, Directory, Origin, OriginVisit, Revision, Release, + Content, Directory, Origin, OriginVisit, Revision, Release, Snapshot, SkippedContent ) @@ -772,7 +772,7 @@ def origin_visit_add( self, origin: Origin, date: Union[str, datetime.datetime], - type: str) -> Dict[str, BaseModel]: + type: str) -> OriginVisit: """Add an origin_visit for the origin at ts with status 'ongoing'. Args: diff --git a/swh/storage/retry.py b/swh/storage/retry.py --- a/swh/storage/retry.py +++ b/swh/storage/retry.py @@ -15,7 +15,7 @@ from swh.model.model import ( Content, SkippedContent, Directory, Revision, Release, Snapshot, - Origin, + Origin, OriginVisit ) from swh.storage import get_storage @@ -93,8 +93,8 @@ return self.storage.origin_add_one(origin) @swh_retry - def origin_visit_add(self, origin: Dict, - date: Union[datetime, str], type: str) -> Dict: + def origin_visit_add(self, origin: Origin, + date: Union[datetime, str], type: str) -> OriginVisit: return self.storage.origin_visit_add(origin, date, type) @swh_retry diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -20,7 +20,7 @@ import psycopg2.errors from swh.model.model import ( - BaseModel, Content, Directory, Origin, OriginVisit, + Content, Directory, Origin, OriginVisit, Revision, Release, SkippedContent, Snapshot, SHA1_SIZE ) from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex @@ -805,7 +805,7 @@ @db_transaction() def origin_visit_add( self, origin: Origin, date: Union[str, datetime.datetime], - type: str, db=None, cur=None) -> Dict[str, BaseModel]: + type: str, db=None, cur=None) -> OriginVisit: origin_url = origin.url if isinstance(date, str): @@ -837,10 +837,7 @@ self.journal_writer.origin_visit_add(visit) send_metric('origin_visit:add', count=1, method_name='origin_visit') - return { - 'origin': origin, - 'visit': visit, - } + return visit @timed @db_transaction() diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -262,9 +262,8 @@ assert not origin origin = Origin(url=origin_url) - result = swh_storage.origin_visit_add( + origin_visit = swh_storage.origin_visit_add( origin, '2020-01-01', 'hg') - origin_visit = result['visit'] assert origin_visit.origin == origin_url assert isinstance(origin_visit.visit, int) @@ -603,9 +602,8 @@ swh_storage.origin_add_one(sample_origin) origin_url = sample_origin['url'] origin = Origin(url=origin_url) - result = swh_storage.origin_visit_add( + origin_visit = swh_storage.origin_visit_add( origin, '2020-01-01', 'hg') - origin_visit = result['visit'] ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1393,9 +1393,8 @@ for origin in data.origins: origin = Origin.from_dict(origin) for date_visit in visits: - result = swh_storage.origin_visit_add( + visit = swh_storage.origin_visit_add( origin, date=date_visit, type=visit_type) - visit = result['visit'] swh_storage.origin_visit_update(origin, visit, status='full') swh_storage.refresh_stat_counters() @@ -1419,9 +1418,8 @@ for origin in data.origins: origin = Origin.from_dict(origin) for date_visit in visits: - result = swh_storage.origin_visit_add( + visit = swh_storage.origin_visit_add( origin, date=date_visit, type=visit_type) - visit = result['visit'] swh_storage.origin_visit_update( origin, visit=visit, status='full') @@ -1570,11 +1568,10 @@ microsecond=round(date_visit.microsecond, -3)) # when - result = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, type=data.type_visit1, date=date_visit) - origin_visit1 = result['visit'] actual_origin_visits = list(swh_storage.origin_visit_get( origin.url)) @@ -1613,18 +1610,16 @@ date_visit2 = date_visit2.replace( microsecond=round(date_visit2.microsecond, -3)) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=date_visit, type=data.type_visit1, ) - origin_visit1 = result1['visit'] - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=date_visit2, type=data.type_visit2, ) - origin_visit2 = result2['visit'] # then assert origin_visit1.origin == origin.url @@ -1686,28 +1681,25 @@ date_visit2 = date_visit2.replace( microsecond=round(date_visit2.microsecond, -3)) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=date_visit, type=data.type_visit1, ) - origin_visit1 = result1['visit'] - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=date_visit2, type=data.type_visit2 ) - origin_visit2 = result2['visit'] swh_storage.origin_add_one(data.origin2) origin2 = Origin(url=data.origin2['url']) - result3 = swh_storage.origin_visit_add( + origin_visit3 = swh_storage.origin_visit_add( origin2, date=date_visit2, type=data.type_visit3 ) - origin_visit3 = result3['visit'] # when visit1_metadata = { @@ -1839,16 +1831,15 @@ def test_origin_visit_update_validation(self, swh_storage): origin = Origin(url=data.origin['url']) swh_storage.origin_add_one(data.origin) - result = swh_storage.origin_visit_add( + origin_visit = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - visit = result['visit'] with pytest.raises(StorageArgumentException, match='status') as cm: swh_storage.origin_visit_update( - origin, visit, status='foobar') + origin, origin_visit, status='foobar') if type(cm.value) == psycopg2.DataError: assert cm.value.pgcode == \ @@ -1865,19 +1856,17 @@ type=data.type_visit1, ) - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=data.date_visit3, type=data.type_visit2, ) - origin_visit2 = result2['visit'] - result3 = swh_storage.origin_visit_add( + origin_visit3 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit3, ) - origin_visit3 = result3['visit'] # Simple case visit = swh_storage.origin_visit_find_by_date( @@ -1897,14 +1886,12 @@ origin_url = swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result = swh_storage.origin_visit_add( + origin_visit = swh_storage.origin_visit_add( Origin(url=origin_url), date=data.date_visit1, type=data.type_visit1, ) - origin_visit = result['visit'] - # when swh_storage.origin_visit_update( origin, @@ -1927,12 +1914,11 @@ origin = Origin(url=data.origin['url']) origin2 = Origin(url=data.origin2['url']) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - origin_visit1 = result1['visit'] swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( @@ -2064,12 +2050,11 @@ origin = Origin(url=data.origin2['url']) # when - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit1, ) - origin_visit1 = result1['visit'] swh_storage.origin_visit_upsert([OriginVisit.from_dict({ 'origin': origin.url, @@ -2130,33 +2115,31 @@ def test_origin_visit_get_latest(self, swh_storage): swh_storage.origin_add_one(data.origin) origin = Origin(url=data.origin['url']) - origin_visit1 = swh_storage.origin_visit_add( + ov1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - visit1_id = origin_visit1['visit'] - origin_visit2 = swh_storage.origin_visit_add( + + ov2 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - visit2_id = origin_visit2['visit'] # Add a visit with the same date as the previous one - origin_visit3 = swh_storage.origin_visit_add( + ov3 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - visit3_id = origin_visit3['visit'] origin_visit1 = swh_storage.origin_visit_get_by( - origin.url, visit1_id.visit) + origin.url, ov1.visit) origin_visit2 = swh_storage.origin_visit_get_by( - origin.url, visit2_id.visit) + origin.url, ov2.visit) origin_visit3 = swh_storage.origin_visit_get_by( - origin.url, visit3_id.visit) + origin.url, ov3.visit) # Two visits, both with no snapshot assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url) @@ -2167,7 +2150,7 @@ # visit1 and require_snapshot=False still returns visit2 swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, visit1_id, snapshot=data.complete_snapshot['id']) + origin, ov1, snapshot=data.complete_snapshot['id']) assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ @@ -2182,7 +2165,8 @@ origin.url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update(origin, visit1_id, status='full') + swh_storage.origin_visit_update( + origin, ov1, status='full') assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], @@ -2194,7 +2178,7 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin, visit2_id, snapshot=data.empty_snapshot['id']) + origin, ov2, snapshot=data.empty_snapshot['id']) assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ swh_storage.origin_visit_get_latest( origin.url, require_snapshot=True) @@ -2211,7 +2195,7 @@ # Add snapshot to visit3 (same date as visit2) swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, visit3_id, snapshot=data.complete_snapshot['id']) + origin, ov3, snapshot=data.complete_snapshot['id']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], @@ -2259,12 +2243,11 @@ swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result['visit'] visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.empty_snapshot]) @@ -2307,12 +2290,11 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] visit_id = origin_visit1.visit actual_result = swh_storage.snapshot_add([data.complete_snapshot]) @@ -2467,12 +2449,11 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( @@ -2587,12 +2568,11 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] visit_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) @@ -2638,12 +2618,11 @@ origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] visit1_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( @@ -2653,12 +2632,11 @@ origin_url, visit1_id) assert by_ov1 == {**data.snapshot, 'next_branch': None} - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - origin_visit2 = result2['visit'] visit2_id = origin_visit2.visit swh_storage.snapshot_add([data.snapshot]) @@ -2718,27 +2696,24 @@ swh_storage.origin_add_one(data.origin) origin_url = data.origin['url'] origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - origin_visit2 = result2['visit'] # Add a visit with the same date as the previous one - result3 = swh_storage.origin_visit_add( + origin_visit3 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit3, ) - origin_visit3 = result3['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2792,19 +2767,17 @@ swh_storage.origin_add_one(data.origin) origin = Origin(url=origin_url) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit1, type=data.type_visit1, ) - origin_visit1 = result1['visit'] - result2 = swh_storage.origin_visit_add( + origin_visit2 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - origin_visit2 = result2['visit'] # Two visits, both with no snapshot: latest snapshot is None assert swh_storage.snapshot_get_latest(origin_url) is None @@ -2897,12 +2870,11 @@ swh_storage.origin_add_one(data.origin2) origin = Origin(url=data.origin2['url']) - result1 = swh_storage.origin_visit_add( + origin_visit1 = swh_storage.origin_visit_add( origin, date=data.date_visit2, type=data.type_visit2, ) - origin_visit1 = result1['visit'] swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( @@ -3794,9 +3766,8 @@ swh_storage.snapshot_add([data.snapshot]) origin = Origin(url='https://github.com/user1/repo1') - result = swh_storage.origin_visit_add( + visit = swh_storage.origin_visit_add( origin, date=now, type='git') - visit = result['visit'] swh_storage.origin_visit_update( origin, visit, snapshot=data.snapshot['id'])