diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -793,15 +793,15 @@ return visit def origin_visit_update( - self, origin: Origin, visit: OriginVisit, + self, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None) -> None: # Get the existing data of the visit - row = self._cql_runner.origin_visit_get_one(origin.url, visit.visit) + row = self._cql_runner.origin_visit_get_one(visit.origin, visit.visit) if not row: raise StorageArgumentException( 'origin visit %s for origin %s does not exist.', - visit.visit, origin.url) + visit.visit, visit.origin) try: visit = OriginVisit.from_dict(self._format_origin_visit_row(row)) except (KeyError, TypeError, ValueError) as e: @@ -822,7 +822,8 @@ self.journal_writer.origin_visit_update(visit) - self._cql_runner.origin_visit_update(origin.url, visit.visit, updates) + self._cql_runner.origin_visit_update( + visit.origin, visit.visit, updates) def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: self.journal_writer.origin_visit_upsert(visits) diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -746,20 +746,20 @@ return visit def origin_visit_update( - self, origin: Origin, visit: OriginVisit, + self, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None) -> None: visit_id = visit.visit if visit.visit else -1 if visit_id == -1: raise StorageArgumentException( 'origin visit %s for origin %s does not exist.', - visit.visit, origin.url) from None + visit.visit, visit.origin) from None try: - visit = self._origin_visits[origin.url][visit_id - 1] + visit = self._origin_visits[visit.origin][visit_id - 1] except IndexError: raise StorageArgumentException( 'origin visit %s for origin %s does not exist.', - visit.visit, origin.url) from None + visit.visit, visit.origin) from None updates: Dict[str, Any] = {} if status: @@ -775,7 +775,7 @@ raise StorageArgumentException(*e.args) self.journal_writer.origin_visit_update(visit) - self._origin_visits[origin.url][visit_id - 1] = visit + self._origin_visits[visit.origin][visit_id - 1] = visit def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: self.journal_writer.origin_visit_upsert(visits) diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -794,14 +794,13 @@ @remote_api_endpoint('origin/visit/update') def origin_visit_update( - self, origin: Origin, visit: OriginVisit, + self, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None) -> None: """Update an origin_visit's status. Args: - origin (str): visited origin's URL - visit_id: Visit's id + visit: Visit status: Visit's new status metadata: Data associated to the visit snapshot (sha1_git): identifier of the snapshot to add to diff --git a/swh/storage/retry.py b/swh/storage/retry.py --- a/swh/storage/retry.py +++ b/swh/storage/retry.py @@ -99,11 +99,11 @@ @swh_retry def origin_visit_update( - self, origin: str, visit_id: int, status: Optional[str] = None, + self, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, - snapshot: Optional[Dict] = None) -> Dict: + snapshot: Optional[Dict] = None) -> None: return self.storage.origin_visit_update( - origin, visit_id, status=status, + visit, status=status, metadata=metadata, snapshot=snapshot) @swh_retry diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -841,17 +841,17 @@ @timed @db_transaction() - def origin_visit_update(self, origin: Origin, visit: OriginVisit, + def origin_visit_update(self, visit: OriginVisit, status: Optional[str] = None, metadata: Optional[Dict] = None, snapshot: Optional[bytes] = None, db=None, cur=None) -> None: visit_row = db.origin_visit_get( - origin.url, visit.visit, cur=cur) + visit.origin, visit.visit, cur=cur) if not visit_row: raise StorageArgumentException( 'origin visit %s for origin %s does not exist.', - visit.visit, origin.url) + visit.visit, visit.origin) visit_to_update = OriginVisit.from_dict( dict(zip(db.origin_visit_get_cols, visit_row))) @@ -874,7 +874,7 @@ with convert_validation_exceptions(): db.origin_visit_update( - origin.url, visit.visit, updates, cur=cur) + visit.origin, visit.visit, updates, cur=cur) @timed @db_transaction() diff --git a/swh/storage/tests/conftest.py b/swh/storage/tests/conftest.py --- a/swh/storage/tests/conftest.py +++ b/swh/storage/tests/conftest.py @@ -240,4 +240,5 @@ 'tool': [data.metadata_tool], 'provider': [data.provider], 'origin_metadata': [data.origin_metadata, data.origin_metadata2], + 'origin_visit': [data.origin_visit], } diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -380,6 +380,14 @@ tzinfo=datetime.timezone.utc) type_visit3 = 'deb' +origin_visit = { + 'origin': origin['url'], + 'date': date_visit1, + 'type': type_visit1, + 'status': 'ongoing', + 'snapshot': None, +} + release = { 'id': b'87659012345678901234', 'name': b'v0.0.1', diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -10,7 +10,7 @@ import pytest from swh.model.model import ( - Content, Directory, Release, Revision, Snapshot, Origin + Content, Directory, Release, Revision, Snapshot, Origin, OriginVisit ) from swh.storage import HashCollision, get_storage @@ -612,7 +612,7 @@ assert ov['snapshot'] is None assert ov['metadata'] is None - swh_storage.origin_visit_update(origin, origin_visit, status='full') + swh_storage.origin_visit_update(origin_visit, status='full') ov = next(swh_storage.origin_visit_get(origin_url)) assert ov['origin'] == origin_url @@ -627,8 +627,7 @@ """Multiple retries for hash collision and psycopg2 error but finally ok """ - sample_origin = sample_data['origin'][1] - origin_url = sample_origin['url'] + origin_visit = OriginVisit.from_dict(sample_data['origin_visit'][0]) mock_memory = mocker.patch( 'swh.storage.in_memory.InMemoryStorage.origin_visit_update') @@ -638,22 +637,21 @@ # second try goes ko psycopg2.IntegrityError('origin already inserted'), # ok then! - {'origin': origin_url, 'visit': 1} + None ] mock_sleep = mocker.patch( 'swh.storage.retry.RetryingProxyStorage' '.origin_visit_update.retry.sleep') - visit_id = 1 - swh_storage.origin_visit_update(origin_url, visit_id, status='full') + swh_storage.origin_visit_update(origin_visit, status='full') mock_memory.assert_has_calls([ - call(origin_url, visit_id, metadata=None, + call(origin_visit, metadata=None, snapshot=None, status='full'), - call(origin_url, visit_id, metadata=None, + call(origin_visit, metadata=None, snapshot=None, status='full'), - call(origin_url, visit_id, metadata=None, + call(origin_visit, metadata=None, snapshot=None, status='full'), ]) assert mock_sleep.call_count == 2 @@ -664,15 +662,14 @@ """Unfiltered errors are raising without retry """ + origin_visit = OriginVisit.from_dict(sample_data['origin_visit'][0]) mock_memory = mocker.patch( 'swh.storage.in_memory.InMemoryStorage.origin_visit_update') mock_memory.side_effect = StorageArgumentException( 'Refuse to add origin always!') - origin_url = sample_data['origin'][0]['url'] - visit_id = 9 with pytest.raises(StorageArgumentException, match='Refuse to add'): - swh_storage.origin_visit_update(origin_url, visit_id, 'partial') + swh_storage.origin_visit_update(origin_visit, 'partial') assert mock_memory.call_count == 1 diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1395,7 +1395,7 @@ for date_visit in visits: visit = swh_storage.origin_visit_add( origin, date=date_visit, type=visit_type) - swh_storage.origin_visit_update(origin, visit, status='full') + swh_storage.origin_visit_update(visit, status='full') swh_storage.refresh_stat_counters() @@ -1420,8 +1420,7 @@ for date_visit in visits: visit = swh_storage.origin_visit_add( origin, date=date_visit, type=visit_type) - swh_storage.origin_visit_update( - origin, visit=visit, status='full') + swh_storage.origin_visit_update(visit, status='full') random_origin_visit = swh_storage.origin_visit_get_random(visit_type) assert random_origin_visit is None @@ -1707,10 +1706,8 @@ 'directories': 22, } swh_storage.origin_visit_update( - origin, origin_visit1, status='full', - metadata=visit1_metadata) - swh_storage.origin_visit_update( - origin2, origin_visit3, status='partial') + origin_visit1, status='full', metadata=visit1_metadata) + swh_storage.origin_visit_update(origin_visit3, status='partial') # then actual_origin_visits = list(swh_storage.origin_visit_get( @@ -1838,8 +1835,7 @@ ) with pytest.raises(StorageArgumentException, match='status') as cm: - swh_storage.origin_visit_update( - origin, origin_visit, status='foobar') + swh_storage.origin_visit_update(origin_visit, status='foobar') if type(cm.value) == psycopg2.DataError: assert cm.value.pgcode == \ @@ -1894,9 +1890,7 @@ # when swh_storage.origin_visit_update( - origin, - origin_visit, - snapshot=data.snapshot['id']) + origin_visit, snapshot=data.snapshot['id']) # then actual_origin_visit = swh_storage.origin_visit_get_by( @@ -1922,7 +1916,6 @@ swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.snapshot['id']) @@ -1945,8 +1938,7 @@ } swh_storage.origin_visit_update( - origin, origin_visit1, - status='full', metadata=visit1_metadata) + origin_visit1, status='full', metadata=visit1_metadata) expected_origin_visit = origin_visit1.to_dict() expected_origin_visit.update({ @@ -2150,7 +2142,7 @@ # visit1 and require_snapshot=False still returns visit2 swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, ov1, snapshot=data.complete_snapshot['id']) + ov1, snapshot=data.complete_snapshot['id']) assert {**origin_visit1, 'snapshot': data.complete_snapshot['id']} \ @@ -2165,8 +2157,7 @@ origin.url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update( - origin, ov1, status='full') + swh_storage.origin_visit_update(ov1, status='full') assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], @@ -2178,7 +2169,7 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin, ov2, snapshot=data.empty_snapshot['id']) + ov2, snapshot=data.empty_snapshot['id']) assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \ swh_storage.origin_visit_get_latest( origin.url, require_snapshot=True) @@ -2195,7 +2186,7 @@ # Add snapshot to visit3 (same date as visit2) swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, ov3, snapshot=data.complete_snapshot['id']) + ov3, snapshot=data.complete_snapshot['id']) assert { **origin_visit1, 'snapshot': data.complete_snapshot['id'], @@ -2254,7 +2245,7 @@ assert actual_result == {'snapshot:add': 1} swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.empty_snapshot['id']) + origin_visit1, snapshot=data.empty_snapshot['id']) by_id = swh_storage.snapshot_get(data.empty_snapshot['id']) assert by_id == {**data.empty_snapshot, 'next_branch': None} @@ -2299,7 +2290,7 @@ actual_result = swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.complete_snapshot['id']) + origin_visit1, snapshot=data.complete_snapshot['id']) assert actual_result == {'snapshot:add': 1} by_id = swh_storage.snapshot_get(data.complete_snapshot['id']) @@ -2457,7 +2448,7 @@ swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.complete_snapshot['id']) + origin_visit1, snapshot=data.complete_snapshot['id']) snp_id = data.complete_snapshot['id'] branches = data.complete_snapshot['branches'] @@ -2577,7 +2568,7 @@ swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.snapshot['id']) + origin_visit1, snapshot=data.snapshot['id']) by_id = swh_storage.snapshot_get(data.snapshot['id']) assert by_id == {**data.snapshot, 'next_branch': None} @@ -2592,10 +2583,9 @@ def test_snapshot_add_nonexistent_visit(self, swh_storage): origin_url = data.origin['url'] swh_storage.origin_add_one(data.origin) - origin = Origin(url=origin_url) # unknown visit visit = OriginVisit( - origin=origin, + origin=origin_url, visit=54164461156, date=data.date_visit1, status='ongoing', @@ -2609,7 +2599,7 @@ with pytest.raises(StorageArgumentException): swh_storage.origin_visit_update( - origin, visit, snapshot=data.snapshot['id']) + visit, snapshot=data.snapshot['id']) assert list(swh_storage.journal_writer.journal.objects) == [ ('snapshot', data.snapshot)] @@ -2626,7 +2616,7 @@ visit1_id = origin_visit1.visit swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.snapshot['id']) + origin_visit1, snapshot=data.snapshot['id']) by_ov1 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit1_id) @@ -2641,7 +2631,7 @@ swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin, origin_visit2, snapshot=data.snapshot['id']) + origin_visit2, snapshot=data.snapshot['id']) by_ov2 = swh_storage.snapshot_get_by_origin_visit( origin_url, visit2_id) @@ -2721,7 +2711,7 @@ # Add snapshot to visit1, latest snapshot = visit 1 snapshot swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.complete_snapshot['id']) + origin_visit1, snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2732,8 +2722,7 @@ allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update( - origin, origin_visit1, status='full') + swh_storage.origin_visit_update(origin_visit1, status='full') assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest( origin_url, @@ -2742,7 +2731,7 @@ # Add snapshot to visit2 and check that the new snapshot is returned swh_storage.snapshot_add([data.empty_snapshot]) swh_storage.origin_visit_update( - origin, origin_visit2, snapshot=data.empty_snapshot['id']) + origin_visit2, snapshot=data.empty_snapshot['id']) assert {**data.empty_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2756,7 +2745,7 @@ # the new snapshot is returned swh_storage.snapshot_add([data.complete_snapshot]) swh_storage.origin_visit_update( - origin, origin_visit3, snapshot=data.complete_snapshot['id']) + origin_visit3, snapshot=data.complete_snapshot['id']) assert {**data.complete_snapshot, 'next_branch': None} \ == swh_storage.snapshot_get_latest(origin_url) @@ -2785,7 +2774,7 @@ # Add unknown snapshot to visit1, check that the inconsistency is # detected swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.complete_snapshot['id']) + origin_visit1, snapshot=data.complete_snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest(origin_url) @@ -2796,8 +2785,7 @@ origin_url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again - swh_storage.origin_visit_update( - origin, origin_visit1, status='full') + swh_storage.origin_visit_update(origin_visit1, status='full') with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2812,7 +2800,7 @@ # Add unknown snapshot to visit2 and check that the inconsistency # is detected swh_storage.origin_visit_update( - origin, origin_visit2, snapshot=data.snapshot['id']) + origin_visit2, snapshot=data.snapshot['id']) with pytest.raises(Exception): # XXX: should the exception be more specific than this? swh_storage.snapshot_get_latest( @@ -2878,7 +2866,7 @@ swh_storage.snapshot_add([data.snapshot]) swh_storage.origin_visit_update( - origin, origin_visit1, snapshot=data.snapshot['id']) + origin_visit1, snapshot=data.snapshot['id']) swh_storage.directory_add([data.dir]) swh_storage.revision_add([data.revision]) swh_storage.release_add([data.release]) @@ -3768,8 +3756,7 @@ origin = Origin(url='https://github.com/user1/repo1') visit = swh_storage.origin_visit_add( origin, date=now, type='git') - swh_storage.origin_visit_update( - origin, visit, snapshot=data.snapshot['id']) + swh_storage.origin_visit_update(visit, snapshot=data.snapshot['id']) assert swh_storage.origin_count('github', with_visit=False) == 3 # github/user1 has a visit and a snapshot, so with_visit=True => 1