Page MenuHomeSoftware Heritage

D2821.diff
No OneTemporary

D2821.diff

diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -14,7 +14,7 @@
import dateutil
from swh.model.model import (
- BaseModel, Revision, Release, Directory, DirectoryEntry, Content,
+ Revision, Release, Directory, DirectoryEntry, Content,
SkippedContent, OriginVisit, Snapshot, Origin
)
from swh.model.hashutil import DEFAULT_ALGORITHMS
@@ -668,7 +668,8 @@
else:
return results
- def origin_get_one(self, origin):
+ def origin_get_one(self, origin: Dict[str, Any]) -> Optional[
+ Dict[str, Any]]:
if 'id' in origin:
raise StorageArgumentException('Origin ids are not supported.')
if 'url' not in origin:
@@ -759,7 +760,7 @@
def origin_visit_add(self, origin: Origin,
date: Union[str, datetime.datetime],
- type: str) -> Dict[str, BaseModel]:
+ type: str) -> OriginVisit:
origin_url = origin.url
if isinstance(date, str):
@@ -769,8 +770,7 @@
raise StorageArgumentException(
'Date must be a datetime or a string')
- origin = self.origin_get_one({'url': origin_url})
- if not origin: # Cannot add a visit without an origin
+ if not self.origin_get_one({'url': origin.url}):
raise StorageArgumentException(
'Unknown origin %s', origin_url)
@@ -789,13 +789,8 @@
except (KeyError, TypeError, ValueError) as e:
raise StorageArgumentException(*e.args)
self.journal_writer.origin_visit_add(visit)
-
self._cql_runner.origin_visit_add_one(visit)
-
- return {
- 'origin': origin,
- 'visit': visit,
- }
+ return visit
def origin_visit_update(
self, origin: Origin, visit: OriginVisit,
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -19,7 +19,7 @@
import attr
from swh.model.model import (
- BaseContent, BaseModel, Content, SkippedContent, Directory, Revision,
+ BaseContent, Content, SkippedContent, Directory, Revision,
Release, Snapshot, OriginVisit, Origin, SHA1_SIZE
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
@@ -705,7 +705,7 @@
def origin_visit_add(self, origin: Origin,
date: Union[str, datetime.datetime],
- type: str) -> Dict[str, BaseModel]:
+ type: str) -> OriginVisit:
origin_url = origin.url
if isinstance(date, str):
@@ -720,9 +720,6 @@
raise StorageArgumentException(
'Unknown origin %s', origin_url)
- visit_ret: Dict[str, BaseModel] = {
- 'origin': origin,
- }
if origin_url in self._origins:
origin = self._origins[origin_url]
# visit ids are in the range [1, +inf[
@@ -745,9 +742,8 @@
self.journal_writer.origin_visit_add(visit)
- visit_ret['visit'] = visit
-
- return visit_ret
+ # return last visit
+ return visit
def origin_visit_update(
self, origin: Origin, visit: OriginVisit,
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -9,7 +9,7 @@
from swh.core.api import remote_api_endpoint
from swh.model.model import (
- BaseModel, Content, Directory, Origin, OriginVisit, Revision, Release,
+ Content, Directory, Origin, OriginVisit, Revision, Release,
Snapshot, SkippedContent
)
@@ -772,7 +772,7 @@
def origin_visit_add(
self, origin: Origin,
date: Union[str, datetime.datetime],
- type: str) -> Dict[str, BaseModel]:
+ type: str) -> OriginVisit:
"""Add an origin_visit for the origin at ts with status 'ongoing'.
Args:
diff --git a/swh/storage/retry.py b/swh/storage/retry.py
--- a/swh/storage/retry.py
+++ b/swh/storage/retry.py
@@ -15,7 +15,7 @@
from swh.model.model import (
Content, SkippedContent, Directory, Revision, Release, Snapshot,
- Origin,
+ Origin, OriginVisit
)
from swh.storage import get_storage
@@ -93,8 +93,8 @@
return self.storage.origin_add_one(origin)
@swh_retry
- def origin_visit_add(self, origin: Dict,
- date: Union[datetime, str], type: str) -> Dict:
+ def origin_visit_add(self, origin: Origin,
+ date: Union[datetime, str], type: str) -> OriginVisit:
return self.storage.origin_visit_add(origin, date, type)
@swh_retry
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -20,7 +20,7 @@
import psycopg2.errors
from swh.model.model import (
- BaseModel, Content, Directory, Origin, OriginVisit,
+ Content, Directory, Origin, OriginVisit,
Revision, Release, SkippedContent, Snapshot, SHA1_SIZE
)
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
@@ -805,7 +805,7 @@
@db_transaction()
def origin_visit_add(
self, origin: Origin, date: Union[str, datetime.datetime],
- type: str, db=None, cur=None) -> Dict[str, BaseModel]:
+ type: str, db=None, cur=None) -> OriginVisit:
origin_url = origin.url
if isinstance(date, str):
@@ -837,10 +837,7 @@
self.journal_writer.origin_visit_add(visit)
send_metric('origin_visit:add', count=1, method_name='origin_visit')
- return {
- 'origin': origin,
- 'visit': visit,
- }
+ return visit
@timed
@db_transaction()
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -262,9 +262,8 @@
assert not origin
origin = Origin(url=origin_url)
- result = swh_storage.origin_visit_add(
+ origin_visit = swh_storage.origin_visit_add(
origin, '2020-01-01', 'hg')
- origin_visit = result['visit']
assert origin_visit.origin == origin_url
assert isinstance(origin_visit.visit, int)
@@ -603,9 +602,8 @@
swh_storage.origin_add_one(sample_origin)
origin_url = sample_origin['url']
origin = Origin(url=origin_url)
- result = swh_storage.origin_visit_add(
+ origin_visit = swh_storage.origin_visit_add(
origin, '2020-01-01', 'hg')
- origin_visit = result['visit']
ov = next(swh_storage.origin_visit_get(origin_url))
assert ov['origin'] == origin_url
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1393,9 +1393,8 @@
for origin in data.origins:
origin = Origin.from_dict(origin)
for date_visit in visits:
- result = swh_storage.origin_visit_add(
+ visit = swh_storage.origin_visit_add(
origin, date=date_visit, type=visit_type)
- visit = result['visit']
swh_storage.origin_visit_update(origin, visit, status='full')
swh_storage.refresh_stat_counters()
@@ -1419,9 +1418,8 @@
for origin in data.origins:
origin = Origin.from_dict(origin)
for date_visit in visits:
- result = swh_storage.origin_visit_add(
+ visit = swh_storage.origin_visit_add(
origin, date=date_visit, type=visit_type)
- visit = result['visit']
swh_storage.origin_visit_update(
origin, visit=visit, status='full')
@@ -1570,11 +1568,10 @@
microsecond=round(date_visit.microsecond, -3))
# when
- result = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
type=data.type_visit1,
date=date_visit)
- origin_visit1 = result['visit']
actual_origin_visits = list(swh_storage.origin_visit_get(
origin.url))
@@ -1613,18 +1610,16 @@
date_visit2 = date_visit2.replace(
microsecond=round(date_visit2.microsecond, -3))
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=date_visit,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=date_visit2,
type=data.type_visit2,
)
- origin_visit2 = result2['visit']
# then
assert origin_visit1.origin == origin.url
@@ -1686,28 +1681,25 @@
date_visit2 = date_visit2.replace(
microsecond=round(date_visit2.microsecond, -3))
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=date_visit,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=date_visit2,
type=data.type_visit2
)
- origin_visit2 = result2['visit']
swh_storage.origin_add_one(data.origin2)
origin2 = Origin(url=data.origin2['url'])
- result3 = swh_storage.origin_visit_add(
+ origin_visit3 = swh_storage.origin_visit_add(
origin2,
date=date_visit2,
type=data.type_visit3
)
- origin_visit3 = result3['visit']
# when
visit1_metadata = {
@@ -1839,16 +1831,15 @@
def test_origin_visit_update_validation(self, swh_storage):
origin = Origin(url=data.origin['url'])
swh_storage.origin_add_one(data.origin)
- result = swh_storage.origin_visit_add(
+ origin_visit = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- visit = result['visit']
with pytest.raises(StorageArgumentException, match='status') as cm:
swh_storage.origin_visit_update(
- origin, visit, status='foobar')
+ origin, origin_visit, status='foobar')
if type(cm.value) == psycopg2.DataError:
assert cm.value.pgcode == \
@@ -1865,19 +1856,17 @@
type=data.type_visit1,
)
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=data.date_visit3,
type=data.type_visit2,
)
- origin_visit2 = result2['visit']
- result3 = swh_storage.origin_visit_add(
+ origin_visit3 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit3,
)
- origin_visit3 = result3['visit']
# Simple case
visit = swh_storage.origin_visit_find_by_date(
@@ -1897,14 +1886,12 @@
origin_url = swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result = swh_storage.origin_visit_add(
+ origin_visit = swh_storage.origin_visit_add(
Origin(url=origin_url),
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit = result['visit']
-
# when
swh_storage.origin_visit_update(
origin,
@@ -1927,12 +1914,11 @@
origin = Origin(url=data.origin['url'])
origin2 = Origin(url=data.origin2['url'])
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- origin_visit1 = result1['visit']
swh_storage.snapshot_add([data.snapshot])
swh_storage.origin_visit_update(
@@ -2064,12 +2050,11 @@
origin = Origin(url=data.origin2['url'])
# when
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
swh_storage.origin_visit_upsert([OriginVisit.from_dict({
'origin': origin.url,
@@ -2130,33 +2115,31 @@
def test_origin_visit_get_latest(self, swh_storage):
swh_storage.origin_add_one(data.origin)
origin = Origin(url=data.origin['url'])
- origin_visit1 = swh_storage.origin_visit_add(
+ ov1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- visit1_id = origin_visit1['visit']
- origin_visit2 = swh_storage.origin_visit_add(
+
+ ov2 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- visit2_id = origin_visit2['visit']
# Add a visit with the same date as the previous one
- origin_visit3 = swh_storage.origin_visit_add(
+ ov3 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- visit3_id = origin_visit3['visit']
origin_visit1 = swh_storage.origin_visit_get_by(
- origin.url, visit1_id.visit)
+ origin.url, ov1.visit)
origin_visit2 = swh_storage.origin_visit_get_by(
- origin.url, visit2_id.visit)
+ origin.url, ov2.visit)
origin_visit3 = swh_storage.origin_visit_get_by(
- origin.url, visit3_id.visit)
+ origin.url, ov3.visit)
# Two visits, both with no snapshot
assert origin_visit3 == swh_storage.origin_visit_get_latest(origin.url)
@@ -2167,7 +2150,7 @@
# visit1 and require_snapshot=False still returns visit2
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
- origin, visit1_id, snapshot=data.complete_snapshot['id'])
+ origin, ov1, snapshot=data.complete_snapshot['id'])
assert {**origin_visit1,
'snapshot': data.complete_snapshot['id']} \
@@ -2182,7 +2165,8 @@
origin.url, allowed_statuses=['full']) is None
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin, visit1_id, status='full')
+ swh_storage.origin_visit_update(
+ origin, ov1, status='full')
assert {
**origin_visit1,
'snapshot': data.complete_snapshot['id'],
@@ -2194,7 +2178,7 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
swh_storage.origin_visit_update(
- origin, visit2_id, snapshot=data.empty_snapshot['id'])
+ origin, ov2, snapshot=data.empty_snapshot['id'])
assert {**origin_visit2, 'snapshot': data.empty_snapshot['id']} == \
swh_storage.origin_visit_get_latest(
origin.url, require_snapshot=True)
@@ -2211,7 +2195,7 @@
# Add snapshot to visit3 (same date as visit2)
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
- origin, visit3_id, snapshot=data.complete_snapshot['id'])
+ origin, ov3, snapshot=data.complete_snapshot['id'])
assert {
**origin_visit1,
'snapshot': data.complete_snapshot['id'],
@@ -2259,12 +2243,11 @@
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result['visit']
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.empty_snapshot])
@@ -2307,12 +2290,11 @@
origin_url = data.origin['url']
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.complete_snapshot])
@@ -2467,12 +2449,11 @@
origin_url = data.origin['url']
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
@@ -2587,12 +2568,11 @@
origin_url = data.origin['url']
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
visit_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
@@ -2638,12 +2618,11 @@
origin_url = data.origin['url']
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
visit1_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
swh_storage.origin_visit_update(
@@ -2653,12 +2632,11 @@
origin_url, visit1_id)
assert by_ov1 == {**data.snapshot, 'next_branch': None}
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- origin_visit2 = result2['visit']
visit2_id = origin_visit2.visit
swh_storage.snapshot_add([data.snapshot])
@@ -2718,27 +2696,24 @@
swh_storage.origin_add_one(data.origin)
origin_url = data.origin['url']
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- origin_visit2 = result2['visit']
# Add a visit with the same date as the previous one
- result3 = swh_storage.origin_visit_add(
+ origin_visit3 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit3,
)
- origin_visit3 = result3['visit']
# Two visits, both with no snapshot: latest snapshot is None
assert swh_storage.snapshot_get_latest(origin_url) is None
@@ -2792,19 +2767,17 @@
swh_storage.origin_add_one(data.origin)
origin = Origin(url=origin_url)
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit1,
type=data.type_visit1,
)
- origin_visit1 = result1['visit']
- result2 = swh_storage.origin_visit_add(
+ origin_visit2 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- origin_visit2 = result2['visit']
# Two visits, both with no snapshot: latest snapshot is None
assert swh_storage.snapshot_get_latest(origin_url) is None
@@ -2897,12 +2870,11 @@
swh_storage.origin_add_one(data.origin2)
origin = Origin(url=data.origin2['url'])
- result1 = swh_storage.origin_visit_add(
+ origin_visit1 = swh_storage.origin_visit_add(
origin,
date=data.date_visit2,
type=data.type_visit2,
)
- origin_visit1 = result1['visit']
swh_storage.snapshot_add([data.snapshot])
swh_storage.origin_visit_update(
@@ -3794,9 +3766,8 @@
swh_storage.snapshot_add([data.snapshot])
origin = Origin(url='https://github.com/user1/repo1')
- result = swh_storage.origin_visit_add(
+ visit = swh_storage.origin_visit_add(
origin, date=now, type='git')
- visit = result['visit']
swh_storage.origin_visit_update(
origin, visit, snapshot=data.snapshot['id'])

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 9:01 AM (2 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220968

Event Timeline