Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163756
D3262.id11576.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
50 KB
Subscribers
None
D3262.id11576.diff
View Options
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,6 @@
click
flask
psycopg2
-python-dateutil
vcversioner
aiohttp
tenacity
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -8,10 +8,9 @@
import json
import random
import re
-from typing import Any, Dict, List, Iterable, Optional, Union
+from typing import Any, Dict, List, Iterable, Optional
import attr
-import dateutil
from swh.core.api.serializers import msgpack_loads, msgpack_dumps
from swh.model.model import (
@@ -795,48 +794,32 @@
return origin_url
- def origin_visit_add(
- self, origin_url: str, date: Union[str, datetime.datetime], type: str
- ) -> OriginVisit:
- if isinstance(date, str):
- # FIXME: Converge on iso8601 at some point
- date = dateutil.parser.parse(date)
- elif not isinstance(date, datetime.datetime):
- raise StorageArgumentException("Date must be a datetime or a string")
-
- if not self.origin_get_one({"url": origin_url}):
- raise StorageArgumentException("Unknown origin %s", origin_url)
-
- visit_id = self._cql_runner.origin_generate_unique_visit_id(origin_url)
- visit_state = "ongoing"
- with convert_validation_exceptions():
- visit = OriginVisit.from_dict(
- {
- "origin": origin_url,
- "date": date,
- "type": type,
- "status": visit_state,
- "snapshot": None,
- "metadata": None,
- "visit": visit_id,
- }
- )
+ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]:
+ for visit in visits:
+ origin = self.origin_get({"url": visit.origin})
+ if not origin: # Cannot add a visit without an origin
+ raise StorageArgumentException("Unknown origin %s", visit.origin)
- self.journal_writer.origin_visit_add([visit])
- self._cql_runner.origin_visit_add_one(visit)
+ all_visits = []
+ nb_visits = 0
+ for visit in visits:
+ nb_visits += 1
+ if not visit.visit:
+ visit_id = self._cql_runner.origin_generate_unique_visit_id(
+ visit.origin
+ )
+ visit = attr.evolve(visit, visit=visit_id)
+ self.journal_writer.origin_visit_add([visit])
+ self._cql_runner.origin_visit_add_one(visit)
+ assert visit.visit is not None
+ all_visits.append(visit)
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date,
- status=visit_state,
- snapshot=None,
- metadata=None,
- )
- self._origin_visit_status_add(visit_status)
+ visit_status_dict = visit.to_dict()
+ visit_status_dict.pop("type")
+ visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ self._origin_visit_status_add(visit_status)
- return visit
+ return all_visits
def _origin_visit_status_add(self, visit_status: OriginVisitStatus) -> None:
"""Add an origin visit status"""
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -5,7 +5,6 @@
import re
import bisect
-import dateutil
import collections
import copy
import datetime
@@ -26,7 +25,6 @@
Optional,
Tuple,
TypeVar,
- Union,
)
import attr
@@ -794,54 +792,43 @@
return origin.url
- def origin_visit_add(
- self, origin_url: str, date: Union[str, datetime.datetime], type: str
- ) -> OriginVisit:
- if isinstance(date, str):
- # FIXME: Converge on iso8601 at some point
- date = dateutil.parser.parse(date)
- elif not isinstance(date, datetime.datetime):
- raise StorageArgumentException("Date must be a datetime or a string")
-
- origin = self.origin_get({"url": origin_url})
- if not origin: # Cannot add a visit without an origin
- raise StorageArgumentException("Unknown origin %s", origin_url)
-
- if origin_url in self._origins:
- origin = self._origins[origin_url]
- # visit ids are in the range [1, +inf[
- visit_id = len(self._origin_visits[origin_url]) + 1
- status = "ongoing"
- with convert_validation_exceptions():
- visit = OriginVisit(
- origin=origin_url,
- date=date,
- type=type,
- # TODO: Remove when we remove those fields from the model
- status=status,
- snapshot=None,
- metadata=None,
- visit=visit_id,
- )
- self.journal_writer.origin_visit_add([visit])
- self._origin_visits[origin_url].append(visit)
- assert visit.visit is not None
- visit_key = (origin_url, visit.visit)
+ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]:
+ for visit in visits:
+ origin = self.origin_get({"url": visit.origin})
+ if not origin: # Cannot add a visit without an origin
+ raise StorageArgumentException("Unknown origin %s", visit.origin)
- with convert_validation_exceptions():
- visit_update = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date,
- status=status,
- snapshot=None,
- metadata=None,
- )
- self._origin_visit_status_add_one(visit_update)
- self._objects[visit_key].append(("origin_visit", None))
+ all_visits = []
+ for visit in visits:
+ origin_url = visit.origin
+ if origin_url in self._origins:
+ origin = self._origins[origin_url]
+ if visit.visit:
+ self.journal_writer.origin_visit_add([visit])
+ while len(self._origin_visits[origin_url]) < visit.visit:
+ self._origin_visits[origin_url].append(None)
+ self._origin_visits[origin_url][visit.visit - 1] = visit
+ visit_status_dict = visit.to_dict()
+ visit_status_dict.pop("type")
+ visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ self._origin_visit_status_add_one(visit_status)
+ else:
+ # visit ids are in the range [1, +inf[
+ visit_id = len(self._origin_visits[origin_url]) + 1
+ visit = attr.evolve(visit, visit=visit_id)
+ self.journal_writer.origin_visit_add([visit])
+ self._origin_visits[origin_url].append(visit)
+ visit_key = (origin_url, visit.visit)
+
+ visit_status_dict = visit.to_dict()
+ visit_status_dict.pop("type")
+ visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ self._origin_visit_status_add_one(visit_status)
+ self._objects[visit_key].append(("origin_visit", None))
+ assert visit.visit is not None
+ all_visits.append(visit)
- # return last visit
- return visit
+ return all_visits
def _origin_visit_status_add_one(self, visit_status: OriginVisitStatus) -> None:
"""Add an origin visit status without checks.
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -5,7 +5,7 @@
import datetime
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional
from swh.core.api import remote_api_endpoint
from swh.model.model import (
@@ -780,24 +780,18 @@
...
@remote_api_endpoint("origin/visit/add")
- def origin_visit_add(
- self, origin_url: str, date: Union[str, datetime.datetime], type: str
- ) -> OriginVisit:
- """Add an origin_visit for the origin at ts with status 'ongoing'.
+ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]:
+ """Add visits to storage. If the visits have no id, they will be created and assigned
+ one. The resulted visits are visit with their visit id set.
Args:
- origin_url: visited origin identifier (its URL)
- date: timestamp of such visit
- type: the type of loader used for the visit (hg, git, ...)
+ visits: Iterable of OriginVisit objects to add
Raises:
- StorageArgumentException if the date is mistyped, or the origin
- is unknown.
+ StorageArgumentException if some origin visit reference unknown origins
Returns:
- dict: dictionary with keys origin and visit where:
- - origin: origin object
- - visit: the visit object for the new visit occurrence
+ Iterable[OriginVisit] stored
"""
...
diff --git a/swh/storage/retry.py b/swh/storage/retry.py
--- a/swh/storage/retry.py
+++ b/swh/storage/retry.py
@@ -7,7 +7,7 @@
import traceback
from datetime import datetime
-from typing import Any, Dict, Iterable, Optional, Union
+from typing import Any, Dict, Iterable, Optional
from tenacity import (
retry,
@@ -108,10 +108,8 @@
return self.storage.origin_add_one(origin)
@swh_retry
- def origin_visit_add(
- self, origin_url: str, date: Union[datetime, str], type: str
- ) -> OriginVisit:
- return self.storage.origin_visit_add(origin_url, date, type)
+ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]:
+ return self.storage.origin_visit_add(visits)
@swh_retry
def origin_visit_update(
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -9,10 +9,9 @@
from collections import defaultdict
from contextlib import contextmanager
-from typing import Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional
import attr
-import dateutil.parser
import psycopg2
import psycopg2.pool
import psycopg2.errors
@@ -824,55 +823,36 @@
@timed
@db_transaction()
def origin_visit_add(
- self,
- origin_url: str,
- date: Union[str, datetime.datetime],
- type: str,
- db=None,
- cur=None,
- ) -> OriginVisit:
- if isinstance(date, str):
- # FIXME: Converge on iso8601 at some point
- date = dateutil.parser.parse(date)
- elif not isinstance(date, datetime.datetime):
- raise StorageArgumentException("Date must be a datetime or a string")
-
- origin = self.origin_get({"url": origin_url}, db=db, cur=cur)
- if not origin: # Cannot add a visit without an origin
- raise StorageArgumentException("Unknown origin %s", origin_url)
-
- with convert_validation_exceptions():
- visit_id = db.origin_visit_add(origin_url, date, type, cur=cur)
-
- status = "ongoing"
- # We can write to the journal only after inserting to the
- # DB, because we want the id of the visit
- visit = OriginVisit.from_dict(
- {
- "origin": origin_url,
- "date": date,
- "type": type,
- "visit": visit_id,
- # TODO: Remove when we remove those fields from the model
- "status": status,
- "metadata": None,
- "snapshot": None,
- }
- )
- self.journal_writer.origin_visit_add([visit])
+ self, visits: Iterable[OriginVisit], db=None, cur=None
+ ) -> Iterable[OriginVisit]:
+ for visit in visits:
+ origin = self.origin_get({"url": visit.origin}, db=db, cur=cur)
+ if not origin: # Cannot add a visit without an origin
+ raise StorageArgumentException("Unknown origin %s", visit.origin)
- with convert_validation_exceptions():
- visit_status = OriginVisitStatus(
- origin=origin_url,
- visit=visit_id,
- date=date,
- status=status,
- snapshot=None,
- metadata=None,
- )
- self._origin_visit_status_add(visit_status, db=db, cur=cur)
- send_metric("origin_visit:add", count=1, method_name="origin_visit")
- return visit
+ all_visits = []
+ nb_visits = 0
+ for visit in visits:
+ nb_visits += 1
+ if not visit.visit:
+ with convert_validation_exceptions():
+ visit_id = db.origin_visit_add(
+ visit.origin, visit.date, visit.type, cur=cur
+ )
+ visit = attr.evolve(visit, visit=visit_id)
+ else:
+ db.origin_visit_upsert(visit)
+ assert visit.visit is not None
+ all_visits.append(visit)
+ # Forced to write after for the case when the visit has no id
+ self.journal_writer.origin_visit_add([visit])
+ visit_status_dict = visit.to_dict()
+ visit_status_dict.pop("type")
+ visit_status = OriginVisitStatus.from_dict(visit_status_dict)
+ self._origin_visit_status_add(visit_status, db=db, cur=cur)
+
+ send_metric("origin_visit:add", count=nb_visits, method_name="origin_visit")
+ return all_visits
def _origin_visit_status_add(
self, visit_status: OriginVisitStatus, db, cur
diff --git a/swh/storage/tests/test_kafka_writer.py b/swh/storage/tests/test_kafka_writer.py
--- a/swh/storage/tests/test_kafka_writer.py
+++ b/swh/storage/tests/test_kafka_writer.py
@@ -53,7 +53,7 @@
for obj in objs:
assert isinstance(obj, OriginVisit)
storage.origin_add_one(Origin(url=obj.origin))
- visit = method(obj.origin, date=obj.date, type=obj.type)
+ visit = method([obj])[0]
expected_messages += 1 + 1 # 1 visit + 1 visit status
obj_d = obj.to_dict()
diff --git a/swh/storage/tests/test_replay.py b/swh/storage/tests/test_replay.py
--- a/swh/storage/tests/test_replay.py
+++ b/swh/storage/tests/test_replay.py
@@ -70,16 +70,10 @@
# Fill Kafka using a source storage
nb_sent = 0
for object_type, objects in TEST_OBJECTS.items():
+ method = getattr(src, object_type + "_add")
+ method(objects)
if object_type == "origin_visit":
- # src.origin_visit_upsert(objects)
- for visit in objects:
- src.origin_visit_add(
- origin_url=visit.origin, date=visit.date, type=visit.type
- )
- nb_sent += 1 # this adds origin-visit-status as well
- else:
- method = getattr(src, object_type + "_add")
- method(objects)
+ nb_sent += len(objects) # origin-visit-add adds origin-visit-status as well
nb_sent += len(objects)
caplog.set_level(logging.ERROR, "swh.journal.replay")
@@ -115,15 +109,10 @@
# Fill Kafka using a source storage
nb_sent = 0
for object_type, objects in TEST_OBJECTS.items():
+ method = getattr(src, object_type + "_add")
+ method(objects)
if object_type == "origin_visit":
- for visit in objects:
- src.origin_visit_add(
- origin_url=visit.origin, date=visit.date, type=visit.type
- )
- nb_sent += 1 # this adds origin-visit-status as well
- else:
- method = getattr(src, object_type + "_add")
- method(objects)
+ nb_sent += len(objects) # origin-visit-add adds origin-visit-status as well
nb_sent += len(objects)
# Create collision in input data
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -16,6 +16,7 @@
Snapshot,
SkippedContent,
Origin,
+ OriginVisit,
)
from swh.storage import get_storage
@@ -355,7 +356,10 @@
origin = list(swh_storage.origin_visit_get(origin_url))
assert not origin
- origin_visit = swh_storage.origin_visit_add(origin_url, date_visit1, "hg")
+ visit = OriginVisit(
+ origin=origin_url, date=date_visit1, type="hg", status="ongoing", snapshot=None
+ )
+ origin_visit = swh_storage.origin_visit_add([visit])[0]
assert origin_visit.origin == origin_url
assert isinstance(origin_visit.visit, int)
@@ -374,27 +378,26 @@
origin_url = swh_storage.origin_add_one(sample_origin)
mock_memory = mocker.patch("swh.storage.in_memory.InMemoryStorage.origin_visit_add")
+ visit = OriginVisit(
+ origin=origin_url, date=date_visit1, type="git", status="ongoing", snapshot=None
+ )
mock_memory.side_effect = [
# first try goes ko
fake_hash_collision,
# second try goes ko
psycopg2.IntegrityError("origin already inserted"),
# ok then!
- {"origin": origin_url, "visit": 1},
+ [visit],
]
origin = list(swh_storage.origin_visit_get(origin_url))
assert not origin
- r = swh_storage.origin_visit_add(origin_url, date_visit1, "git")
- assert r == {"origin": origin_url, "visit": 1}
+ r = swh_storage.origin_visit_add([visit])
+ assert r == [visit]
mock_memory.assert_has_calls(
- [
- call(origin_url, date_visit1, "git"),
- call(origin_url, date_visit1, "git"),
- call(origin_url, date_visit1, "git"),
- ]
+ [call([visit]), call([visit]), call([visit]),]
)
@@ -413,10 +416,17 @@
assert not origin
with pytest.raises(StorageArgumentException, match="Refuse to add"):
- swh_storage.origin_visit_add(origin_url, date_visit1, "svn")
+ visit = OriginVisit(
+ origin=origin_url,
+ date=date_visit1,
+ type="svn",
+ status="ongoing",
+ snapshot=None,
+ )
+ swh_storage.origin_visit_add([visit])
mock_memory.assert_has_calls(
- [call(origin_url, date_visit1, "svn"),]
+ [call([visit]),]
)
@@ -675,7 +685,11 @@
"""
sample_origin = sample_data["origin"][0]
origin_url = swh_storage.origin_add_one(sample_origin)
- origin_visit = swh_storage.origin_visit_add(origin_url, date_visit1, "hg")
+ visit = OriginVisit(
+ origin=origin_url, date=date_visit1, type="hg", status="ongoing", snapshot=None
+ )
+
+ origin_visit = swh_storage.origin_visit_add([visit])[0]
ov = next(swh_storage.origin_visit_get(origin_url))
assert ov["origin"] == origin_url
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -1412,9 +1412,15 @@
for origin in data.origins:
origin_url = origin["url"]
for date_visit in visits:
- visit = swh_storage.origin_visit_add(
- origin_url, date=date_visit, type=visit_type
+ visit = OriginVisit(
+ origin=origin_url,
+ date=date_visit,
+ type=visit_type,
+ status="ongoing",
+ snapshot=None,
)
+ visits_ = swh_storage.origin_visit_add([visit])
+ visit = visits_[0]
swh_storage.origin_visit_update(
origin_url, visit_id=visit.visit, status="full"
)
@@ -1440,9 +1446,15 @@
for origin in data.origins:
origin_url = origin["url"]
for date_visit in visits:
- visit = swh_storage.origin_visit_add(
- origin_url, date=date_visit, type=visit_type
+ visit = OriginVisit(
+ origin=origin_url,
+ date=date_visit,
+ type=visit_type,
+ status="ongoing",
+ snapshot=None,
)
+ visits_ = swh_storage.origin_visit_add([visit])
+ visit = visits_[0]
swh_storage.origin_visit_update(origin_url, visit.visit, status="full")
random_origin_visit = swh_storage.origin_visit_get_random(visit_type)
@@ -1593,11 +1605,23 @@
date_visit = date_visit.replace(microsecond=round(date_visit.microsecond, -3))
# when
- origin_visit = swh_storage.origin_visit_add(
- origin_url, type=data.type_visit1, date=date_visit
+ visit = OriginVisit(
+ origin=origin_url,
+ type=data.type_visit1,
+ date=date_visit,
+ status="ongoing",
+ snapshot=None,
)
- expected_origin_visit = {
+ # multiple calls to add the same visit changes nothing
+ origin_visit = swh_storage.origin_visit_add([visit])[0]
+
+ # insert the same origin visit should be ok as well
+ origin_visit2 = swh_storage.origin_visit_add([origin_visit])[0]
+
+ assert origin_visit == origin_visit2 # idempotent!
+
+ expected_ori_visit_dict = {
"origin": origin_url,
"date": date_visit,
"visit": origin_visit.visit,
@@ -1606,14 +1630,24 @@
"metadata": None,
"snapshot": None,
}
- assert origin_visit == OriginVisit.from_dict(expected_origin_visit)
- actual_origin_visits = list(swh_storage.origin_visit_get(origin_url))
+ expected_ori_visit_status_dict = copy.deepcopy(expected_ori_visit_dict)
+ expected_ori_visit_status_dict.pop("type")
- assert expected_origin_visit in actual_origin_visits
+ expected_origin_visit_status = OriginVisitStatus.from_dict(
+ expected_ori_visit_status_dict
+ )
+ expected_origin_visit = OriginVisit.from_dict(expected_ori_visit_dict)
+ assert origin_visit == expected_origin_visit
+
+ actual_origin_visits = list(swh_storage.origin_visit_get(origin_url))
+ assert expected_ori_visit_dict in actual_origin_visits
objects = list(swh_storage.journal_writer.journal.objects)
+
assert ("origin", Origin.from_dict(data.origin2)) in objects
- assert ("origin_visit", OriginVisit.from_dict(expected_origin_visit)) in objects
+ assert ("origin_visit", expected_origin_visit) in objects
+ assert ("origin_visit", expected_origin_visit) in objects
+ assert ("origin_visit_status", expected_origin_visit_status) in objects
def test_origin_visit_get__unknown_origin(self, swh_storage):
assert [] == list(swh_storage.origin_visit_get("foo"))
@@ -1633,13 +1667,27 @@
microsecond=round(date_visit2.microsecond, -3)
)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=date_visit, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=date_visit,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visits = swh_storage.origin_visit_add([visit1, visit2])
+
+ assert len(origin_visits) == 2
+ origin_visit1 = origin_visits[0]
+ origin_visit2 = origin_visits[1]
+
# then
assert origin_visit1.origin == origin_url
assert origin_visit1.visit is not None
@@ -1676,12 +1724,16 @@
assert ("origin_visit", OriginVisit.from_dict(visit)) in objects
def test_origin_visit_add_validation(self, swh_storage):
- origin_url = swh_storage.origin_add_one(data.origin2)
- with pytest.raises(StorageArgumentException) as cm:
- swh_storage.origin_visit_add(origin_url, date=[b"foo"], type="git")
-
- if type(cm.value) == psycopg2.ProgrammingError:
- assert cm.value.pgcode == psycopg2.errorcodes.UNDEFINED_FUNCTION
+ """Unknown origin when adding visits should raise"""
+ visit = OriginVisit(
+ origin="something-unknown",
+ date=now(),
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ with pytest.raises(StorageArgumentException, match="Unknown origin"):
+ swh_storage.origin_visit_add([visit])
def test_origin_visit_status_add_validation(self, swh_storage):
"""Wrong origin_visit_status input should raise storage argument error"""
@@ -1701,9 +1753,23 @@
"""
origin_url = swh_storage.origin_add_one(data.origin2)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
+ )
+ origin_url2 = swh_storage.origin_add_one({"url": "new-origin"})
+ visit2 = OriginVisit(
+ origin=origin_url2,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1, origin_visit2 = swh_storage.origin_visit_add([visit1, visit2])
+
snapshot_id = data.snapshot["id"]
date_visit_now = now()
visit_status1 = OriginVisitStatus(
@@ -1714,10 +1780,6 @@
snapshot=snapshot_id,
)
- origin_url2 = swh_storage.origin_add_one({"url": "new-origin"})
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url2, date=data.date_visit2, type=data.type_visit2
- )
date_visit_now = now()
visit_status2 = OriginVisitStatus(
origin=origin_visit2.origin,
@@ -1760,15 +1822,28 @@
microsecond=round(date_visit2.microsecond, -3)
)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=date_visit, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=date_visit,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit3 = swh_storage.origin_visit_add(
- origin_url2, date=date_visit2, type=data.type_visit3
+ visit3 = OriginVisit(
+ origin=origin_url2,
+ date=date_visit2,
+ type=data.type_visit3, # noqa
+ status="ongoing",
+ snapshot=None,
)
+ ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
# when
visit1_metadata = {
@@ -1776,11 +1851,9 @@
"directories": 22,
}
swh_storage.origin_visit_update(
- origin_url, origin_visit1.visit, status="full", metadata=visit1_metadata
- )
- swh_storage.origin_visit_update(
- origin_url2, origin_visit3.visit, status="partial"
+ origin_url, ov1.visit, status="full", metadata=visit1_metadata
)
+ swh_storage.origin_visit_update(origin_url2, ov3.visit, status="partial")
# then
actual_origin_visits = list(swh_storage.origin_visit_get(origin_url))
@@ -1788,7 +1861,7 @@
{
"origin": origin_url,
"date": date_visit,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"type": data.type_visit1,
"status": "full",
"metadata": visit1_metadata,
@@ -1797,7 +1870,7 @@
{
"origin": origin_url,
"date": date_visit2,
- "visit": origin_visit2.visit,
+ "visit": ov2.visit,
"type": data.type_visit2,
"status": "ongoing",
"metadata": None,
@@ -1814,7 +1887,7 @@
{
"origin": origin_url,
"date": date_visit,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"type": data.type_visit1,
"status": "full",
"metadata": visit1_metadata,
@@ -1823,13 +1896,13 @@
]
actual_origin_visits_ter = list(
- swh_storage.origin_visit_get(origin_url, last_visit=origin_visit1.visit)
+ swh_storage.origin_visit_get(origin_url, last_visit=ov1.visit)
)
assert actual_origin_visits_ter == [
{
"origin": origin_url,
"date": date_visit2,
- "visit": origin_visit2.visit,
+ "visit": ov2.visit,
"type": data.type_visit2,
"status": "ongoing",
"metadata": None,
@@ -1842,7 +1915,7 @@
{
"origin": origin_url2,
"date": date_visit2,
- "visit": origin_visit3.visit,
+ "visit": ov3.visit,
"type": data.type_visit3,
"status": "partial",
"metadata": None,
@@ -1853,7 +1926,7 @@
data1 = {
"origin": origin_url,
"date": date_visit,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"type": data.type_visit1,
"status": "ongoing",
"metadata": None,
@@ -1862,7 +1935,7 @@
data2 = {
"origin": origin_url,
"date": date_visit2,
- "visit": origin_visit2.visit,
+ "visit": ov2.visit,
"type": data.type_visit2,
"status": "ongoing",
"metadata": None,
@@ -1871,7 +1944,7 @@
data3 = {
"origin": origin_url2,
"date": date_visit2,
- "visit": origin_visit3.visit,
+ "visit": ov3.visit,
"type": data.type_visit3,
"status": "ongoing",
"metadata": None,
@@ -1880,7 +1953,7 @@
data4 = {
"origin": origin_url,
"date": date_visit,
- "visit": origin_visit1.visit,
+ "visit": ov1.visit,
"type": data.type_visit1,
"metadata": visit1_metadata,
"status": "full",
@@ -1889,7 +1962,7 @@
data5 = {
"origin": origin_url2,
"date": date_visit2,
- "visit": origin_visit3.visit,
+ "visit": ov3.visit,
"type": data.type_visit3,
"status": "partial",
"metadata": None,
@@ -1906,9 +1979,14 @@
def test_origin_visit_update_validation(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- visit = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ visit = swh_storage.origin_visit_add([visit])[0]
with pytest.raises(
(StorageArgumentException, ValueError), match="status"
) as cm:
@@ -1920,24 +1998,37 @@
def test_origin_visit_find_by_date(self, swh_storage):
# given
origin_url = swh_storage.origin_add_one(data.origin)
- swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
-
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit3, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit3,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit3 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit3
+ visit3 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit3,
+ status="ongoing",
+ snapshot=None,
)
+ _, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
+
# Simple case
visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit3)
- assert visit["visit"] == origin_visit2.visit
+ assert visit["visit"] == ov2.visit
# There are two visits at the same date, the latest must be returned
visit = swh_storage.origin_visit_find_by_date(origin_url, data.date_visit2)
- assert visit["visit"] == origin_visit3.visit
+ assert visit["visit"] == ov3.visit
def test_origin_visit_find_by_date__unknown_origin(self, swh_storage):
swh_storage.origin_visit_find_by_date("foo", data.date_visit2)
@@ -1945,9 +2036,14 @@
def test_origin_visit_update_missing_snapshot(self, swh_storage):
# given
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit = swh_storage.origin_visit_add([visit])[0]
# when
swh_storage.origin_visit_update(
@@ -1970,9 +2066,14 @@
def test_origin_visit_get_by(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
origin_url2 = swh_storage.origin_add_one(data.origin2)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
swh_storage.origin_visit_update(
@@ -1983,12 +2084,21 @@
)
# Add some other {origin, visit} entries
- swh_storage.origin_visit_add(
- origin_url, date=data.date_visit3, type=data.type_visit3
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit3,
+ type=data.type_visit3,
+ status="ongoing",
+ snapshot=None,
)
- swh_storage.origin_visit_add(
- origin_url2, date=data.date_visit3, type=data.type_visit3
+ visit3 = OriginVisit(
+ origin=origin_url2,
+ date=data.date_visit3,
+ type=data.type_visit3,
+ status="ongoing",
+ snapshot=None,
)
+ swh_storage.origin_visit_add([visit2, visit3])
# when
visit1_metadata = {
@@ -2167,9 +2277,14 @@
origin_url = swh_storage.origin_add_one(data.origin2)
# when
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit1])[0]
swh_storage.origin_visit_upsert(
[
@@ -2271,16 +2386,29 @@
def test_origin_visit_get_latest(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- ov1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
- ov2 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
# Add a visit with the same date as the previous one
- ov3 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit3 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
origin_visit1 = swh_storage.origin_visit_get_by(origin_url, ov1.visit)
origin_visit2 = swh_storage.origin_visit_get_by(origin_url, ov2.visit)
@@ -2396,9 +2524,14 @@
def test_snapshot_add_get_empty(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.empty_snapshot])
@@ -2458,9 +2591,14 @@
def test_snapshot_add_get_complete(self, swh_storage):
origin_url = data.origin["url"]
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit_id = origin_visit1.visit
actual_result = swh_storage.snapshot_add([data.complete_snapshot])
@@ -2620,9 +2758,14 @@
def test_snapshot_add_get_filtered(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
@@ -2734,9 +2877,14 @@
def test_snapshot_add_get(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
@@ -2775,9 +2923,14 @@
def test_snapshot_add_twice__by_origin_visit(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
visit1_id = origin_visit1.visit
swh_storage.snapshot_add([data.snapshot])
date_now2 = now()
@@ -2792,9 +2945,14 @@
by_ov1 = swh_storage.snapshot_get_by_origin_visit(origin_url, visit1_id)
assert by_ov1 == {**data.snapshot, "next_branch": None}
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit2 = swh_storage.origin_visit_add([visit2])[0]
visit2_id = origin_visit2.visit
swh_storage.snapshot_add([data.snapshot])
@@ -2876,16 +3034,29 @@
def test_snapshot_get_latest(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
# Add a visit with the same date as the previous one
- origin_visit3 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit3
+ visit3 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit3,
+ status="ongoing",
+ snapshot=None,
)
+ ov1, ov2, ov3 = swh_storage.origin_visit_add([visit1, visit2, visit3])
# Two visits, both with no snapshot: latest snapshot is None
assert swh_storage.snapshot_get_latest(origin_url) is None
@@ -2893,7 +3064,7 @@
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
origin_url,
- origin_visit1.visit,
+ ov1.visit,
status="ongoing",
snapshot=data.complete_snapshot["id"],
)
@@ -2910,7 +3081,7 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, origin_visit1.visit, status="full")
+ swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
assert {
**data.complete_snapshot,
"next_branch": None,
@@ -2919,10 +3090,7 @@
# Add snapshot to visit2 and check that the new snapshot is returned
swh_storage.snapshot_add([data.empty_snapshot])
swh_storage.origin_visit_update(
- origin_url,
- origin_visit2.visit,
- status="ongoing",
- snapshot=data.empty_snapshot["id"],
+ origin_url, ov2.visit, status="ongoing", snapshot=data.empty_snapshot["id"],
)
assert {
**data.empty_snapshot,
@@ -2940,7 +3108,7 @@
swh_storage.snapshot_add([data.complete_snapshot])
swh_storage.origin_visit_update(
origin_url,
- origin_visit3.visit,
+ ov3.visit,
status="ongoing",
snapshot=data.complete_snapshot["id"],
)
@@ -2952,12 +3120,21 @@
def test_snapshot_get_latest__missing_snapshot(self, swh_storage):
origin_url = swh_storage.origin_add_one(data.origin)
assert swh_storage.snapshot_get_latest(origin_url) is None
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit1, type=data.type_visit1
+ visit1 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit1,
+ type=data.type_visit1,
+ status="ongoing",
+ snapshot=None,
)
- origin_visit2 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit2 = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ ov1, ov2 = swh_storage.origin_visit_add([visit1, visit2])
# Two visits, both with no snapshot: latest snapshot is None
assert swh_storage.snapshot_get_latest(origin_url) is None
@@ -2966,7 +3143,7 @@
# detected
swh_storage.origin_visit_update(
origin_url,
- origin_visit1.visit,
+ ov1.visit,
status="ongoing",
snapshot=data.complete_snapshot["id"],
)
@@ -2982,7 +3159,7 @@
)
# Mark the first visit as completed and check status filter again
- swh_storage.origin_visit_update(origin_url, origin_visit1.visit, status="full")
+ swh_storage.origin_visit_update(origin_url, ov1.visit, status="full")
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
swh_storage.snapshot_get_latest(origin_url, allowed_statuses=["full"]),
@@ -2997,10 +3174,7 @@
# Add unknown snapshot to visit2 and check that the inconsistency
# is detected
swh_storage.origin_visit_update(
- origin_url,
- origin_visit2.visit,
- status="ongoing",
- snapshot=data.snapshot["id"],
+ origin_url, ov2.visit, status="ongoing", snapshot=data.snapshot["id"],
)
with pytest.raises(Exception):
# XXX: should the exception be more specific than this?
@@ -3061,9 +3235,14 @@
# Add other objects. Check their counter increased as well.
origin_url = swh_storage.origin_add_one(data.origin2)
- origin_visit1 = swh_storage.origin_visit_add(
- origin_url, date=data.date_visit2, type=data.type_visit2
+ visit = OriginVisit(
+ origin=origin_url,
+ date=data.date_visit2,
+ type=data.type_visit2,
+ status="ongoing",
+ snapshot=None,
)
+ origin_visit1 = swh_storage.origin_visit_add([visit])[0]
swh_storage.snapshot_add([data.snapshot])
swh_storage.origin_visit_update(
@@ -3778,7 +3957,10 @@
swh_storage.origin_add([{"url": url} for url in self.ORIGINS])
origin_url = "https://github.com/user1/repo1"
- swh_storage.origin_visit_add(origin_url, date=now(), type="git")
+ visit = OriginVisit(
+ origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ )
+ swh_storage.origin_visit_add([visit])
assert swh_storage.origin_count("github", with_visit=False) == 3
# it has a visit, but no snapshot, so with_visit=True => 0
@@ -3802,7 +3984,10 @@
swh_storage.snapshot_add([data.snapshot])
origin_url = "https://github.com/user1/repo1"
- visit = swh_storage.origin_visit_add(origin_url, date=now(), type="git")
+ visit = OriginVisit(
+ origin=origin_url, date=now(), type="git", status="ongoing", snapshot=None
+ )
+ visit = swh_storage.origin_visit_add([visit])[0]
swh_storage.origin_visit_update(
origin_url, visit.visit, status="ongoing", snapshot=data.snapshot["id"]
)
@@ -3830,7 +4015,14 @@
swh_storage.origin_add_one({"url": origin_url})
if "visit" in obj:
del obj["visit"]
- swh_storage.origin_visit_add(origin_url, obj["date"], obj["type"])
+ visit = OriginVisit(
+ origin=origin_url,
+ date=obj["date"],
+ type=obj["type"],
+ status="ongoing",
+ snapshot=None,
+ )
+ swh_storage.origin_visit_add([visit])
else:
if obj_type == "content" and obj["status"] == "absent":
obj_type = "skipped_content"
diff --git a/swh/storage/validate.py b/swh/storage/validate.py
--- a/swh/storage/validate.py
+++ b/swh/storage/validate.py
@@ -3,12 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import datetime
import contextlib
from typing import Dict, Iterable, Iterator, List, Optional, Tuple
from swh.model.model import (
- BaseModel,
SkippedContent,
Content,
Directory,
@@ -112,14 +110,8 @@
snapshots = [Snapshot.from_dict(s) for s in snapshots]
return self.storage.snapshot_add(snapshots)
- def origin_visit_add(
- self, origin_url: str, date: datetime.datetime, type: str
- ) -> Dict[str, BaseModel]:
- with convert_validation_exceptions():
- visit = OriginVisit(
- origin=origin_url, date=date, type=type, status="ongoing", snapshot=None
- )
- return self.storage.origin_visit_add(visit.origin, visit.date, visit.type)
+ def origin_visit_add(self, visits: Iterable[OriginVisit]) -> Iterable[OriginVisit]:
+ return self.storage.origin_visit_add(visits)
def origin_add(self, origins: Iterable[Dict]) -> List[Dict]:
with convert_validation_exceptions():
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 2:45 PM (1 w, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3222831
Attached To
D3262: storage*: Align origin-visit-add to take iterable of OriginVisit objects as input
Event Timeline
Log In to Comment