Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 817 Lines • ▼ Show 20 Lines | ) -> OriginVisit: | ||||
date=date, | date=date, | ||||
type=type, | type=type, | ||||
# TODO: Remove when we remove those fields from the model | # TODO: Remove when we remove those fields from the model | ||||
status=status, | status=status, | ||||
snapshot=None, | snapshot=None, | ||||
metadata=None, | metadata=None, | ||||
visit=visit_id, | visit=visit_id, | ||||
) | ) | ||||
self.journal_writer.origin_visit_add([visit]) | |||||
self._origin_visits[origin_url].append(visit) | self._origin_visits[origin_url].append(visit) | ||||
assert visit.visit is not None | assert visit.visit is not None | ||||
visit_key = (origin_url, visit.visit) | visit_key = (origin_url, visit.visit) | ||||
with convert_validation_exceptions(): | with convert_validation_exceptions(): | ||||
visit_update = OriginVisitStatus( | visit_update = OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=visit_id, | visit=visit_id, | ||||
date=date, | date=date, | ||||
status=status, | status=status, | ||||
snapshot=None, | snapshot=None, | ||||
metadata=None, | metadata=None, | ||||
) | ) | ||||
self._origin_visit_statuses[visit_key] = [visit_update] | self._origin_visit_status_add_one(visit_update) | ||||
self._objects[visit_key].append(("origin_visit", None)) | self._objects[visit_key].append(("origin_visit", None)) | ||||
self.journal_writer.origin_visit_add([visit]) | |||||
# return last visit | # return last visit | ||||
return visit | return visit | ||||
def _origin_visit_status_add_one(self, visit_status: OriginVisitStatus) -> None: | |||||
"""Add an origin visit status without checks. | |||||
""" | |||||
self.journal_writer.origin_visit_status_add([visit_status]) | |||||
visit_key = (visit_status.origin, visit_status.visit) | |||||
self._origin_visit_statuses.setdefault(visit_key, []) | |||||
self._origin_visit_statuses[visit_key].append(visit_status) | |||||
def origin_visit_status_add( | def origin_visit_status_add( | ||||
self, visit_statuses: Iterable[OriginVisitStatus], | self, visit_statuses: Iterable[OriginVisitStatus], | ||||
) -> None: | ) -> None: | ||||
# First round to check existence (fail early if any is ko) | # First round to check existence (fail early if any is ko) | ||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
origin_url = self.origin_get({"url": visit_status.origin}) | origin_url = self.origin_get({"url": visit_status.origin}) | ||||
if not origin_url: | if not origin_url: | ||||
raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | raise StorageArgumentException(f"Unknown origin {visit_status.origin}") | ||||
# Insert | |||||
for visit_status in visit_statuses: | for visit_status in visit_statuses: | ||||
visit_key = (visit_status.origin, visit_status.visit) | self._origin_visit_status_add_one(visit_status) | ||||
self.journal_writer.origin_visit_status_add([visit_status]) | |||||
self._origin_visit_statuses[visit_key].append(visit_status) | |||||
def origin_visit_update( | def origin_visit_update( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
visit_id: int, | visit_id: int, | ||||
status: str, | status: str, | ||||
metadata: Optional[Dict] = None, | metadata: Optional[Dict] = None, | ||||
snapshot: Optional[bytes] = None, | snapshot: Optional[bytes] = None, | ||||
date: Optional[datetime.datetime] = None, | date: Optional[datetime.datetime] = None, | ||||
): | ): | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url is None: | if origin_url is None: | ||||
raise StorageArgumentException("Unknown origin.") | raise StorageArgumentException("Unknown origin.") | ||||
try: | try: | ||||
visit = self._origin_visits[origin_url][visit_id - 1] | visit = self._origin_visits[origin_url][visit_id - 1] | ||||
except IndexError: | except IndexError: | ||||
raise StorageArgumentException("Unknown visit_id for this origin") from None | raise StorageArgumentException("Unknown visit_id for this origin") from None | ||||
updates: Dict[str, Any] = { | |||||
"status": status, | |||||
} | |||||
if metadata and metadata != visit.metadata: | |||||
updates["metadata"] = metadata | |||||
if snapshot and snapshot != visit.snapshot: | |||||
updates["snapshot"] = snapshot | |||||
if updates: | |||||
with convert_validation_exceptions(): | |||||
updated_visit = OriginVisit.from_dict({**visit.to_dict(), **updates}) | |||||
self.journal_writer.origin_visit_update([updated_visit]) | |||||
self._origin_visits[origin_url][visit_id - 1] = updated_visit | |||||
# Retrieve the previous visit status | # Retrieve the previous visit status | ||||
assert visit.visit is not None | assert visit.visit is not None | ||||
visit_key = (origin_url, visit.visit) | visit_key = (origin_url, visit.visit) | ||||
last_visit_update = max( | last_visit_status = self._origin_visit_get_updated(origin, visit_id) | ||||
self._origin_visit_statuses[visit_key], key=lambda v: v.date | assert last_visit_status is not None | ||||
) | |||||
with convert_validation_exceptions(): | with convert_validation_exceptions(): | ||||
visit_update = OriginVisitStatus( | visit_status = OriginVisitStatus( | ||||
origin=origin_url, | origin=origin_url, | ||||
visit=visit_id, | visit=visit_id, | ||||
date=date or now(), | date=date or now(), | ||||
status=status, | status=status, | ||||
snapshot=snapshot or last_visit_update.snapshot, | snapshot=snapshot or last_visit_status.snapshot, | ||||
metadata=metadata or last_visit_update.metadata, | metadata=metadata or last_visit_status.metadata, | ||||
) | ) | ||||
self._origin_visit_statuses[visit_key].append(visit_update) | visit_key = (visit_status.origin, visit_status.visit) | ||||
self._origin_visit_statuses.setdefault(visit_key, []) | |||||
self.journal_writer.origin_visit_update( | self._origin_visit_statuses[visit_key].append(visit_status) | ||||
ardumont: I just realigned the implementation with the other backends.
This also aligns the journal… | |||||
[self._origin_visit_get_updated(origin_url, visit_id)] | # self._origin_visit_status_add_one(visit_status) | ||||
) | |||||
self._origin_visits[origin_url][visit_id - 1] = visit | |||||
def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: | def origin_visit_upsert(self, visits: Iterable[OriginVisit]) -> None: | ||||
for visit in visits: | for visit in visits: | ||||
if visit.visit is None: | if visit.visit is None: | ||||
raise StorageArgumentException(f"Missing visit id for visit {visit}") | raise StorageArgumentException(f"Missing visit id for visit {visit}") | ||||
self.journal_writer.origin_visit_upsert(visits) | self.journal_writer.origin_visit_upsert(visits) | ||||
▲ Show 20 Lines • Show All 366 Lines • Show Last 20 Lines |
I just realigned the implementation with the other backends.
This also aligns the order of the journal writes (which is tested, and that test failed because origin-visit-status is now written there).