Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 1,258 Lines • ▼ Show 20 Lines | |||||
def origin_visit_upsert(self, visits): | def origin_visit_upsert(self, visits): | ||||
"""Add a origin_visits with a specific id and with all its data. | """Add a origin_visits with a specific id and with all its data. | ||||
If there is already an origin_visit with the same | If there is already an origin_visit with the same | ||||
`(origin_id, visit_id)`, updates it instead of inserting a new one. | `(origin_id, visit_id)`, updates it instead of inserting a new one. | ||||
Args: | Args: | ||||
visits: iterable of dicts with keys: | visits: iterable of dicts with keys: | ||||
origin: Visited Origin id | origin: dict with keys either `id` or `url` | ||||
visit: origin visit id | visit: origin visit id | ||||
type: type of loader used for the visit | type: type of loader used for the visit | ||||
date: timestamp of such visit | date: timestamp of such visit | ||||
status: Visit's new status | status: Visit's new status | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (sha1_git): identifier of the snapshot to add to | snapshot (sha1_git): identifier of the snapshot to add to | ||||
the visit | the visit | ||||
""" | """ | ||||
visits = copy.deepcopy(visits) | visits = copy.deepcopy(visits) | ||||
for visit in visits: | for visit in visits: | ||||
if isinstance(visit['date'], str): | if isinstance(visit['date'], str): | ||||
visit['date'] = dateutil.parser.parse(visit['date']) | visit['date'] = dateutil.parser.parse(visit['date']) | ||||
if isinstance(visit['origin'], str): | origin = visit['origin'] | ||||
origin = \ | visit['origin'] = self.origin_get([origin])[0] | ||||
self.origin_get([{'url': visit['origin']}])[0] | if not visit['origin']: | ||||
if not origin: | raise ValueError('Unknown origin: %s' % origin) | ||||
raise ValueError('Unknown origin: %s' % visit['origin']) | |||||
douardda: There is a behavior change here (no more ValueError raised); it could be documented in the commit… | |||||
visit['origin'] = origin['id'] | |||||
if self.journal_writer: | if self.journal_writer: | ||||
for visit in visits: | for visit in visits: | ||||
visit = visit.copy() | visit = copy.deepcopy(visit) | ||||
Not Done Inline Actions: Why is this deepcopy necessary? I mean, at this point, visits is already a deepcopy of the given visits argument. douardda: Why is this deepcopy necessary? I mean, at this point, `visits` is already a deepcopy of the given… | |||||
Done Inline Actions: Because the list of visits is re-used below, and it needs the 'id'. vlorentz: Because the list of visits is re-used below, and it needs the `'id'`. | |||||
visit['origin'] = self.origin_get([{'id': visit['origin']}])[0] | |||||
del visit['origin']['id'] | del visit['origin']['id'] | ||||
self.journal_writer.write_addition('origin_visit', visit) | self.journal_writer.write_addition('origin_visit', visit) | ||||
for visit in visits: | for visit in visits: | ||||
origin_id = visit['origin'] | origin_id = visit['origin']['id'] | ||||
visit_id = visit['visit'] | visit_id = visit['visit'] | ||||
self._objects[(origin_id, visit_id)].append( | self._objects[(origin_id, visit_id)].append( | ||||
('origin_visit', None)) | ('origin_visit', None)) | ||||
while len(self._origin_visits[origin_id-1]) < visit_id: | while len(self._origin_visits[origin_id-1]) < visit_id: | ||||
self._origin_visits[origin_id-1].append(None) | self._origin_visits[origin_id-1].append(None) | ||||
▲ Show 20 Lines • Show All 359 Lines • Show Last 20 Lines |
There is a behavior change here (no more ValueError raised); it could be documented in the commit message, I guess.