Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 1,269 Lines • ▼ Show 20 Lines | |||||
def origin_visit_upsert(self, visits, db=None, cur=None): | def origin_visit_upsert(self, visits, db=None, cur=None): | ||||
"""Add a origin_visits with a specific id and with all its data. | """Add a origin_visits with a specific id and with all its data. | ||||
If there is already an origin_visit with the same | If there is already an origin_visit with the same | ||||
`(origin_id, visit_id)`, overwrites it. | `(origin_id, visit_id)`, overwrites it. | ||||
Args: | Args: | ||||
visits: iterable of dicts with keys: | visits: iterable of dicts with keys: | ||||
origin: Visited Origin id | origin: dict with keys either `id` or `url` | ||||
visit: origin visit id | visit: origin visit id | ||||
date: timestamp of such visit | date: timestamp of such visit | ||||
status: Visit's new status | status: Visit's new status | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (sha1_git): identifier of the snapshot to add to | snapshot (sha1_git): identifier of the snapshot to add to | ||||
the visit | the visit | ||||
""" | """ | ||||
visits = copy.deepcopy(visits) | visits = copy.deepcopy(visits) | ||||
for visit in visits: | for visit in visits: | ||||
if isinstance(visit['date'], str): | if isinstance(visit['date'], str): | ||||
visit['date'] = dateutil.parser.parse(visit['date']) | visit['date'] = dateutil.parser.parse(visit['date']) | ||||
if isinstance(visit['origin'], str): | |||||
visit['origin'] = \ | visit['origin'] = \ | ||||
self.origin_get({'url': visit['origin']})['id'] | self.origin_get([visit['origin']], db=db, cur=cur)[0] | ||||
if self.journal_writer: | if self.journal_writer: | ||||
for visit in visits: | for visit in visits: | ||||
visit = visit.copy() | visit = copy.deepcopy(visit) | ||||
douardda: same question a above. | |||||
origin = self.origin_get( | |||||
[{'id': visit['origin']}], db=db, cur=cur)[0] | |||||
visit['origin'] = origin | |||||
if visit.get('type') is None: | if visit.get('type') is None: | ||||
visit['type'] = origin['type'] | visit['type'] = visit['origin']['type'] | ||||
del visit['origin']['id'] | del visit['origin']['id'] | ||||
self.journal_writer.write_addition('origin_visit', visit) | self.journal_writer.write_addition('origin_visit', visit) | ||||
for visit in visits: | for visit in visits: | ||||
visit['origin'] = visit['origin']['id'] | |||||
# TODO: upsert them all in a single query | # TODO: upsert them all in a single query | ||||
db.origin_visit_upsert(**visit, cur=cur) | db.origin_visit_upsert(**visit, cur=cur) | ||||
@db_transaction_generator(statement_timeout=500) | @db_transaction_generator(statement_timeout=500) | ||||
def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | ||||
cur=None): | cur=None): | ||||
"""Retrieve all the origin's visit's information. | """Retrieve all the origin's visit's information. | ||||
▲ Show 20 Lines • Show All 599 Lines • Show Last 20 Lines |
same question a above.