Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 1,220 Lines • ▼ Show 20 Lines | def origin_visit_add(self, origin, date, type, | ||||
Returns: | Returns: | ||||
dict: dictionary with keys origin and visit where: | dict: dictionary with keys origin and visit where: | ||||
- origin: origin identifier | - origin: origin identifier | ||||
- visit: the visit identifier for the new visit occurrence | - visit: the visit identifier for the new visit occurrence | ||||
""" | """ | ||||
origin_url = origin | origin_url = origin | ||||
origin = self.origin_get({'url': origin_url}, db=db, cur=cur) | origin = self.origin_get({'url': origin_url}, db=db, cur=cur) | ||||
olasd: I guess these two lines aren't really needed anymore. You can also add an isinstance like you… | |||||
if isinstance(date, str): | if isinstance(date, str): | ||||
# FIXME: Converge on iso8601 at some point | # FIXME: Converge on iso8601 at some point | ||||
date = dateutil.parser.parse(date) | date = dateutil.parser.parse(date) | ||||
visit_id = db.origin_visit_add(origin_url, date, type, cur) | visit_id = db.origin_visit_add(origin_url, date, type, cur) | ||||
if self.journal_writer: | if self.journal_writer: | ||||
# We can write to the journal only after inserting to the | # We can write to the journal only after inserting to the | ||||
# DB, because we want the id of the visit | # DB, because we want the id of the visit | ||||
self.journal_writer.write_addition('origin_visit', { | self.journal_writer.write_addition('origin_visit', { | ||||
'origin': origin, 'date': date, 'type': type, | 'origin': origin_url, 'date': date, 'type': type, | ||||
'visit': visit_id, | 'visit': visit_id, | ||||
'status': 'ongoing', 'metadata': None, 'snapshot': None}) | 'status': 'ongoing', 'metadata': None, 'snapshot': None}) | ||||
return { | return { | ||||
'origin': origin_url, | 'origin': origin_url, | ||||
'visit': visit_id, | 'visit': visit_id, | ||||
} | } | ||||
Show All 10 Lines | def origin_visit_update(self, origin, visit_id, status=None, | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (sha1_git): identifier of the snapshot to add to | snapshot (sha1_git): identifier of the snapshot to add to | ||||
the visit | the visit | ||||
Returns: | Returns: | ||||
None | None | ||||
""" | """ | ||||
if not isinstance(origin, str): | |||||
raise TypeError('origin must be a string, not %r' % (origin,)) | |||||
origin_url = origin | origin_url = origin | ||||
visit = db.origin_visit_get(origin_url, visit_id, cur=cur) | visit = db.origin_visit_get(origin_url, visit_id, cur=cur) | ||||
if not visit: | if not visit: | ||||
raise ValueError('Invalid visit_id for this origin.') | raise ValueError('Invalid visit_id for this origin.') | ||||
visit = dict(zip(db.origin_visit_get_cols, visit)) | visit = dict(zip(db.origin_visit_get_cols, visit)) | ||||
updates = {} | updates = {} | ||||
if status and status != visit['status']: | if status and status != visit['status']: | ||||
updates['status'] = status | updates['status'] = status | ||||
if metadata and metadata != visit['metadata']: | if metadata and metadata != visit['metadata']: | ||||
updates['metadata'] = metadata | updates['metadata'] = metadata | ||||
if snapshot and snapshot != visit['snapshot']: | if snapshot and snapshot != visit['snapshot']: | ||||
updates['snapshot'] = snapshot | updates['snapshot'] = snapshot | ||||
if updates: | if updates: | ||||
if self.journal_writer: | if self.journal_writer: | ||||
origin = self.origin_get( | |||||
[{'url': origin_url}], db=db, cur=cur)[0] | |||||
self.journal_writer.write_update('origin_visit', { | self.journal_writer.write_update('origin_visit', { | ||||
**visit, **updates, 'origin': origin}) | **visit, **updates}) | ||||
db.origin_visit_update(origin_url, visit_id, updates, cur) | db.origin_visit_update(origin_url, visit_id, updates, cur) | ||||
@db_transaction() | @db_transaction() | ||||
def origin_visit_upsert(self, visits, db=None, cur=None): | def origin_visit_upsert(self, visits, db=None, cur=None): | ||||
"""Add a origin_visits with a specific id and with all its data. | """Add a origin_visits with a specific id and with all its data. | ||||
If there is already an origin_visit with the same | If there is already an origin_visit with the same | ||||
`(origin_id, visit_id)`, overwrites it. | `(origin_id, visit_id)`, overwrites it. | ||||
Args: | Args: | ||||
visits: iterable of dicts with keys: | visits: iterable of dicts with keys: | ||||
origin: dict with keys either `id` or `url` | origin: dict with keys either `id` or `url` | ||||
visit: origin visit id | visit: origin visit id | ||||
date: timestamp of such visit | date: timestamp of such visit | ||||
status: Visit's new status | status: Visit's new status | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (sha1_git): identifier of the snapshot to add to | snapshot (sha1_git): identifier of the snapshot to add to | ||||
the visit | the visit | ||||
""" | """ | ||||
visits = copy.deepcopy(visits) | visits = copy.deepcopy(visits) | ||||
for visit in visits: | for visit in visits: | ||||
if isinstance(visit['date'], str): | if isinstance(visit['date'], str): | ||||
visit['date'] = dateutil.parser.parse(visit['date']) | visit['date'] = dateutil.parser.parse(visit['date']) | ||||
visit['origin'] = \ | if not isinstance(visit['origin'], str): | ||||
self.origin_get([visit['origin']], db=db, cur=cur)[0] | raise TypeError("visit['origin'] must be a string, not %r" | ||||
% (visit['origin'],)) | |||||
if self.journal_writer: | if self.journal_writer: | ||||
for visit in visits: | for visit in visits: | ||||
visit = copy.deepcopy(visit) | |||||
if visit.get('type') is None: | |||||
visit['type'] = visit['origin']['type'] | |||||
self.journal_writer.write_addition('origin_visit', visit) | self.journal_writer.write_addition('origin_visit', visit) | ||||
for visit in visits: | for visit in visits: | ||||
visit['origin'] = visit['origin']['url'] | |||||
# TODO: upsert them all in a single query | # TODO: upsert them all in a single query | ||||
db.origin_visit_upsert(**visit, cur=cur) | db.origin_visit_upsert(**visit, cur=cur) | ||||
@db_transaction_generator(statement_timeout=500) | @db_transaction_generator(statement_timeout=500) | ||||
def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | ||||
cur=None): | cur=None): | ||||
"""Retrieve all the origin's visit's information. | """Retrieve all the origin's visit's information. | ||||
▲ Show 20 Lines • Show All 496 Lines • Show Last 20 Lines |
I guess these two lines aren't really needed anymore. You can also add an isinstance check like you did in other methods.