Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 692 Lines • ▼ Show 20 Lines | def snapshot_add(self, snapshots, origin=None, visit=None): | ||||
snapshot_added: Count of object actually stored in db | snapshot_added: Count of object actually stored in db | ||||
""" | """ | ||||
if origin: | if origin: | ||||
if not visit: | if not visit: | ||||
raise TypeError( | raise TypeError( | ||||
'snapshot_add expects one argument (or, as a legacy ' | 'snapshot_add expects one argument (or, as a legacy ' | ||||
'behavior, three arguments), not two') | 'behavior, three arguments), not two') | ||||
if isinstance(snapshots, (int, bytes)): | if isinstance(snapshots, (int, str)): | ||||
# Called by legacy code that uses the new api/client.py | # Called by legacy code that uses the new api/client.py | ||||
(origin, visit, snapshots) = \ | (origin, visit, snapshots) = \ | ||||
(snapshots, origin, [visit]) | (snapshots, origin, [visit]) | ||||
else: | else: | ||||
# Called by legacy code that uses the old api/client.py | # Called by legacy code that uses the old api/client.py | ||||
snapshots = [snapshots] | snapshots = [snapshots] | ||||
count = 0 | count = 0 | ||||
▲ Show 20 Lines • Show All 100 Lines • ▼ Show 20 Lines | def snapshot_get_latest(self, origin, allowed_statuses=None): | ||||
* **id**: identifier of the snapshot | * **id**: identifier of the snapshot | ||||
* **branches**: a dict of branches contained in the snapshot | * **branches**: a dict of branches contained in the snapshot | ||||
whose keys are the branches' names. | whose keys are the branches' names. | ||||
* **next_branch**: the name of the first branch not returned | * **next_branch**: the name of the first branch not returned | ||||
or :const:`None` if the snapshot has less than 1000 | or :const:`None` if the snapshot has less than 1000 | ||||
branches. | branches. | ||||
""" | """ | ||||
if isinstance(origin, int): | if isinstance(origin, int): | ||||
origin = self.origin_get({'id': origin})['url'] | origin = self.origin_get({'id': origin}) | ||||
if not origin: | |||||
return | |||||
origin = origin['url'] | |||||
visit = self.origin_visit_get_latest( | visit = self.origin_visit_get_latest( | ||||
origin, allowed_statuses=allowed_statuses, require_snapshot=True) | origin, allowed_statuses=allowed_statuses, require_snapshot=True) | ||||
if visit and visit['snapshot']: | if visit and visit['snapshot']: | ||||
snapshot = self.snapshot_get(visit['snapshot']) | snapshot = self.snapshot_get(visit['snapshot']) | ||||
if not snapshot: | if not snapshot: | ||||
raise ValueError( | raise ValueError( | ||||
'last origin visit references an unknown snapshot') | 'last origin visit references an unknown snapshot') | ||||
▲ Show 20 Lines • Show All 442 Lines • ▼ Show 20 Lines | def origin_visit_upsert(self, visits): | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (sha1_git): identifier of the snapshot to add to | snapshot (sha1_git): identifier of the snapshot to add to | ||||
the visit | the visit | ||||
""" | """ | ||||
visits = copy.deepcopy(visits) | visits = copy.deepcopy(visits) | ||||
for visit in visits: | for visit in visits: | ||||
if isinstance(visit['date'], str): | if isinstance(visit['date'], str): | ||||
visit['date'] = dateutil.parser.parse(visit['date']) | visit['date'] = dateutil.parser.parse(visit['date']) | ||||
if isinstance(visit['origin'], str): | |||||
origin = \ | |||||
self.origin_get([{'url': visit['origin']}])[0] | |||||
if not origin: | |||||
raise ValueError('Unknown origin: %s' % visit['origin']) | |||||
visit['origin'] = origin['id'] | |||||
if self.journal_writer: | if self.journal_writer: | ||||
for visit in visits: | for visit in visits: | ||||
visit = visit.copy() | visit = visit.copy() | ||||
visit['origin'] = self.origin_get([{'id': visit['origin']}])[0] | visit['origin'] = self.origin_get([{'id': visit['origin']}])[0] | ||||
del visit['origin']['id'] | del visit['origin']['id'] | ||||
self.journal_writer.write_addition('origin_visit', visit) | self.journal_writer.write_addition('origin_visit', visit) | ||||
for visit in visits: | for visit in visits: | ||||
origin_id = visit['origin'] | origin_id = visit['origin'] | ||||
visit_id = visit['visit'] | visit_id = visit['visit'] | ||||
self._objects[(origin_id, visit_id)].append( | self._objects[(origin_id, visit_id)].append( | ||||
('origin_visit', None)) | ('origin_visit', None)) | ||||
while len(self._origin_visits[origin_id-1]) < visit_id: | while len(self._origin_visits[origin_id-1]) < visit_id: | ||||
self._origin_visits[origin_id-1].append(None) | self._origin_visits[origin_id-1].append(None) | ||||
visit = visit.copy() | |||||
visit['origin'] = origin_id | |||||
visit = self._origin_visits[origin_id-1][visit_id-1] = visit | visit = self._origin_visits[origin_id-1][visit_id-1] = visit | ||||
def origin_visit_get(self, origin, last_visit=None, limit=None): | def origin_visit_get(self, origin, last_visit=None, limit=None): | ||||
"""Retrieve all the origin's visit's information. | """Retrieve all the origin's visit's information. | ||||
Args: | Args: | ||||
origin (int): the origin's identifier | origin (int): the origin's identifier | ||||
last_visit (int): visit's id from which listing the next ones, | last_visit (int): visit's id from which listing the next ones, | ||||
Show All 30 Lines | def origin_visit_find_by_date(self, origin, visit_date): | ||||
Args: | Args: | ||||
origin (str): The occurrence's origin (URL). | origin (str): The occurrence's origin (URL). | ||||
target (datetime): target timestamp | target (datetime): target timestamp | ||||
Returns: | Returns: | ||||
A visit. | A visit. | ||||
""" | """ | ||||
origin = self.origin_get([{'url': origin}])[0]['id'] | origin = self.origin_get([{'url': origin}])[0] | ||||
if not origin: | |||||
return | |||||
origin = origin['id'] | |||||
if origin <= len(self._origin_visits): | if origin <= len(self._origin_visits): | ||||
visits = self._origin_visits[origin-1] | visits = self._origin_visits[origin-1] | ||||
return min( | return min( | ||||
visits, | visits, | ||||
key=lambda v: (abs(v['date'] - visit_date), -v['visit'])) | key=lambda v: (abs(v['date'] - visit_date), -v['visit'])) | ||||
def origin_visit_get_by(self, origin, visit): | def origin_visit_get_by(self, origin, visit): | ||||
"""Retrieve origin visit's information. | """Retrieve origin visit's information. | ||||
Args: | Args: | ||||
origin (int): the origin's identifier | origin (int): the origin's identifier | ||||
Returns: | Returns: | ||||
The information on that particular (origin, visit) or None if | The information on that particular (origin, visit) or None if | ||||
it does not exist | it does not exist | ||||
""" | """ | ||||
if isinstance(origin, str): | if isinstance(origin, str): | ||||
origin = self.origin_get({'url': origin})['id'] | origin = self.origin_get({'url': origin}) | ||||
if not origin: | |||||
return | |||||
origin = origin['id'] | |||||
origin_visit = None | origin_visit = None | ||||
if origin <= len(self._origin_visits) and \ | if origin <= len(self._origin_visits) and \ | ||||
visit <= len(self._origin_visits[origin-1]): | visit <= len(self._origin_visits[origin-1]): | ||||
origin_visit = self._origin_visits[origin-1][visit-1] | origin_visit = self._origin_visits[origin-1][visit-1] | ||||
return copy.deepcopy(origin_visit) | return copy.deepcopy(origin_visit) | ||||
def origin_visit_get_latest( | def origin_visit_get_latest( | ||||
self, origin, allowed_statuses=None, require_snapshot=False): | self, origin, allowed_statuses=None, require_snapshot=False): | ||||
Show All 16 Lines | def origin_visit_get_latest( | ||||
visit: origin visit id | visit: origin visit id | ||||
type: type of loader used for the visit | type: type of loader used for the visit | ||||
date: timestamp of such visit | date: timestamp of such visit | ||||
status: Visit's new status | status: Visit's new status | ||||
metadata: Data associated to the visit | metadata: Data associated to the visit | ||||
snapshot (Optional[sha1_git]): identifier of the snapshot | snapshot (Optional[sha1_git]): identifier of the snapshot | ||||
associated to the visit | associated to the visit | ||||
""" | """ | ||||
origin = self.origin_get({'url': origin})['id'] | origin = self.origin_get({'url': origin}) | ||||
if not origin: | |||||
return | |||||
origin = origin['id'] | |||||
visits = self._origin_visits[origin-1] | visits = self._origin_visits[origin-1] | ||||
if allowed_statuses is not None: | if allowed_statuses is not None: | ||||
visits = [visit for visit in visits | visits = [visit for visit in visits | ||||
if visit['status'] in allowed_statuses] | if visit['status'] in allowed_statuses] | ||||
if require_snapshot: | if require_snapshot: | ||||
visits = [visit for visit in visits | visits = [visit for visit in visits | ||||
if visit['snapshot']] | if visit['snapshot']] | ||||
▲ Show 20 Lines • Show All 240 Lines • Show Last 20 Lines |