Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/indexer.py
Show First 20 Lines • Show All 255 Lines • ▼ Show 20 Lines | def next_step(self, results, task): | ||||
(This is not an abstractmethod since it is optional). | (This is not an abstractmethod since it is optional). | ||||
Args: | Args: | ||||
results ([result]): List of results (dict) as returned | results ([result]): List of results (dict) as returned | ||||
by index function. | by index function. | ||||
task (dict): a dict in the form expected by | task (dict): a dict in the form expected by | ||||
`scheduler.backend.SchedulerBackend.create_tasks` | `scheduler.backend.SchedulerBackend.create_tasks` | ||||
without `next_run`, plus a `result_name` key. | without `next_run`, plus an optional `result_name` key. | ||||
Returns: | Returns: | ||||
None | None | ||||
""" | """ | ||||
if task: | if task: | ||||
if getattr(self, 'scheduler', None): | if getattr(self, 'scheduler', None): | ||||
scheduler = self.scheduler | scheduler = self.scheduler | ||||
else: | else: | ||||
scheduler = get_scheduler(**self.config['scheduler']) | scheduler = get_scheduler(**self.config['scheduler']) | ||||
task = deepcopy(task) | task = deepcopy(task) | ||||
result_name = task.pop('result_name') | result_name = task.pop('result_name', None) | ||||
task['next_run'] = datetime.datetime.now() | task['next_run'] = datetime.datetime.now() | ||||
if result_name: | |||||
task['arguments']['kwargs'][result_name] = self.results | task['arguments']['kwargs'][result_name] = self.results | ||||
scheduler.create_tasks([task]) | scheduler.create_tasks([task]) | ||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def run(self, ids, policy_update, | def run(self, ids, policy_update, | ||||
next_step=None, **kwargs): | next_step=None, **kwargs): | ||||
"""Given a list of ids: | """Given a list of ids: | ||||
- retrieves the data from the storage | - retrieves the data from the storage | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | def run(self, ids, policy_update, | ||||
Args: | Args: | ||||
ids ([bytes]): sha1's identifier list | ids ([bytes]): sha1's identifier list | ||||
policy_update (str): either 'update-dups' or 'ignore-dups' to | policy_update (str): either 'update-dups' or 'ignore-dups' to | ||||
respectively update duplicates or ignore | respectively update duplicates or ignore | ||||
them | them | ||||
next_step (dict): a dict in the form expected by | next_step (dict): a dict in the form expected by | ||||
`scheduler.backend.SchedulerBackend.create_tasks` | `scheduler.backend.SchedulerBackend.create_tasks` | ||||
without `next_run`, plus a `result_name` key. | without `next_run`, plus an optional `result_name` key. | ||||
**kwargs: passed to the `index` method | **kwargs: passed to the `index` method | ||||
""" | """ | ||||
results = [] | results = [] | ||||
try: | try: | ||||
for sha1 in ids: | for sha1 in ids: | ||||
try: | try: | ||||
raw_content = self.objstorage.get(sha1) | raw_content = self.objstorage.get(sha1) | ||||
▲ Show 20 Lines • Show All 147 Lines • ▼ Show 20 Lines | def run(self, ids, policy_update='update-dups', parse_ids=True, | ||||
Args: | Args: | ||||
ids ([Union[int, Tuple[str, bytes]]]): list of origin ids or | ids ([Union[int, Tuple[str, bytes]]]): list of origin ids or | ||||
(type, url) tuples. | (type, url) tuples. | ||||
policy_update (str): either 'update-dups' or 'ignore-dups' to | policy_update (str): either 'update-dups' or 'ignore-dups' to | ||||
respectively update duplicates (default) | respectively update duplicates (default) | ||||
or ignore them | or ignore them | ||||
next_step (dict): a dict in the form expected by | next_step (dict): a dict in the form expected by | ||||
`scheduler.backend.SchedulerBackend.create_tasks` | `scheduler.backend.SchedulerBackend.create_tasks` | ||||
without `next_run`, plus a `result_name` key. | without `next_run`, plus an optional `result_name` key. | ||||
parse_ids (bool): Do we need to parse id or not (default) | parse_ids (bool): Do we need to parse id or not (default) | ||||
**kwargs: passed to the `index` method | **kwargs: passed to the `index` method | ||||
""" | """ | ||||
if parse_ids: | if parse_ids: | ||||
ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id | ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id | ||||
for o in ids] | for o in ids] | ||||
▲ Show 20 Lines • Show All 73 Lines • Show Last 20 Lines |