def origin_get_params(self, id_):
    """Translate one origin identifier into ``origin_get`` parameters.

    Args:
        id_: an integer origin id, a ``(type, url)`` pair given as a
            tuple or list, or a string holding the Python literal of
            either of those.

    Returns:
        dict: ``{'id': id_}`` for an integer, or
        ``{'type': type_, 'url': url}`` for a pair.

    Raises:
        TypeError: if a pair does not have exactly two items, or if the
            (decoded) value is neither a pair nor an integer.

    Errors raised by ``ast.literal_eval`` on a malformed string are not
    caught here and propagate to the caller.
    """
    if isinstance(id_, str):
        # Data coming from JSON, which requires string keys, so
        # one extra level of deserialization is needed
        id_ = ast.literal_eval(id_)

    if isinstance(id_, (tuple, list)):
        if len(id_) != 2:
            raise TypeError('Expected a (type, url) tuple.')
        type_, url = id_
        return {'type': type_, 'url': url}

    if isinstance(id_, int):
        return {'id': id_}

    raise TypeError('Invalid value in "ids": %r' % id_)


def index_list(self, origins, **kwargs):
    """Index each origin in turn and collect the non-empty results.

    Every origin is passed to ``self.index`` together with ``kwargs``.
    When indexing one origin raises, the error is reported through
    ``self.log.exception`` and the loop moves on to the next origin.

    Args:
        origins: iterable of origins to hand to ``self.index``.
        **kwargs: forwarded unchanged to ``self.index``.

    Returns:
        list: the truthy values returned by ``self.index``, in input
        order.
    """
    results = []
    for origin in origins:
        try:
            res = self.index(origin, **kwargs)
            if res:  # If no results, skip it
                results.append(res)
        except Exception:
            # Resolve the label defensively: a failing ``origin['id']``
            # lookup inside this handler would abort the whole batch
            # instead of just skipping the faulty origin.
            try:
                label = origin['id']
            except (KeyError, IndexError, TypeError):
                label = origin
            self.log.exception(
                'Problem when processing origin %s',
                label)
    return results