Page Menu | Home | Software Heritage

D1090.id3429.diff
No One | Temporary

D1090.id3429.diff

diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -511,6 +511,37 @@
return with_indexed_data
+def origin_get_params(id_):
+ """From either of the two types of origin identifiers (int or
+ type+url), return a dict that can be passed to Storage.origin_get.
+ Also accepts JSON-encoded forms of these (used via the task scheduler).
+
+ >>> from pprint import pprint
+ >>> origin_get_params(123)
+ {'id': 123}
+ >>> pprint(origin_get_params(['git', 'https://example.com/foo.git']))
+ {'type': 'git', 'url': 'https://example.com/foo.git'}
+ >>> origin_get_params("123")
+ {'id': 123}
+ >>> pprint(origin_get_params('["git", "https://example.com/foo.git"]'))
+ {'type': 'git', 'url': 'https://example.com/foo.git'}
+ """
+ if isinstance(id_, str):
+ # Data coming from JSON, which requires string keys, so
+ # one extra level of deserialization is needed
+ id_ = ast.literal_eval(id_)
+ if isinstance(id_, (tuple, list)):
+ if len(id_) != 2:
+ raise TypeError('Expected a (type, url) tuple.')
+ (type_, url) = id_
+ params = {'type': type_, 'url': url}
+ elif isinstance(id_, int):
+ params = {'id': id_}
+ else:
+ raise TypeError('Invalid value in "ids": %r' % id_)
+ return params
+
+
class OriginIndexer(BaseIndexer):
"""An object type indexer, inherits from the :class:`BaseIndexer` and
implements Origin indexing using the run method
@@ -545,37 +576,33 @@
ids = [o.split('+', 1) if ':' in o else int(o) # type+url or id
for o in ids]
- results = []
-
+ origins = []
for id_ in ids:
- if isinstance(id_, str):
- # Data coming from JSON, which requires string keys, so
- # one extra level of deserialization is needed
- id_ = ast.literal_eval(id_)
- if isinstance(id_, (tuple, list)):
- if len(id_) != 2:
- raise TypeError('Expected a (type, url) tuple.')
- (type_, url) = id_
- params = {'type': type_, 'url': url}
- elif isinstance(id_, int):
- params = {'id': id_}
- else:
- raise TypeError('Invalid value in "ids": %r' % id_)
- origin = self.storage.origin_get(params)
+ origin = self.storage.origin_get(origin_get_params(id_))
if not origin:
self.log.warning('Origin %s not found in storage' %
id_)
continue
+ origins.append(origin)
+
+ results = self.index_list(origins, **kwargs)
+
+ self.persist_index_computations(results, policy_update)
+ self.results = results
+ return self.next_step(results, task=next_step)
+
+ def index_list(self, origins, **kwargs):
+ results = []
+ for origin in origins:
try:
res = self.index(origin, **kwargs)
if res: # If no results, skip it
results.append(res)
except Exception:
self.log.exception(
- 'Problem when processing origin %s' % (id_,))
- self.persist_index_computations(results, policy_update)
- self.results = results
- return self.next_step(results, task=next_step)
+ 'Problem when processing origin %s',
+ origin['id'])
+ return results
class RevisionIndexer(BaseIndexer):

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:37 AM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220588

Event Timeline