Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9341083
D654.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
D654.diff
View Options
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -110,6 +110,48 @@
for obj in db.content_mimetype_missing_from_list(mimetypes, cur):
yield obj[0]
+    @remote_api_endpoint('content_mimetype/range')
+    @db_transaction()
+    def content_mimetype_range(self, start, end, indexer_configuration_id,
+                               limit=1000, db=None, cur=None):
+        """Retrieve mimetypes within range [start, end] bound by limit.
+
+        Args:
+            **start** (bytes): Starting identifier range (expected smaller
+                               than end)
+            **end** (bytes): Ending identifier range (expected larger
+                             than start)
+            **indexer_configuration_id** (int): The tool used to index data
+            **limit** (int): Limit result (default to 1000)
+
+        Raises:
+            ValueError: if limit is None
+
+        Returns:
+            a dict with keys:
+            - **ids**: list of the identifiers (first column of each row)
+              found in the range, at most `limit` of them
+            - **next_id**: identifier of the first entry left out of `ids`
+              when the range holds more than `limit` entries, None
+              otherwise
+
+        """
+        if limit is None:
+            raise ValueError('Development error: limit should not be None')
+
+        ids = []
+        next_id = None
+        # One row more than `limit` is requested: if it comes back, the
+        # range holds more data and that row marks where to resume from.
+        for counter, obj in enumerate(db.content_get_range(
+                'mimetype', start, end, indexer_configuration_id,
+                limit=limit+1, cur=cur)):
+            _id = obj[0]
+            if counter >= limit:
+                # Start of the next page, not part of this one.
+                next_id = _id
+                break
+            ids.append(_id)
+
+        return {
+            'ids': ids,
+            'next_id': next_id
+        }
+
@remote_api_endpoint('content_mimetype/add')
@db_transaction()
def content_mimetype_add(self, mimetypes, conflict_update=False, db=None,
diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py
--- a/swh/indexer/storage/db.py
+++ b/swh/indexer/storage/db.py
@@ -113,6 +113,38 @@
((_id,) for _id in ids)
)
+    # content_type values accepted by content_get_range, mapped to the
+    # table each one is read from.
+    content_tables = {
+        'mimetype': 'content_mimetype',
+        'language': 'content_language',
+    }
+
+    def content_get_range(self, content_type, start, end,
+                          indexer_configuration_id, limit=1000, cur=None):
+        """Retrieve contents with content_type, within range [start, end]
+           bound by limit and associated to the given indexer
+           configuration id.
+
+        Args:
+            content_type (str): one of the keys of content_tables
+            start: lower bound of the sha1 range (inclusive)
+            end: upper bound of the sha1 range (inclusive)
+            indexer_configuration_id: only rows attached to this indexer
+                configuration are returned
+            limit (int): maximum number of rows to return (default to 1000)
+            cur: cursor to run the query with, resolved through
+                self._cursor
+
+        Raises:
+            ValueError: if content_type is not a key of content_tables.
+                As this method is a generator, the error only surfaces
+                when the result is first iterated over.
+
+        Yields:
+            the matching rows, passed through cursor_to_bytes, ordered by
+            indexer configuration id then sha1
+
+        """
+        cur = self._cursor(cur)
+        table = self.content_tables.get(content_type)
+        if not table:
+            raise ValueError(
+                'Development error: Wrong type. Should be one of [%s]' % (
+                    ','.join(self.content_tables)))
+        # NOTE(review): the selected columns come from
+        # content_get_metadata_keys; confirm they exist (and are not
+        # ambiguous with indexer_configuration columns) for every table
+        # listed in content_tables.
+        query = """select %s
+                   from %s t
+                   inner join indexer_configuration ic
+                   on t.indexer_configuration_id=ic.id
+                   where ic.id=%%s and
+                         %%s <= t.sha1 and t.sha1 <= %%s
+                   order by t.indexer_configuration_id, t.sha1
+                   limit %%s""" % (
+            ', '.join(self.content_get_metadata_keys),  # keys
+            table
+        )
+        # Parameters are bound positionally, so they must follow the order
+        # of the placeholders: configuration id, start, end, limit.
+        cur.execute(query, (indexer_configuration_id, start, end, limit))
+        yield from cursor_to_bytes(cur)
+
def content_mimetype_get_from_list(self, ids, cur=None):
yield from self._get_from_list(
'content_mimetype', ids, self.content_mimetype_cols, cur=cur)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 11:37 AM (3 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218725
Attached To
D654: swh.indexer.storage: Open content_mimetype_get_range
Event Timeline
Log In to Comment