Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/storage/__init__.py
Show First 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | def content_mimetype_missing(self, mimetypes, db=None, cur=None): | ||||
Yields: | Yields: | ||||
tuple (id, indexer_configuration_id): missing id | tuple (id, indexer_configuration_id): missing id | ||||
""" | """ | ||||
for obj in db.content_mimetype_missing_from_list(mimetypes, cur): | for obj in db.content_mimetype_missing_from_list(mimetypes, cur): | ||||
yield obj[0] | yield obj[0] | ||||
@remote_api_endpoint('content_mimetype/range') | |||||
@db_transaction() | |||||
def content_mimetype_range(self, start, end, indexer_configuration_id, | |||||
limit=1000, db=None, cur=None): | |||||
"""Retrieve mimetypes within range [start, end] bound by limit. | |||||
Args: | |||||
**start** (bytes): Starting identifier range (expected smaller | |||||
than end) | |||||
**end** (bytes): Ending identifier range (expected larger | |||||
than start) | |||||
**indexer_configuration_id** (int): The tool used to indexed data | |||||
**limit** (int): Limit result (default to 1000) | |||||
Returns: | |||||
a dict with keys: | |||||
- contents [dict]: iterable of contents in between the range. | |||||
- next (bytes): There remains content in the range | |||||
starting from this next sha1 | |||||
Yields: | |||||
an iterable of mimetypes within the specified range | |||||
""" | |||||
vlorentz: `Optional[bytes]` means it's either `None` or an instance of `bytes`. You should probably use `Optional[Tuple[bytes, bytes]]`. | |||||
Done Inline Actions — ardumont: next is either None or an instance of bytes. | |||||
Not Done Inline Actions — vlorentz: Oh sorry, I misread the description. I assumed from "The next range" that it was a range -_- | |||||
if limit is None: | |||||
raise ValueError('Development error: limit should not be None') | |||||
ids = [] | |||||
next_id = None | |||||
for counter, obj in enumerate(db.content_get_range( | |||||
'content_mimetype', start, end, | |||||
Not Done Inline Actions — vlorentz: Should be `content_types`, not `content_tables`. Also, that means this line is not tested. | |||||
Done Inline Actions — ardumont: Nice catch. | |||||
Done Inline Actions — ardumont: fyi, that conditional is not tested... because I don't know how to test it due to the `db` thing... | |||||
limit=limit+1, cur=cur)): | |||||
_id = obj[0] | |||||
if counter >= limit: | |||||
next_id = _id | |||||
ids.append(_id) | |||||
return { | |||||
Not Done Inline Actions — vlorentz: Why the `+1`? | |||||
Done Inline Actions — ardumont: To take the last one as `next` (if any). | |||||
'ids': ids, | |||||
'next_id': next_id | |||||
} | |||||
@remote_api_endpoint('content_mimetype/add') | @remote_api_endpoint('content_mimetype/add') | ||||
@db_transaction() | @db_transaction() | ||||
def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, | def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, | ||||
cur=None): | cur=None): | ||||
"""Add mimetypes not present in storage. | """Add mimetypes not present in storage. | ||||
Args: | Args: | ||||
mimetypes (iterable): dictionaries with keys: | mimetypes (iterable): dictionaries with keys: | ||||
Show All 13 Lines | def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, | ||||
['id', 'mimetype', 'encoding', 'indexer_configuration_id'], | ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], | ||||
cur) | cur) | ||||
db.content_mimetype_add_from_temp(conflict_update, cur) | db.content_mimetype_add_from_temp(conflict_update, cur) | ||||
@remote_api_endpoint('content_mimetype') | @remote_api_endpoint('content_mimetype') | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
def content_mimetype_get(self, ids, db=None, cur=None): | def content_mimetype_get(self, ids, db=None, cur=None): | ||||
"""Retrieve full content mimetype per ids. | """Retrieve full content mimetype per ids. | ||||
Done Inline Actions — vlorentz: Same comment as on `_content_get_range`'s doc. | |||||
Done Inline Actions — ardumont: Same answer, `next` is an Optional[bytes]. | |||||
Args: | Args: | ||||
ids (iterable): sha1 identifier | ids (iterable): sha1 identifier | ||||
Yields: | Yields: | ||||
mimetypes (iterable): dictionaries with keys: | mimetypes (iterable): dictionaries with keys: | ||||
- **id** (bytes): sha1 identifier | - **id** (bytes): sha1 identifier | ||||
- **mimetype** (bytes): raw content's mimetype | - **mimetype** (bytes): raw content's mimetype | ||||
▲ Show 20 Lines • Show All 457 Lines • Show Last 20 Lines |
Optional[bytes] means it's either None or an instance of bytes. You should probably use Optional[Tuple[bytes, bytes]].