Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 1,684 Lines • ▼ Show 20 Lines | def origin_get_range(self, origin_from=1, origin_count=100, | ||||
Yields: | Yields: | ||||
dicts containing origin information as returned | dicts containing origin information as returned | ||||
by :meth:`swh.storage.storage.Storage.origin_get`. | by :meth:`swh.storage.storage.Storage.origin_get`. | ||||
""" | """ | ||||
for origin in db.origin_get_range(origin_from, origin_count, cur): | for origin in db.origin_get_range(origin_from, origin_count, cur): | ||||
yield dict(zip(db.origin_get_range_cols, origin)) | yield dict(zip(db.origin_get_range_cols, origin)) | ||||
@remote_api_endpoint('origin/list') | |||||
@timed | |||||
@db_transaction() | |||||
def origin_list(self, page_token=None, limit=100, *, db, cur): | |||||
"""Returns the list of origins | |||||
Args: | |||||
page_token (Optional[str]): opaque token used for pagination. | |||||
limit (int): the maximum number of results to return | |||||
Returns: | |||||
dict: dict with the following keys: | |||||
- **next_page_token** (str, optional): opaque token to be used as | |||||
`page_token` for retrieveing the next page. if absent, there is | |||||
no more pages to gather. | |||||
ardumont: same remark as for the in-memory implementation (type, name, typo). | |||||
- **origins** (List[dict]): list of origins, as returned by | |||||
`origin_get`. | |||||
""" | |||||
Not Done Inline ActionsWhy do we force the string type here? ardumont: Why do we force the string type here?
| |||||
Done Inline ActionsBecause it's an opaque token, so clients shouldn't know what it actually contains, so we can change it at any time. (e.g. for Cassandra, it will be a large byte array) vlorentz: Because it's an opaque token, so clients shouldn't know what it actually contains, so we can… | |||||
page_token = page_token or '0' | |||||
if not isinstance(page_token, str): | |||||
raise TypeError('page_token must be a string.') | |||||
origin_from = int(page_token) | |||||
result = { | |||||
'origins': [ | |||||
dict(zip(db.origin_get_range_cols, origin)) | |||||
for origin in db.origin_get_range(origin_from, limit, cur) | |||||
], | |||||
} | |||||
assert len(result['origins']) <= limit | |||||
if len(result['origins']) == limit: | |||||
result['next_page_token'] = str(result['origins'][limit-1]['id']+1) | |||||
Not Done Inline ActionsWhy don't you change the db.origin_get_range implementation so that it returns what you want instead? ardumont: Why don't you change the `db.origin_get_range` implementation so that it returns what you want… | |||||
Done Inline ActionsBecause I need the origin id to compute the next_page_token. vlorentz: Because I need the origin id to compute the `next_page_token`. | |||||
for origin in result['origins']: | |||||
del origin['id'] | |||||
return result | |||||
@remote_api_endpoint('origin/search') | @remote_api_endpoint('origin/search') | ||||
@timed | @timed | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
def origin_search(self, url_pattern, offset=0, limit=50, | def origin_search(self, url_pattern, offset=0, limit=50, | ||||
regexp=False, with_visit=False, db=None, cur=None): | regexp=False, with_visit=False, db=None, cur=None): | ||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
The search is performed in a case insensitive way. | The search is performed in a case insensitive way. | ||||
▲ Show 20 Lines • Show All 347 Lines • Show Last 20 Lines |
same remark as for the in-memory implementation (type, name, typo).