Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/storage.py
Show First 20 Lines • Show All 699 Lines • ▼ Show 20 Lines | def origin_get_by_sha1(self, sha1s): | ||||
for sha1 in sha1s: | for sha1 in sha1s: | ||||
rows = self._cql_runner.origin_get_by_sha1(sha1) | rows = self._cql_runner.origin_get_by_sha1(sha1) | ||||
if rows: | if rows: | ||||
results.append({"url": rows.one().url}) | results.append({"url": rows.one().url}) | ||||
else: | else: | ||||
results.append(None) | results.append(None) | ||||
return results | return results | ||||
def origin_list(
    self, page_token: Optional[str] = None, limit: int = 100
) -> PagedResult[Origin]:
    """List origins, one page at a time.

    Args:
        page_token: opaque token returned by a previous call; when falsy,
            the listing starts from ``TOKEN_BEGIN``.
        limit: maximum number of origins to return in this page.

    Returns:
        A ``PagedResult`` whose ``results`` holds at most ``limit`` origins
        and whose ``next_page_token`` is the token of the first origin that
        was *not* returned, or ``None`` when no further row was fetched.

    Raises:
        StorageArgumentException: if ``page_token`` is not an integer, or
            is outside of the ``[TOKEN_BEGIN, TOKEN_END]`` range.
    """
    # Compute what token to begin the listing from
    start_token = TOKEN_BEGIN
    if page_token:
        try:
            start_token = int(page_token)
        except ValueError:
            # A non-numeric token is as invalid as an out-of-range one;
            # report it the same way instead of leaking a bare ValueError.
            raise StorageArgumentException("Invalid page_token.") from None
        if not (TOKEN_BEGIN <= start_token <= TOKEN_END):
            raise StorageArgumentException("Invalid page_token.")

    # Fetch one more row than requested: its presence tells us there is a
    # next page, and its token is where that page starts.
    # NOTE(review): presumably rows come back ordered by token, starting at
    # start_token (inclusive) -- confirm against the CQL runner.
    rows = list(self._cql_runner.origin_list(start_token, limit + 1))

    next_page_token = None
    if len(rows) > limit:
        # The extra row is excluded from this page and becomes the first
        # row of the next one.
        next_page_token = str(rows[limit].tok)
        rows = rows[:limit]

    origins = [Origin(url=row.url) for row in rows]
    return PagedResult(results=origins, next_page_token=next_page_token)
def origin_search( | def origin_search( | ||||
self, url_pattern, offset=0, limit=50, regexp=False, with_visit=False | self, url_pattern, offset=0, limit=50, regexp=False, with_visit=False | ||||
): | ): | ||||
# TODO: remove this endpoint, swh-search should be used instead. | # TODO: remove this endpoint, swh-search should be used instead. | ||||
origins = self._cql_runner.origin_iter_all() | origins = self._cql_runner.origin_iter_all() | ||||
if regexp: | if regexp: | ||||
pat = re.compile(url_pattern) | pat = re.compile(url_pattern) | ||||
▲ Show 20 Lines • Show All 459 Lines • Show Last 20 Lines |