Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import contextlib | import contextlib | ||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from typing import ( | from typing import ( | ||||
Any, | |||||
Counter, | Counter, | ||||
Dict, | Dict, | ||||
Iterable, | Iterable, | ||||
List, | List, | ||||
Optional, | Optional, | ||||
Tuple, | Tuple, | ||||
Union, | Union, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 1,056 Lines • ▼ Show 20 Lines | class Storage: | ||||
def origin_get_range(self, origin_from=1, origin_count=100, db=None, cur=None): | def origin_get_range(self, origin_from=1, origin_count=100, db=None, cur=None): | ||||
for origin in db.origin_get_range(origin_from, origin_count, cur): | for origin in db.origin_get_range(origin_from, origin_count, cur): | ||||
yield dict(zip(db.origin_get_range_cols, origin)) | yield dict(zip(db.origin_get_range_cols, origin)) | ||||
@timed | @timed | ||||
@db_transaction() | @db_transaction() | ||||
def origin_list( | def origin_list( | ||||
self, page_token: Optional[str] = None, limit: int = 100, *, db=None, cur=None | self, page_token: Optional[str] = None, limit: int = 100, *, db=None, cur=None | ||||
) -> dict: | ) -> PagedResult[Origin]: | ||||
page_token = page_token or "0" | page_token = page_token or "0" | ||||
if not isinstance(page_token, str): | if not isinstance(page_token, str): | ||||
raise StorageArgumentException("page_token must be a string.") | raise StorageArgumentException("page_token must be a string.") | ||||
origin_from = int(page_token) | origin_from = int(page_token) | ||||
result: Dict[str, Any] = { | next_page_token = None | ||||
"origins": [ | |||||
dict(zip(db.origin_get_range_cols, origin)) | |||||
for origin in db.origin_get_range(origin_from, limit, cur) | |||||
], | |||||
} | |||||
assert len(result["origins"]) <= limit | |||||
if len(result["origins"]) == limit: | |||||
result["next_page_token"] = str(result["origins"][limit - 1]["id"] + 1) | |||||
for origin in result["origins"]: | origins: List[Origin] = [] | ||||
del origin["id"] | # Take one more origin so we can reuse it as the next page token if any | ||||
for row_d in self.origin_get_range(origin_from, limit + 1, db=db, cur=cur): | |||||
origins.append(Origin(url=row_d["url"])) | |||||
# keep the last_id for the pagination if needed | |||||
last_id = row_d["id"] | |||||
if len(origins) > limit: # data left for subsequent call | |||||
# last origin id is the next page token | |||||
next_page_token = str(last_id) | |||||
# excluding that origin from the result to respect the limit size | |||||
origins = origins[:limit] | |||||
return result | assert len(origins) <= limit | ||||
return PagedResult(results=origins, next_page_token=next_page_token) | |||||
@timed | @timed | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
def origin_search( | def origin_search( | ||||
self, | self, | ||||
url_pattern, | url_pattern, | ||||
offset=0, | offset=0, | ||||
limit=50, | limit=50, | ||||
▲ Show 20 Lines • Show All 292 Lines • Show Last 20 Lines |