Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import bisect | import bisect | ||||
import dateutil | import dateutil | ||||
import collections | import collections | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
import random | import random | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from datetime import timedelta | from datetime import timedelta | ||||
from typing import Any, Dict, Mapping | from typing import Any, Dict, Mapping, Optional | ||||
import attr | import attr | ||||
from swh.model.model import \ | from swh.model.model import \ | ||||
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin | Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS | from swh.model.hashutil import DEFAULT_ALGORITHMS | ||||
from swh.objstorage import get_objstorage | from swh.objstorage import get_objstorage | ||||
from swh.objstorage.exc import ObjNotFoundError | from swh.objstorage.exc import ObjNotFoundError | ||||
▲ Show 20 Lines • Show All 1,125 Lines • ▼ Show 20 Lines | def origin_get_range(self, origin_from=1, origin_count=100): | ||||
if origin_from <= len(self._origins_by_id): | if origin_from <= len(self._origins_by_id): | ||||
max_idx = origin_from + origin_count - 1 | max_idx = origin_from + origin_count - 1 | ||||
if max_idx > len(self._origins_by_id): | if max_idx > len(self._origins_by_id): | ||||
max_idx = len(self._origins_by_id) | max_idx = len(self._origins_by_id) | ||||
for idx in range(origin_from-1, max_idx): | for idx in range(origin_from-1, max_idx): | ||||
origin = self._convert_origin( | origin = self._convert_origin( | ||||
self._origins[self._origins_by_id[idx]]) | self._origins[self._origins_by_id[idx]]) | ||||
yield {'id': idx+1, **origin} | yield {'id': idx+1, **origin} | ||||
ardumont: Why don't you add type here and not within the docstring? | |||||
Done Inline Actions vlorentz: Because I wrote that diff in mid-November and haven't touched it since, and we were still using Python 3.5 at the time. I'll have to rebase it. | |||||
def origin_list(self, page_token: Optional[str] = None, limit: int = 100 | |||||
) -> dict: | |||||
"""Return one page of origins, ordered by URL. | |||||
Args: | |||||
Not Done Inline ActionsI thought this was to be named count? ardumont: I thought this was to be named `count`? | |||||
Done Inline ActionsYeah I guess we're not consistent here. I have a small preference for limit, sorry for asking the change on the other diff, I didn't realize the inconsistency vlorentz: Yeah I guess we're not consistent here. I have a small preference for `limit`, sorry for asking… | |||||
Not Done Inline ActionsWell, i don't mind ;) What's the conclusion though, shall we stay on limit or use count? ardumont: Well, i don't mind ;)
What's the conclusion though, shall we stay on limit or use count?
I ask… | |||||
page_token: opaque token used for pagination; when empty or None, | |||||
listing starts from the first origin. | |||||
limit: the maximum number of results to return | |||||
Returns: | |||||
dict: dict with the following keys: | |||||
Done Inline Actionsretrieving ardumont: retrieving | |||||
- **next_page_token** (str, optional): opaque token to be used as | |||||
`page_token` for retrieving the next page. If absent, there are | |||||
no more pages to gather. | |||||
- **origins** (List[dict]): list of origins, as returned by | |||||
`origin_get`. | |||||
""" | |||||
# Sorted on every call so that successive pages follow one fixed order. | |||||
origin_urls = sorted(self._origins) | |||||
if page_token: | |||||
# The token is the URL that opens the requested page; bisect finds it. | |||||
from_ = bisect.bisect_left(origin_urls, page_token) | |||||
else: | |||||
from_ = 0 | |||||
result = { | |||||
'origins': [{'url': origin_url} | |||||
for origin_url in origin_urls[from_:from_+limit]] | |||||
} | |||||
# More origins remain: the first URL past this page is the next token. | |||||
if from_+limit < len(origin_urls): | |||||
result['next_page_token'] = origin_urls[from_+limit] | |||||
return result | |||||
def origin_search(self, url_pattern, offset=0, limit=50, | def origin_search(self, url_pattern, offset=0, limit=50, | ||||
regexp=False, with_visit=False, db=None, cur=None): | regexp=False, with_visit=False, db=None, cur=None): | ||||
"""Search for origins whose urls contain a provided string pattern | """Search for origins whose urls contain a provided string pattern | ||||
or match a provided regular expression. | or match a provided regular expression. | ||||
The search is performed in a case insensitive way. | The search is performed in a case insensitive way. | ||||
Args: | Args: | ||||
url_pattern (str): the string pattern to search for in origin urls | url_pattern (str): the string pattern to search for in origin urls | ||||
▲ Show 20 Lines • Show All 615 Lines • Show Last 20 Lines |
Why don't you add type here and not within the docstring?