Differential D2324 Diff 8704 swh/storage/in_memory.py

Changeset View

Standalone View

swh/storage/in_memory.py

# Copyright (C) 2015-2019 The Software Heritage developers		# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution		# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version		# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information		# See top-level LICENSE file for more information

import re		import re
import bisect		import bisect
import dateutil		import dateutil
import collections		import collections
import copy		import copy
import datetime		import datetime
import itertools		import itertools
import random		import random

from collections import defaultdict		from collections import defaultdict
from datetime import timedelta		from datetime import timedelta
from typing import Any, Dict, Mapping		from typing import Any, Dict, Mapping, Optional

import attr		import attr

from swh.model.model import \		from swh.model.model import \
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin		Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin
from swh.model.hashutil import DEFAULT_ALGORITHMS		from swh.model.hashutil import DEFAULT_ALGORITHMS
from swh.objstorage import get_objstorage		from swh.objstorage import get_objstorage
from swh.objstorage.exc import ObjNotFoundError		from swh.objstorage.exc import ObjNotFoundError
▲ Show 20 Lines • Show All 1,125 Lines • ▼ Show 20 Lines	def origin_get_range(self, origin_from=1, origin_count=100):
if origin_from <= len(self._origins_by_id):		if origin_from <= len(self._origins_by_id):
max_idx = origin_from + origin_count - 1		max_idx = origin_from + origin_count - 1
if max_idx > len(self._origins_by_id):		if max_idx > len(self._origins_by_id):
max_idx = len(self._origins_by_id)		max_idx = len(self._origins_by_id)
for idx in range(origin_from-1, max_idx):		for idx in range(origin_from-1, max_idx):
origin = self._convert_origin(		origin = self._convert_origin(
self._origins[self._origins_by_id[idx]])		self._origins[self._origins_by_id[idx]])
yield {'id': idx+1, **origin}		yield {'id': idx+1, **origin}

		ardumontUnsubmitted Done Inline Actions Why don't you add type here and not within the docstring? ardumont: Why don't you add type here and not within the docstring?
		vlorentzAuthorUnsubmitted Done Inline Actions Because I wrote that diff in mid-november and didn't touch it since; and we still used Python 3.5 at the time. I'll have to rebase it vlorentz: Because I wrote that diff in mid-november and didn't touch it since; and we still used Python 3.
		def origin_list(self, page_token: Optional[str] = None, limit: int = 100
		                ) -> dict:
		    """Return one page of origins, ordered by URL.

		    Args:
		        page_token: opaque token used for pagination, as found under
		            `next_page_token` in the result of a previous call. A falsy
		            value (`None` or an empty string) starts from the first
		            origin.
		        limit: the maximum number of results to return.

		    Returns:
		        dict: dict with the following keys:

		        - next_page_token (str, optional): opaque token to be used as
		          `page_token` for retrieving the next page. If absent, there
		          are no more pages to gather.
		        - origins (List[dict]): list of origins, each a dict holding
		          the origin's `url`.

		    Raises:
		        ValueError: if `limit` is negative.
		    """
		    # A negative limit would turn the slice bound below into an index
		    # counted from the end of the list, silently returning almost every
		    # origin instead of none; reject it explicitly.
		    if limit < 0:
		        raise ValueError('limit must be non-negative, got %r' % (limit,))

		    # NOTE(review): the `limit` vs `count` naming was left unsettled
		    # with a sibling diff during review -- confirm before merging.
		    sorted_urls = sorted(self._origins)

		    # The token handed out is the URL of the first origin of the next
		    # page, so resuming is a binary search in the sorted URL list.
		    start = bisect.bisect_left(sorted_urls, page_token) if page_token else 0
		    end = start + limit

		    result = {
		        'origins': [{'url': url} for url in sorted_urls[start:end]],
		    }

		    # Only advertise a next page when at least one origin remains.
		    if end < len(sorted_urls):
		        result['next_page_token'] = sorted_urls[end]

		    return result

def origin_search(self, url_pattern, offset=0, limit=50,		def origin_search(self, url_pattern, offset=0, limit=50,
regexp=False, with_visit=False, db=None, cur=None):		regexp=False, with_visit=False, db=None, cur=None):
"""Search for origins whose urls contain a provided string pattern		"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.		or match a provided regular expression.
The search is performed in a case insensitive way.		The search is performed in a case insensitive way.

Args:		Args:
url_pattern (str): the string pattern to search for in origin urls		url_pattern (str): the string pattern to search for in origin urls
▲ Show 20 Lines • Show All 615 Lines • Show Last 20 Lines