Differential D2324 Diff 8688 swh/storage/storage.py

Changeset View

Standalone View

swh/storage/storage.py

Show First 20 Lines • Show All 1,684 Lines • ▼ Show 20 Lines	def origin_get_range(self, origin_from=1, origin_count=100,

Yields:		Yields:
dicts containing origin information as returned		dicts containing origin information as returned
by :meth:`swh.storage.storage.Storage.origin_get`.		by :meth:`swh.storage.storage.Storage.origin_get`.
"""		"""
for origin in db.origin_get_range(origin_from, origin_count, cur):		for origin in db.origin_get_range(origin_from, origin_count, cur):
yield dict(zip(db.origin_get_range_cols, origin))		yield dict(zip(db.origin_get_range_cols, origin))

		@remote_api_endpoint('origin/list')
		@timed
		@db_transaction()
		def origin_list(self, page_token=None, limit=100, *, db, cur):
		    """Returns the list of origins

		    Args:
		        page_token (Optional[str]): opaque token used for pagination.
		            Any falsy value (None, '') starts from the first page.
		        limit (int): the maximum number of results to return

		    Returns:
		        dict: dict with the following keys:
		          - **next_page_token** (str, optional): opaque token to be used
		            as `page_token` for retrieving the next page. If absent,
		            there are no more pages to gather.
		          - **origins** (List[dict]): list of origins, as returned by
		            `origin_get`.

		    Raises:
		        TypeError: if `page_token` is truthy but not a string.
		        ValueError: if `page_token` is a string that `int()` cannot
		            parse.
		    """
		    # The token is deliberately a string: it is opaque, so clients must
		    # not rely on what it contains and its encoding can change at any
		    # time (e.g. a Cassandra backend would use a large byte array).
		    page_token = page_token or '0'
		    if not isinstance(page_token, str):
		        raise TypeError('page_token must be a string.')
		    origin_from = int(page_token)

		    # Rows are fetched with their id because the id of the last row is
		    # needed to compute `next_page_token`; it is stripped before
		    # returning.
		    origins = [
		        dict(zip(db.origin_get_range_cols, origin))
		        for origin in db.origin_get_range(origin_from, limit, cur)
		    ]
		    assert len(origins) <= limit

		    result = {'origins': origins}

		    # A full page means there may be more origins after it. The
		    # emptiness guard matters for limit=0: the page is then empty yet
		    # "full", and indexing its last element would raise IndexError.
		    if origins and len(origins) == limit:
		        result['next_page_token'] = str(origins[-1]['id'] + 1)

		    for origin in origins:
		        del origin['id']

		    return result

@remote_api_endpoint('origin/search')		@remote_api_endpoint('origin/search')
@timed		@timed
@db_transaction_generator()		@db_transaction_generator()
def origin_search(self, url_pattern, offset=0, limit=50,		def origin_search(self, url_pattern, offset=0, limit=50,
regexp=False, with_visit=False, db=None, cur=None):		regexp=False, with_visit=False, db=None, cur=None):
"""Search for origins whose urls contain a provided string pattern		"""Search for origins whose urls contain a provided string pattern
or match a provided regular expression.		or match a provided regular expression.
The search is performed in a case insensitive way.		The search is performed in a case insensitive way.
▲ Show 20 Lines • Show All 347 Lines • Show Last 20 Lines