D2587.id.diff
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -6,13 +6,13 @@
from swh.core.api import RPCClient
from ..exc import StorageAPIError
-from ..storage import Storage
+from ..interface import StorageInterface
class RemoteStorage(RPCClient):
"""Proxy to a remote storage API"""
api_exception = StorageAPIError
- backend_class = Storage
+ backend_class = StorageInterface
def reset(self):
return self.post('reset', {})
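With this change, the remote client's proxy methods are driven by StorageInterface's @remote_api_endpoint declarations rather than by the concrete Storage class. A minimal sketch of that pattern, using a simplified stand-in rather than swh.core.api's actual internals (SketchClient and the _endpoint_path attribute are illustrative assumptions):

import functools

def remote_api_endpoint(path):
    # Record the URL suffix on the decorated interface method.
    def decorator(meth):
        meth._endpoint_path = path
        return meth
    return decorator

class SketchClient:
    backend_class = None  # subclasses point this at an interface class

    def __init__(self, url):
        self.url = url.rstrip('/')
        # Build one proxy method per decorated method on the interface.
        for name in dir(self.backend_class):
            meth = getattr(self.backend_class, name, None)
            path = getattr(meth, '_endpoint_path', None)
            if path is not None:
                setattr(self, name, functools.partial(self._post, path))

    def _post(self, path, *args, **kwargs):
        # A real client would serialize the arguments and POST them to
        # self.url + '/' + path; elided in this sketch.
        raise NotImplementedError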
diff --git a/swh/storage/api/server.py b/swh/storage/api/server.py
--- a/swh/storage/api/server.py
+++ b/swh/storage/api/server.py
@@ -12,7 +12,7 @@
error_handler,
encode_data_server as encode_data)
-from ..storage import Storage
+from ..interface import StorageInterface
from ..metrics import timed
@@ -25,7 +25,7 @@
app = RPCServerApp(__name__,
- backend_class=Storage,
+ backend_class=StorageInterface,
backend_factory=get_storage)
storage = None
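On the server side, the app keeps resolving its routes from the interface's declared endpoints, while backend_factory supplies whichever concrete backend the configuration names. A hedged sketch of such a factory (the 'memory' and 'remote' keys and the keyword arguments are illustrative assumptions, not taken from this diff):

def get_storage(cls, args):
    # Illustrative factory: map a configured backend name to a class and
    # instantiate it with the configured arguments.
    if cls == 'memory':
        from swh.storage.in_memory import Storage
        return Storage(**args)
    elif cls == 'remote':
        from swh.storage.api.client import RemoteStorage
        return RemoteStorage(**args)
    raise ValueError('unknown storage class %r' % cls)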
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -72,7 +72,6 @@
self.objstorage = get_objstorage('memory', {})
def check_config(self, *, check_write):
- """Check that the storage is configured and ready to go."""
return True
def _content_add(self, contents, with_data):
@@ -159,66 +158,18 @@
return count
def _content_to_model(self, contents):
- """Takes a list of content dicts, optionally with an extra 'origin'
- key, and yields tuples (model.Content, origin)."""
for content in contents:
content = content.copy()
content.pop('origin', None)
yield Content.from_dict(content)
def content_add(self, content):
- """Add content blobs to the storage
-
- Args:
- content (iterable): iterable of dictionaries representing
- individual pieces of content to add. Each dictionary has the
- following keys:
-
- - data (bytes): the actual content
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.DEFAULT_ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
- - reason (str): if status = absent, the reason why
- - origin (int): if status = absent, the origin we saw the
- content in
-
- Raises:
- HashCollision in case of collision
-
- Returns:
- Summary dict with the following key and associated values:
-
- content:add: New contents added
- content_bytes:add: Sum of the contents' length data
- skipped_content:add: New skipped contents (no data) added
-
- """
now = datetime.datetime.now(tz=datetime.timezone.utc)
content = [attr.evolve(c, ctime=now)
for c in self._content_to_model(content)]
return self._content_add(content, with_data=True)
def content_update(self, content, keys=[]):
- """Update content blobs to the storage. Does nothing for unknown
- contents or skipped ones.
-
- Args:
- content (iterable): iterable of dictionaries representing
- individual pieces of content to update. Each dictionary has the
- following keys:
-
- - data (bytes): the actual content
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
-
- keys (list): List of keys (str) whose values needs an update, e.g.,
- new hash column
- """
if self.journal_writer:
raise NotImplementedError(
'content_update is not yet supported with a journal_writer.')
@@ -243,58 +194,10 @@
self._content_indexes[algorithm][hash_].add(new_key)
def content_add_metadata(self, content):
- """Add content metadata to the storage (like `content_add`, but
- without inserting to the objstorage).
-
- Args:
- content (iterable): iterable of dictionaries representing
- individual pieces of content to add. Each dictionary has the
- following keys:
-
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.DEFAULT_ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
- - reason (str): if status = absent, the reason why
- - origin (int): if status = absent, the origin we saw the
- content in
- - ctime (datetime): time of insertion in the archive
-
- Raises:
- HashCollision in case of collision
-
- Returns:
- Summary dict with the following key and associated values:
-
- content:add: New contents added
- skipped_content:add: New skipped contents (no data) added
-
- """
content = list(self._content_to_model(content))
return self._content_add(content, with_data=False)
def content_get(self, content):
- """Retrieve in bulk contents and their data.
-
- This function may yield more blobs than provided sha1 identifiers,
- in case they collide.
-
- Args:
- content: iterables of sha1
-
- Yields:
- Dict[str, bytes]: Generates streams of contents as dict with their
- raw data:
-
- - sha1 (bytes): content id
- - data (bytes): content's raw data
-
- Raises:
- ValueError in case of too much contents are required.
- cf. BULK_BLOCK_CONTENT_LEN_MAX
-
- """
# FIXME: Make this method support slicing the `data`.
if len(content) > BULK_BLOCK_CONTENT_LEN_MAX:
raise ValueError(
@@ -309,26 +212,6 @@
yield {'sha1': obj_id, 'data': data}
def content_get_range(self, start, end, limit=1000):
- """Retrieve contents within range [start, end] bound by limit.
-
- Note that this function may return more than one blob per hash. The
- limit is enforced with multiplicity (ie. two blobs with the same hash
- will count twice toward the limit).
-
- Args:
- **start** (bytes): Starting identifier range (expected smaller
- than end)
- **end** (bytes): Ending identifier range (expected larger
- than start)
- **limit** (int): Limit result (default to 1000)
-
- Returns:
- a dict with keys:
- - contents [dict]: iterable of contents in between the range.
- - next (bytes): There remains content in the range
- starting from this next sha1
-
- """
if limit is None:
raise ValueError('Development error: limit should not be None')
from_index = bisect.bisect_left(self._sorted_sha1s, start)
@@ -353,25 +236,6 @@
def content_get_partition(
self, partition_id: int, nb_partitions: int, limit: int = 1000,
page_token: str = None):
- """Splits contents into nb_partitions, and returns one of these based on
- partition_id (which must be in [0, nb_partitions-1])
-
- There is no guarantee on how the partitioning is done, or the
- result order.
-
- Args:
- partition_id (int): index of the partition to fetch
- nb_partitions (int): total number of partitions to split into
- limit (int): Limit result (default to 1000)
- page_token (Optional[str]): opaque token used for pagination.
-
- Returns:
- a dict with keys:
- - contents (List[dict]): iterable of contents in the partition.
- - **next_page_token** (Optional[str]): opaque token to be used as
- `page_token` for retrieving the next page. if absent, there is
- no more pages to gather.
- """
if limit is None:
raise ValueError('Development error: limit should not be None')
(start, end) = get_partition_bounds_bytes(
@@ -391,17 +255,6 @@
def content_get_metadata(
self, contents: List[bytes]) -> Dict[bytes, List[Dict]]:
- """Retrieve content metadata in bulk
-
- Args:
- content: iterable of content identifiers (sha1)
-
- Returns:
- a dict with keys the content's sha1 and the associated value
- either the existing content's metadata or None if the content does
- not exist.
-
- """
result: Dict = {sha1: [] for sha1 in contents}
for sha1 in contents:
if sha1 in self._content_indexes['sha1']:
@@ -433,22 +286,6 @@
return [self._contents[key].to_dict() for key in keys]
def content_missing(self, content, key_hash='sha1'):
- """List content missing from storage
-
- Args:
- contents ([dict]): iterable of dictionaries whose keys are
- either 'length' or an item of
- :data:`swh.model.hashutil.ALGORITHMS`;
- mapped to the corresponding checksum
- (or length).
-
- key_hash (str): name of the column to use as hash id
- result (default: 'sha1')
-
- Returns:
- iterable ([bytes]): missing content ids (as per the
- key_hash column)
- """
for cont in content:
for (algo, hash_) in cont.items():
if algo not in DEFAULT_ALGORITHMS:
@@ -462,45 +299,16 @@
yield cont[key_hash]
def content_missing_per_sha1(self, contents):
- """List content missing from storage based only on sha1.
-
- Args:
- contents: Iterable of sha1 to check for absence.
-
- Returns:
- iterable: missing ids
-
- Raises:
- TODO: an exception when we get a hash collision.
-
- """
for content in contents:
if content not in self._content_indexes['sha1']:
yield content
def content_missing_per_sha1_git(self, contents):
- """List content missing from storage based only on sha1_git.
-
- Args:
- contents: An iterable of content id (sha1_git)
-
- Yields:
- missing contents sha1_git
- """
for content in contents:
if content not in self._content_indexes['sha1_git']:
yield content
def skipped_content_missing(self, contents):
- """List all skipped_content missing from storage
-
- Args:
- contents: Iterable of sha1 to check for skipped content entry
-
- Returns:
- iterable: dict of skipped content entry
- """
-
for content in contents:
for (key, algorithm) in self._content_key_algorithm(content):
if algorithm == 'blake2s256':
@@ -512,37 +320,9 @@
break
def content_get_random(self):
- """Finds a random content id.
-
- Returns:
- a sha1_git
- """
return random.choice(list(self._content_indexes['sha1_git']))
def directory_add(self, directories):
- """Add directories to the storage
-
- Args:
- directories (iterable): iterable of dictionaries representing the
- individual directories to add. Each dict has the following
- keys:
-
- - id (sha1_git): the id of the directory to add
- - entries (list): list of dicts for each entry in the
- directory. Each dict has the following keys:
-
- - name (bytes)
- - type (one of 'file', 'dir', 'rev'): type of the
- directory entry (file, directory, revision)
- - target (sha1_git): id of the object pointed at by the
- directory entry
- - perms (int): entry permissions
- Returns:
- Summary dict of keys with associated count as values:
-
- directory:add: Number of directories actually added
-
- """
directories = list(directories)
if self.journal_writer:
self.journal_writer.write_additions(
@@ -563,15 +343,6 @@
return {'directory:add': count}
def directory_missing(self, directories):
- """List directories missing from storage
-
- Args:
- directories (iterable): an iterable of directory ids
-
- Yields:
- missing directory ids
-
- """
for id in directories:
if id not in self._directories:
yield id
@@ -607,41 +378,12 @@
ret['target'], True, prefix + ret['name'] + b'/')
def directory_ls(self, directory, recursive=False):
- """Get entries for one directory.
-
- Args:
- - directory: the directory to list entries from.
- - recursive: if flag on, this list recursively from this directory.
-
- Returns:
- List of entries for such directory.
-
- If `recursive=True`, names in the path of a dir/file not at the
- root are concatenated with a slash (`/`).
- """
yield from self._directory_ls(directory, recursive)
def directory_entry_get_by_path(self, directory, paths):
- """Get the directory entry (either file or dir) from directory with path.
-
- Args:
- - directory: sha1 of the top level directory
- - paths: path to lookup from the top level directory. From left
- (top) to right (bottom).
-
- Returns:
- The corresponding directory entry if found, None otherwise.
-
- """
return self._directory_entry_get_by_path(directory, paths, b'')
def directory_get_random(self):
- """Finds a random directory id.
-
- Returns:
- a sha1_git if any
-
- """
if not self._directories:
return None
return random.choice(list(self._directories))
@@ -674,42 +416,6 @@
first_item['target'], paths[1:], prefix + paths[0] + b'/')
def revision_add(self, revisions):
- """Add revisions to the storage
-
- Args:
- revisions (Iterable[dict]): iterable of dictionaries representing
- the individual revisions to add. Each dict has the following
- keys:
-
- - **id** (:class:`sha1_git`): id of the revision to add
- - **date** (:class:`dict`): date the revision was written
- - **committer_date** (:class:`dict`): date the revision got
- added to the origin
- - **type** (one of 'git', 'tar'): type of the
- revision added
- - **directory** (:class:`sha1_git`): the directory the
- revision points at
- - **message** (:class:`bytes`): the message associated with
- the revision
- - **author** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
- - **committer** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
- - **metadata** (:class:`jsonb`): extra information as
- dictionary
- - **synthetic** (:class:`bool`): revision's nature (tarball,
- directory creates synthetic revision`)
- - **parents** (:class:`list[sha1_git]`): the parents of
- this revision
-
- date dictionaries have the form defined in :mod:`swh.model`.
-
- Returns:
- Summary dict of keys with associated count as values
-
- revision_added: New objects actually stored in db
-
- """
revisions = list(revisions)
if self.journal_writer:
self.journal_writer.write_additions(
@@ -734,15 +440,6 @@
return {'revision:add': count}
def revision_missing(self, revisions):
- """List revisions missing from storage
-
- Args:
- revisions (iterable): revision ids
-
- Yields:
- missing revision ids
-
- """
for id in revisions:
if id not in self._revisions:
yield id
@@ -765,68 +462,18 @@
yield from self._get_parent_revs(parent, seen, limit)
def revision_log(self, revisions, limit=None):
- """Fetch revision entry from the given root revisions.
-
- Args:
- revisions: array of root revision to lookup
- limit: limitation on the output result. Default to None.
-
- Yields:
- List of revision log from such revisions root.
-
- """
seen = set()
for rev_id in revisions:
yield from self._get_parent_revs(rev_id, seen, limit)
def revision_shortlog(self, revisions, limit=None):
- """Fetch the shortlog for the given revisions
-
- Args:
- revisions: list of root revisions to lookup
- limit: depth limitation for the output
-
- Yields:
- a list of (id, parents) tuples.
-
- """
yield from ((rev['id'], rev['parents'])
for rev in self.revision_log(revisions, limit))
def revision_get_random(self):
- """Finds a random revision id.
-
- Returns:
- a sha1_git
- """
return random.choice(list(self._revisions))
def release_add(self, releases):
- """Add releases to the storage
-
- Args:
- releases (Iterable[dict]): iterable of dictionaries representing
- the individual releases to add. Each dict has the following
- keys:
-
- - **id** (:class:`sha1_git`): id of the release to add
- - **revision** (:class:`sha1_git`): id of the revision the
- release points to
- - **date** (:class:`dict`): the date the release was made
- - **name** (:class:`bytes`): the name of the release
- - **comment** (:class:`bytes`): the comment associated with
- the release
- - **author** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
-
- the date dictionary has the form defined in :mod:`swh.model`.
-
- Returns:
- Summary dict of keys with associated count as values
-
- release:add: New objects contents actually stored in db
-
- """
releases = list(releases)
if self.journal_writer:
self.journal_writer.write_additions(
@@ -849,28 +496,9 @@
return {'release:add': count}
def release_missing(self, releases):
- """List releases missing from storage
-
- Args:
- releases: an iterable of release ids
-
- Returns:
- a list of missing release ids
-
- """
yield from (rel for rel in releases if rel not in self._releases)
def release_get(self, releases):
- """Given a list of sha1, return the releases's information
-
- Args:
- releases: list of sha1s
-
- Yields:
- dicts with the same keys as those given to `release_add`
- (or ``None`` if a release does not exist)
-
- """
for rel_id in releases:
if rel_id in self._releases:
yield self._releases[rel_id].to_dict()
@@ -878,42 +506,9 @@
yield None
def release_get_random(self):
- """Finds a random release id.
-
- Returns:
- a sha1_git
- """
return random.choice(list(self._releases))
def snapshot_add(self, snapshots):
- """Add a snapshot to the storage
-
- Args:
- snapshot ([dict]): the snapshots to add, containing the
- following keys:
-
- - **id** (:class:`bytes`): id of the snapshot
- - **branches** (:class:`dict`): branches the snapshot contains,
- mapping the branch name (:class:`bytes`) to the branch target,
- itself a :class:`dict` (or ``None`` if the branch points to an
- unknown object)
-
- - **target_type** (:class:`str`): one of ``content``,
- ``directory``, ``revision``, ``release``,
- ``snapshot``, ``alias``
- - **target** (:class:`bytes`): identifier of the target
- (currently a ``sha1_git`` for all object kinds, or the name
- of the target branch for aliases)
-
- Raises:
- ValueError: if the origin's or visit's identifier does not exist.
-
- Returns:
- Summary dict of keys with associated count as values
-
- snapshot_added: Count of object actually stored in db
-
- """
count = 0
snapshots = (Snapshot.from_dict(d) for d in snapshots)
snapshots = (snap for snap in snapshots
@@ -930,67 +525,14 @@
return {'snapshot:add': count}
def snapshot_missing(self, snapshots):
- """List snapshot missing from storage
-
- Args:
- snapshots (iterable): an iterable of snapshot ids
-
- Yields:
- missing snapshot ids
- """
for id in snapshots:
if id not in self._snapshots:
yield id
def snapshot_get(self, snapshot_id):
- """Get the content, possibly partial, of a snapshot with the given id
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
- Returns:
- dict: a dict with three keys:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
- """
return self.snapshot_get_branches(snapshot_id)
def snapshot_get_by_origin_visit(self, origin, visit):
- """Get the content, possibly partial, of a snapshot for the given origin visit
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- origin (int): the origin's identifier
- visit (int): the visit's identifier
- Returns:
- dict: None if the snapshot does not exist;
- a dict with three keys otherwise:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
-
- """
origin_url = self._get_origin_url(origin)
if not origin_url:
return
@@ -1005,33 +547,6 @@
return None
def snapshot_get_latest(self, origin, allowed_statuses=None):
- """Get the content, possibly partial, of the latest snapshot for the
- given origin, optionally only from visits that have one of the given
- allowed_statuses
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the methods :meth:`origin_visit_get_latest`
- and :meth:`snapshot_get_branches` should be used instead.
-
- Args:
- origin (str): the origin's URL
- allowed_statuses (list of str): list of visit statuses considered
- to find the latest snapshot for the origin. For instance,
- ``allowed_statuses=['full']`` will only consider visits that
- have successfully run to completion.
- Returns:
- dict: a dict with three keys:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
- """
origin_url = self._get_origin_url(origin)
if not origin_url:
return
@@ -1048,46 +563,12 @@
return snapshot
def snapshot_count_branches(self, snapshot_id):
- """Count the number of branches in the snapshot with the given id
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
-
- Returns:
- dict: A dict whose keys are the target types of branches and
- values their corresponding amount
- """
(snapshot, _) = self._snapshots[snapshot_id]
return collections.Counter(branch.target_type.value if branch else None
for branch in snapshot.branches.values())
def snapshot_get_branches(self, snapshot_id, branches_from=b'',
branches_count=1000, target_types=None):
- """Get the content, possibly partial, of a snapshot with the given id
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
- branches_from (bytes): optional parameter used to skip branches
- whose name is lesser than it before returning them
- branches_count (int): optional parameter used to restrain
- the amount of returned branches
- target_types (list): optional parameter used to filter the
- target types of branch to return (possible values that can be
- contained in that list are `'content', 'directory',
- 'revision', 'release', 'snapshot', 'alias'`)
- Returns:
- dict: None if the snapshot does not exist;
- a dict with three keys otherwise:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than
- `branches_count` branches after `branches_from` included.
- """
res = self._snapshots.get(snapshot_id)
if res is None:
return None
@@ -1126,27 +607,9 @@
}
def snapshot_get_random(self):
- """Finds a random snapshot id.
-
- Returns:
- a sha1_git
- """
return random.choice(list(self._snapshots))
def object_find_by_sha1_git(self, ids):
- """Return the objects found with the given ids.
-
- Args:
- ids: a generator of sha1_gits
-
- Returns:
- dict: a mapping from id to the list of objects found. Each object
- found is itself a dict with keys:
-
- - sha1_git: the input id
- - type: the type of object found
-
- """
ret = {}
for id_ in ids:
objs = self._objects.get(id_, [])
@@ -1163,30 +626,6 @@
return t.to_dict()
def origin_get(self, origins):
- """Return origins, either all identified by their ids or all
- identified by urls.
-
- Args:
- origin: a list of dictionaries representing the individual
- origins to find.
- These dicts have either the key url (and optionally type):
-
- - url (bytes): the url the origin points to
-
- or the id:
-
- - id (int): the origin's identifier
-
- Returns:
- dict: the origin dictionary with the keys:
-
- - id: origin's id
- - url: origin's url
-
- Raises:
- ValueError: if the keys does not match (url and type) nor id.
-
- """
if isinstance(origins, dict):
# Old API
return_single = True
@@ -1223,36 +662,12 @@
return results
def origin_get_by_sha1(self, sha1s):
- """Return origins, identified by the sha1 of their URLs.
-
- Args:
- sha1s (list[bytes]): a list of sha1s
-
- Yields:
- dicts containing origin information as returned
- by :meth:`swh.storage.in_memory.Storage.origin_get`, or None if an
- origin matching the sha1 is not found.
- """
return [
self._convert_origin(self._origins_by_sha1.get(sha1))
for sha1 in sha1s
]
def origin_get_range(self, origin_from=1, origin_count=100):
- """Retrieve ``origin_count`` origins whose ids are greater
- or equal than ``origin_from``.
-
- Origins are sorted by id before retrieving them.
-
- Args:
- origin_from (int): the minimum id of origins to retrieve
- origin_count (int): the maximum number of origins to retrieve
-
- Yields:
- dicts containing origin information as returned
- by :meth:`swh.storage.in_memory.Storage.origin_get`, plus
- an 'id' key.
- """
origin_from = max(origin_from, 1)
if origin_from <= len(self._origins_by_id):
max_idx = origin_from + origin_count - 1
@@ -1265,20 +680,6 @@
def origin_list(self, page_token: Optional[str] = None, limit: int = 100
) -> dict:
- """Returns the list of origins
-
- Args:
- page_token: opaque token used for pagination.
- limit: the maximum number of results to return
-
- Returns:
- dict: dict with the following keys:
- - **next_page_token** (str, optional): opaque token to be used as
- `page_token` for retrieving the next page. if absent, there is
- no more pages to gather.
- - **origins** (List[dict]): list of origins, as returned by
- `origin_get`.
- """
origin_urls = sorted(self._origins)
if page_token:
from_ = bisect.bisect_left(origin_urls, page_token)
@@ -1297,23 +698,6 @@
def origin_search(self, url_pattern, offset=0, limit=50,
regexp=False, with_visit=False):
- """Search for origins whose urls contain a provided string pattern
- or match a provided regular expression.
- The search is performed in a case insensitive way.
-
- Args:
- url_pattern (str): the string pattern to search for in origin urls
- offset (int): number of found origins to skip before returning
- results
- limit (int): the maximum number of found origins to return
- regexp (bool): if True, consider the provided pattern as a regular
- expression and return origins whose urls match it
- with_visit (bool): if True, filter out origins with no visit
-
- Returns:
- An iterable of dict containing origin information as returned
- by :meth:`swh.storage.storage.Storage.origin_get`.
- """
origins = map(self._convert_origin, self._origins.values())
if regexp:
pat = re.compile(url_pattern)
@@ -1332,56 +716,17 @@
return origins[offset:offset+limit]
def origin_count(self, url_pattern, regexp=False, with_visit=False):
- """Count origins whose urls contain a provided string pattern
- or match a provided regular expression.
- The pattern search in origin urls is performed in a case insensitive
- way.
-
- Args:
- url_pattern (str): the string pattern to search for in origin urls
- regexp (bool): if True, consider the provided pattern as a regular
- expression and return origins whose urls match it
- with_visit (bool): if True, filter out origins with no visit
-
- Returns:
- int: The number of origins matching the search criterion.
- """
return len(self.origin_search(url_pattern, regexp=regexp,
with_visit=with_visit,
limit=len(self._origins)))
def origin_add(self, origins):
- """Add origins to the storage
-
- Args:
- origins: list of dictionaries representing the individual origins,
- with the following keys:
-
- - url (bytes): the url the origin points to
-
- Returns:
- list: given origins as dict updated with their id
-
- """
origins = copy.deepcopy(list(origins))
for origin in origins:
self.origin_add_one(origin)
return origins
def origin_add_one(self, origin):
- """Add origin to the storage
-
- Args:
- origin: dictionary representing the individual origin to add. This
- dict has the following keys:
-
- - url (bytes): the url the origin points to
-
- Returns:
- the id of the added origin, or of the identical one that already
- exists.
-
- """
origin = Origin.from_dict(origin)
if origin.url not in self._origins:
if self.journal_writer:
@@ -1401,20 +746,6 @@
return origin.url
def origin_visit_add(self, origin, date, type):
- """Add an origin_visit for the origin at date with status 'ongoing'.
-
- Args:
- origin (str): visited origin's identifier or URL
- date (Union[str,datetime]): timestamp of such visit
- type (str): the type of loader used for the visit (hg, git, ...)
-
- Returns:
- dict: dictionary with keys origin and visit where:
-
- - origin: origin's identifier
- - visit: the visit's identifier for the new visit occurrence
-
- """
origin_url = origin
if origin_url is None:
raise ValueError('Unknown origin.')
@@ -1456,20 +787,6 @@
def origin_visit_update(self, origin, visit_id, status=None,
metadata=None, snapshot=None):
- """Update an origin_visit's status.
-
- Args:
- origin (str): visited origin's URL
- visit_id (int): visit's identifier
- status: visit's new status
- metadata: data associated to the visit
- snapshot (sha1_git): identifier of the snapshot to add to
- the visit
-
- Returns:
- None
-
- """
if not isinstance(origin, str):
raise TypeError('origin must be a string, not %r' % (origin,))
origin_url = self._get_origin_url(origin)
@@ -1498,22 +815,6 @@
self._origin_visits[origin_url][visit_id-1] = visit
def origin_visit_upsert(self, visits):
- """Add a origin_visits with a specific id and with all its data.
- If there is already an origin_visit with the same
- `(origin_url, visit_id)`, updates it instead of inserting a new one.
-
- Args:
- visits: iterable of dicts with keys:
-
- - **origin**: origin url
- - **visit**: origin visit id
- - **type**: type of loader used for the visit
- - **date**: timestamp of such visit
- - **status**: Visit's new status
- - **metadata**: Data associated to the visit
- - **snapshot**: identifier of the snapshot to add to
- the visit
- """
for visit in visits:
if not isinstance(visit['origin'], str):
raise TypeError("visit['origin'] must be a string, not %r"
@@ -1547,19 +848,6 @@
return visit
def origin_visit_get(self, origin, last_visit=None, limit=None):
- """Retrieve all the origin's visit's information.
-
- Args:
- origin (int): the origin's identifier
- last_visit (int): visit's id from which listing the next ones,
- default to None
- limit (int): maximum number of results to return,
- default to None
-
- Yields:
- List of visits.
-
- """
origin_url = self._get_origin_url(origin)
if origin_url in self._origin_visits:
visits = self._origin_visits[origin_url]
@@ -1576,18 +864,6 @@
self._origin_visits[origin_url][visit_id-1])
def origin_visit_find_by_date(self, origin, visit_date):
- """Retrieves the origin visit whose date is closest to the provided
- timestamp.
- In case of a tie, the visit with largest id is selected.
-
- Args:
- origin (str): The occurrence's origin (URL).
- target (datetime): target timestamp
-
- Returns:
- A visit.
-
- """
origin_url = self._get_origin_url(origin)
if origin_url in self._origin_visits:
visits = self._origin_visits[origin_url]
@@ -1597,16 +873,6 @@
return self._convert_visit(visit)
def origin_visit_get_by(self, origin, visit):
- """Retrieve origin visit's information.
-
- Args:
- origin (int): the origin's identifier
-
- Returns:
- The information on that particular (origin, visit) or None if
- it does not exist
-
- """
origin_url = self._get_origin_url(origin)
if origin_url in self._origin_visits and \
visit <= len(self._origin_visits[origin_url]):
@@ -1615,31 +881,6 @@
def origin_visit_get_latest(
self, origin, allowed_statuses=None, require_snapshot=False):
- """Get the latest origin visit for the given origin, optionally
- looking only for those with one of the given allowed_statuses
- or for those with a known snapshot.
-
- Args:
- origin (str): the origin's URL
- allowed_statuses (list of str): list of visit statuses considered
- to find the latest visit. For instance,
- ``allowed_statuses=['full']`` will only consider visits that
- have successfully run to completion.
- require_snapshot (bool): If True, only a visit with a snapshot
- will be returned.
-
- Returns:
- dict: a dict with the following keys:
-
- - **origin**: the URL of the origin
- - **visit**: origin visit id
- - **type**: type of loader used for the visit
- - **date**: timestamp of such visit
- - **status**: Visit's new status
- - **metadata**: Data associated to the visit
- - **snapshot** (Optional[sha1_git]): identifier of the snapshot
- associated to the visit
- """
origin = self._origins.get(origin)
if not origin:
return
@@ -1656,7 +897,6 @@
return self._convert_visit(visit)
def _select_random_origin_visit_by_type(self, type: str) -> str:
- """Select randomly an origin visit """
while True:
url = random.choice(list(self._origin_visits.keys()))
random_origin_visits = self._origin_visits[url]
@@ -1664,14 +904,6 @@
return url
def origin_visit_get_random(self, type: str) -> Optional[Dict[str, Any]]:
- """Randomly select one successful origin visit with <type>
- made in the last 3 months.
-
- Returns:
- dict representing an origin visit, in the same format as
- `origin_visit_get`.
-
- """
url = self._select_random_origin_visit_by_type(type)
random_origin_visits = copy.deepcopy(self._origin_visits[url])
random_origin_visits.reverse()
@@ -1684,13 +916,6 @@
return None
def stat_counters(self):
- """compute statistics about the number of tuples in various tables
-
- Returns:
- dict: a dictionary mapping textual labels (e.g., content) to
- integer values (e.g., the number of tuples in table content)
-
- """
keys = (
'content',
'directory',
@@ -1710,20 +935,9 @@
return stats
def refresh_stat_counters(self):
- """Recomputes the statistics for `stat_counters`."""
pass
def origin_metadata_add(self, origin_url, ts, provider, tool, metadata):
- """ Add an origin_metadata for the origin at ts with provenance and
- metadata.
-
- Args:
- origin_url (str): the origin url for which the metadata is added
- ts (datetime): timestamp of the found metadata
- provider: id of the provider of metadata (ex:'hal')
- tool: id of the tool used to extract metadata
- metadata (jsonb): the metadata retrieved at the time and location
- """
if not isinstance(origin_url, str):
raise TypeError('origin_id must be str, not %r' % (origin_url,))
@@ -1741,25 +955,6 @@
return None
def origin_metadata_get_by(self, origin_url, provider_type=None):
- """Retrieve list of all origin_metadata entries for the origin_url
-
- Args:
- origin_url (str): the origin's url
- provider_type (str): (optional) type of provider
-
- Returns:
- list of dicts: the origin_metadata dictionary with the keys:
-
- - origin_url (int): origin's URL
- - discovery_date (datetime): timestamp of discovery
- - tool_id (int): metadata's extracting tool
- - metadata (jsonb)
- - provider_id (int): metadata's provider
- - provider_name (str)
- - provider_type (str)
- - provider_url (str)
-
- """
if not isinstance(origin_url, str):
raise TypeError('origin_url must be str, not %r' % (origin_url,))
metadata = []
@@ -1773,23 +968,6 @@
return metadata
def tool_add(self, tools):
- """Add new tools to the storage.
-
- Args:
- tools (iterable of :class:`dict`): Tool information to add to
- storage. Each tool is a :class:`dict` with the following keys:
-
- - name (:class:`str`): name of the tool
- - version (:class:`str`): version of the tool
- - configuration (:class:`dict`): configuration of the tool,
- must be json-encodable
-
- Returns:
- :class:`dict`: All the tools inserted in storage
- (including the internal ``id``). The order of the list is not
- guaranteed to match the order of the initial list.
-
- """
inserted = []
for tool in tools:
key = self._tool_key(tool)
@@ -1803,32 +981,10 @@
return inserted
def tool_get(self, tool):
- """Retrieve tool information.
-
- Args:
- tool (dict): Tool information we want to retrieve from storage.
- The dicts have the same keys as those used in :func:`tool_add`.
-
- Returns:
- dict: The full tool information if it exists (``id`` included),
- None otherwise.
-
- """
return self._tools.get(self._tool_key(tool))
def metadata_provider_add(self, provider_name, provider_type, provider_url,
metadata):
- """Add a metadata provider.
-
- Args:
- provider_name (str): Its name
- provider_type (str): Its type
- provider_url (str): Its URL
- metadata: JSON-encodable object
-
- Returns:
- an identifier of the provider
- """
provider = {
'provider_name': provider_name,
'provider_type': provider_type,
@@ -1841,28 +997,9 @@
return key
def metadata_provider_get(self, provider_id):
- """Get a metadata provider
-
- Args:
- provider_id: Its identifier, as given by `metadata_provider_add`.
-
- Returns:
- dict: same as `metadata_provider_add`;
- or None if it does not exist.
- """
return self._metadata_providers.get(provider_id)
def metadata_provider_get_by(self, provider):
- """Get a metadata provider
-
- Args:
- provider_name: Its name
- provider_url: Its URL
-
- Returns:
- dict: same as `metadata_provider_add`;
- or None if it does not exist.
- """
key = self._metadata_provider_key(provider)
return self._metadata_providers.get(key)
@@ -1873,14 +1010,6 @@
raise TypeError('origin must be a string.')
def _person_add(self, person):
- """Add a person in storage.
-
- Note: Private method, do not use outside of this class.
-
- Args:
- person: dictionary with keys fullname, name and email.
-
- """
key = ('person', person.fullname)
if key not in self._objects:
person_id = len(self._persons) + 1
@@ -1913,3 +1042,12 @@
@staticmethod
def _metadata_provider_key(provider):
return '%r %r' % (provider['provider_name'], provider['provider_url'])
+
+ def diff_directories(self, from_dir, to_dir, track_renaming=False):
+ raise NotImplementedError('InMemoryStorage.diff_directories')
+
+ def diff_revisions(self, from_rev, to_rev, track_renaming=False):
+ raise NotImplementedError('InMemoryStorage.diff_revisions')
+
+ def diff_revision(self, revision, track_renaming=False):
+ raise NotImplementedError('InMemoryStorage.diff_revision')
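The three diff_* stubs above are a direct consequence of pointing backend_class at the interface: every method StorageInterface declares must now resolve on each backend, including operations the in-memory backend does not support. A sketch of the kind of completeness check this enables (not part of this diff; the class names follow the strings above):

import inspect

def missing_methods(interface, implementation):
    # Public methods declared on the interface but absent from the backend.
    declared = {name for name, _ in inspect.getmembers(interface, inspect.isfunction)
                if not name.startswith('_')}
    provided = {name for name, _ in inspect.getmembers(implementation, inspect.isfunction)
                if not name.startswith('_')}
    return sorted(declared - provided)

# e.g. missing_methods(StorageInterface, InMemoryStorage) should return []
# once stubs such as the ones above are in place.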
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/interface.py
@@ -0,0 +1,1224 @@
+# Copyright (C) 2015-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Any, Dict, List, Optional
+
+from swh.core.api import remote_api_endpoint
+
+
+class StorageInterface:
+ @remote_api_endpoint('check_config')
+ def check_config(self, *, check_write):
+ """Check that the storage is configured and ready to go."""
+ ...
+
+ @remote_api_endpoint('content/add')
+ def content_add(self, content):
+ """Add content blobs to the storage
+
+ Args:
+ content (iterable): iterable of dictionaries representing
+ individual pieces of content to add. Each dictionary has the
+ following keys:
+
+ - data (bytes): the actual content
+ - length (int): content length (default: -1)
+ - one key for each checksum algorithm in
+ :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
+ corresponding checksum
+ - status (str): one of visible, hidden, absent
+ - reason (str): if status = absent, the reason why
+ - origin (int): if status = absent, the origin we saw the
+ content in
+
+ Raises:
+
+ The following exceptions can occur:
+
+ - HashCollision in case of collision
+ - Any other exceptions raised by the db
+
+ In case of errors, some of the content may have been stored in
+ the DB and in the objstorage.
+ Since additions to both are idempotent, that should not be a problem.
+
+ Returns:
+ Summary dict with the following key and associated values:
+
+ content:add: New contents added
+ content:add:bytes: Sum of the contents' length data
+ skipped_content:add: New skipped contents (no data) added
+ """
+ ...
+
+ @remote_api_endpoint('content/update')
+ def content_update(self, content, keys=[]):
+ """Update content blobs to the storage. Does nothing for unknown
+ contents or skipped ones.
+
+ Args:
+ content (iterable): iterable of dictionaries representing
+ individual pieces of content to update. Each dictionary has the
+ following keys:
+
+ - data (bytes): the actual content
+ - length (int): content length (default: -1)
+ - one key for each checksum algorithm in
+ :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
+ corresponding checksum
+ - status (str): one of visible, hidden, absent
+
+ keys (list): List of keys (str) whose values needs an update, e.g.,
+ new hash column
+
+ """
+ ...
+
+ @remote_api_endpoint('content/add_metadata')
+ def content_add_metadata(self, content):
+ """Add content metadata to the storage (like `content_add`, but
+ without inserting to the objstorage).
+
+ Args:
+ content (iterable): iterable of dictionaries representing
+ individual pieces of content to add. Each dictionary has the
+ following keys:
+
+ - length (int): content length (default: -1)
+ - one key for each checksum algorithm in
+ :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
+ corresponding checksum
+ - status (str): one of visible, hidden, absent
+ - reason (str): if status = absent, the reason why
+ - origin (int): if status = absent, the origin we saw the
+ content in
+ - ctime (datetime): time of insertion in the archive
+
+ Returns:
+ Summary dict with the following key and associated values:
+
+ content:add: New contents added
+ skipped_content:add: New skipped contents (no data) added
+ """
+ ...
+
+ @remote_api_endpoint('content/data')
+ def content_get(self, content):
+ """Retrieve in bulk contents and their data.
+
+ This generator yields exactly as many items as provided sha1
+ identifiers, but callers should not assume this will always be true.
+
+ It may also yield `None` values in case an object was not found.
+
+ Args:
+ content: iterables of sha1
+
+ Yields:
+ Dict[str, bytes]: Generates streams of contents as dict with their
+ raw data:
+
+ - sha1 (bytes): content id
+ - data (bytes): content's raw data
+
+ Raises:
+ ValueError: in case too many contents are required;
+ cf. BULK_BLOCK_CONTENT_LEN_MAX
+
+ """
+ ...
+
+ @remote_api_endpoint('content/range')
+ def content_get_range(self, start, end, limit=1000):
+ """Retrieve contents within range [start, end] bound by limit.
+
+ Note that this function may return more than one blob per hash. The
+ limit is enforced with multiplicity (i.e. two blobs with the same hash
+ will count twice toward the limit).
+
+ Args:
+ **start** (bytes): Starting identifier range (expected smaller
+ than end)
+ **end** (bytes): Ending identifier range (expected larger
+ than start)
+ **limit** (int): Limit result (default to 1000)
+
+ Returns:
+ a dict with keys:
+ - contents [dict]: iterable of contents within the range.
+ - next (bytes): There remains content in the range
+ starting from this next sha1
+
+ """
+ ...
+
+ @remote_api_endpoint('content/partition')
+ def content_get_partition(
+ self, partition_id: int, nb_partitions: int, limit: int = 1000,
+ page_token: str = None):
+ """Splits contents into nb_partitions, and returns one of these based on
+ partition_id (which must be in [0, nb_partitions-1])
+
+ There is no guarantee on how the partitioning is done, or the
+ result order.
+
+ Args:
+ partition_id (int): index of the partition to fetch
+ nb_partitions (int): total number of partitions to split into
+ limit (int): Limit result (default to 1000)
+ page_token (Optional[str]): opaque token used for pagination.
+
+ Returns:
+ a dict with keys:
+ - contents (List[dict]): iterable of contents in the partition.
+ - **next_page_token** (Optional[str]): opaque token to be used as
+ `page_token` for retrieving the next page. If absent, there are
+ no more pages to gather.
+ """
+ ...
+
+ @remote_api_endpoint('content/metadata')
+ def content_get_metadata(
+ self, contents: List[bytes]) -> Dict[bytes, List[Dict]]:
+ """Retrieve content metadata in bulk
+
+ Args:
+ contents: iterable of content identifiers (sha1)
+
+ Returns:
+ a dict whose keys are the contents' sha1s and whose values
+ are either the existing content's metadata or None if the
+ content does not exist.
+
+ """
+ ...
+
+ @remote_api_endpoint('content/missing')
+ def content_missing(self, content, key_hash='sha1'):
+ """List content missing from storage
+
+ Args:
+ content ([dict]): iterable of dictionaries whose keys are
+ either 'length' or an item of
+ :data:`swh.model.hashutil.ALGORITHMS`;
+ mapped to the corresponding checksum
+ (or length).
+
+ key_hash (str): name of the column to use as hash id
+ result (default: 'sha1')
+
+ Returns:
+ iterable ([bytes]): missing content ids (as per the
+ key_hash column)
+
+ Raises:
+ TODO: an exception when we get a hash collision.
+
+ """
+ ...
+
+ @remote_api_endpoint('content/missing/sha1')
+ def content_missing_per_sha1(self, contents):
+ """List content missing from storage based only on sha1.
+
+ Args:
+ contents: Iterable of sha1 to check for absence.
+
+ Returns:
+ iterable: missing ids
+
+ Raises:
+ TODO: an exception when we get a hash collision.
+
+ """
+ ...
+
+ @remote_api_endpoint('content/missing/sha1_git')
+ def content_missing_per_sha1_git(self, contents):
+ """List content missing from storage based only on sha1_git.
+
+ Args:
+ contents (Iterable): An iterable of content id (sha1_git)
+
+ Yields:
+ missing contents sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('content/skipped/missing')
+ def skipped_content_missing(self, contents):
+ """List skipped_content missing from storage
+
+ Args:
+ contents: iterable of dictionaries containing the data for each
+ checksum algorithm.
+
+ Returns:
+ iterable: missing signatures
+
+ """
+ ...
+
+ @remote_api_endpoint('content/present')
+ def content_find(self, content):
+ """Find a content hash in db.
+
+ Args:
+ content: a dictionary representing one content hash, mapping
+ checksum algorithm names (see swh.model.hashutil.ALGORITHMS) to
+ checksum values
+
+ Returns:
+ a triplet (sha1, sha1_git, sha256) if the content exists
+ or None otherwise.
+
+ Raises:
+ ValueError: in case the key of the dictionary is not sha1, sha1_git
+ nor sha256.
+
+ """
+ ...
+
+ @remote_api_endpoint('content/get_random')
+ def content_get_random(self):
+ """Finds a random content id.
+
+ Returns:
+ a sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('directory/add')
+ def directory_add(self, directories):
+ """Add directories to the storage
+
+ Args:
+ directories (iterable): iterable of dictionaries representing the
+ individual directories to add. Each dict has the following
+ keys:
+
+ - id (sha1_git): the id of the directory to add
+ - entries (list): list of dicts for each entry in the
+ directory. Each dict has the following keys:
+
+ - name (bytes)
+ - type (one of 'file', 'dir', 'rev'): type of the
+ directory entry (file, directory, revision)
+ - target (sha1_git): id of the object pointed at by the
+ directory entry
+ - perms (int): entry permissions
+
+ Returns:
+ Summary dict of keys with associated count as values:
+
+ directory:add: Number of directories actually added
+
+ """
+ ...
+
+ @remote_api_endpoint('directory/missing')
+ def directory_missing(self, directories):
+ """List directories missing from storage
+
+ Args:
+ directories (iterable): an iterable of directory ids
+
+ Yields:
+ missing directory ids
+
+ """
+ ...
+
+ @remote_api_endpoint('directory/ls')
+ def directory_ls(self, directory, recursive=False):
+ """Get entries for one directory.
+
+ Args:
+ - directory: the directory to list entries from.
+ - recursive: if set, list entries recursively from this directory.
+
+ Returns:
+ List of entries for such directory.
+
+ If `recursive=True`, names in the path of a dir/file not at the
+ root are concatenated with a slash (`/`).
+
+ """
+ ...
+
+ @remote_api_endpoint('directory/path')
+ def directory_entry_get_by_path(self, directory, paths):
+ """Get the directory entry (either file or dir) from directory with path.
+
+ Args:
+ - directory: sha1 of the top level directory
+ - paths: path to lookup from the top level directory. From left
+ (top) to right (bottom).
+
+ Returns:
+ The corresponding directory entry if found, None otherwise.
+
+ """
+ ...
+
+ @remote_api_endpoint('directory/get_random')
+ def directory_get_random(self):
+ """Finds a random directory id.
+
+ Returns:
+ a sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('revision/add')
+ def revision_add(self, revisions):
+ """Add revisions to the storage
+
+ Args:
+ revisions (Iterable[dict]): iterable of dictionaries representing
+ the individual revisions to add. Each dict has the following
+ keys:
+
+ - **id** (:class:`sha1_git`): id of the revision to add
+ - **date** (:class:`dict`): date the revision was written
+ - **committer_date** (:class:`dict`): date the revision got
+ added to the origin
+ - **type** (one of 'git', 'tar'): type of the
+ revision added
+ - **directory** (:class:`sha1_git`): the directory the
+ revision points at
+ - **message** (:class:`bytes`): the message associated with
+ the revision
+ - **author** (:class:`Dict[str, bytes]`): dictionary with
+ keys: name, fullname, email
+ - **committer** (:class:`Dict[str, bytes]`): dictionary with
+ keys: name, fullname, email
+ - **metadata** (:class:`jsonb`): extra information as
+ dictionary
+ - **synthetic** (:class:`bool`): revision's nature (tarball,
+ directory creates synthetic revision)
+ - **parents** (:class:`list[sha1_git]`): the parents of
+ this revision
+
+ date dictionaries have the form defined in :mod:`swh.model`.
+
+ Returns:
+ Summary dict of keys with associated count as values
+
+ revision:add: New objects actually stored in db
+
+ """
+ ...
+
+ @remote_api_endpoint('revision/missing')
+ def revision_missing(self, revisions):
+ """List revisions missing from storage
+
+ Args:
+ revisions (iterable): revision ids
+
+ Yields:
+ missing revision ids
+
+ """
+ ...
+
+ @remote_api_endpoint('revision')
+ def revision_get(self, revisions):
+ """Get all revisions from storage
+
+ Args:
+ revisions: an iterable of revision ids
+
+ Returns:
+ iterable: an iterable of revisions as dictionaries (or None if the
+ revision doesn't exist)
+
+ """
+ ...
+
+ @remote_api_endpoint('revision/log')
+ def revision_log(self, revisions, limit=None):
+ """Fetch revision entry from the given root revisions.
+
+ Args:
+ revisions: array of root revision to lookup
+ limit: limitation on the output result. Default to None.
+
+ Yields:
+ List of revision log from such revisions root.
+
+ """
+ ...
+
+ @remote_api_endpoint('revision/shortlog')
+ def revision_shortlog(self, revisions, limit=None):
+ """Fetch the shortlog for the given revisions
+
+ Args:
+ revisions: list of root revisions to lookup
+ limit: depth limitation for the output
+
+ Yields:
+ a list of (id, parents) tuples.
+
+ """
+ ...
+
+ @remote_api_endpoint('revision/get_random')
+ def revision_get_random(self):
+ """Finds a random revision id.
+
+ Returns:
+ a sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('release/add')
+ def release_add(self, releases):
+ """Add releases to the storage
+
+ Args:
+ releases (Iterable[dict]): iterable of dictionaries representing
+ the individual releases to add. Each dict has the following
+ keys:
+
+ - **id** (:class:`sha1_git`): id of the release to add
+ - **revision** (:class:`sha1_git`): id of the revision the
+ release points to
+ - **date** (:class:`dict`): the date the release was made
+ - **name** (:class:`bytes`): the name of the release
+ - **comment** (:class:`bytes`): the comment associated with
+ the release
+ - **author** (:class:`Dict[str, bytes]`): dictionary with
+ keys: name, fullname, email
+
+ the date dictionary has the form defined in :mod:`swh.model`.
+
+ Returns:
+ Summary dict of keys with associated count as values
+
+ release:add: New objects actually stored in db
+
+ """
+ ...
+
+ @remote_api_endpoint('release/missing')
+ def release_missing(self, releases):
+ """List releases missing from storage
+
+ Args:
+ releases: an iterable of release ids
+
+ Returns:
+ a list of missing release ids
+
+ """
+ ...
+
+ @remote_api_endpoint('release')
+ def release_get(self, releases):
+ """Given a list of sha1, return the releases's information
+
+ Args:
+ releases: list of sha1s
+
+ Yields:
+ dicts with the same keys as those given to `release_add`
+ (or ``None`` if a release does not exist)
+
+ """
+ ...
+
+ @remote_api_endpoint('release/get_random')
+ def release_get_random(self):
+ """Finds a random release id.
+
+ Returns:
+ a sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/add')
+ def snapshot_add(self, snapshots):
+ """Add snapshots to the storage.
+
+ Args:
+ snapshots ([dict]): the snapshots to add, containing the
+ following keys:
+
+ - **id** (:class:`bytes`): id of the snapshot
+ - **branches** (:class:`dict`): branches the snapshot contains,
+ mapping the branch name (:class:`bytes`) to the branch target,
+ itself a :class:`dict` (or ``None`` if the branch points to an
+ unknown object)
+
+ - **target_type** (:class:`str`): one of ``content``,
+ ``directory``, ``revision``, ``release``,
+ ``snapshot``, ``alias``
+ - **target** (:class:`bytes`): identifier of the target
+ (currently a ``sha1_git`` for all object kinds, or the name
+ of the target branch for aliases)
+
+ Raises:
+ ValueError: if the origin or visit id does not exist.
+
+ Returns:
+
+ Summary dict of keys with associated count as values
+
+ snapshot:add: Count of objects actually stored in db
+
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/missing')
+ def snapshot_missing(self, snapshots):
+ """List snapshots missing from storage
+
+ Args:
+ snapshots (iterable): an iterable of snapshot ids
+
+ Yields:
+ missing snapshot ids
+
+ """
+ ...
+
+ @remote_api_endpoint('snapshot')
+ def snapshot_get(self, snapshot_id):
+ """Get the content, possibly partial, of a snapshot with the given id
+
+ The branches of the snapshot are iterated in the lexicographical
+ order of their names.
+
+ .. warning:: At most 1000 branches contained in the snapshot will be
+ returned for performance reasons. In order to browse the whole
+ set of branches, the method :meth:`snapshot_get_branches`
+ should be used instead.
+
+ Args:
+ snapshot_id (bytes): identifier of the snapshot
+ Returns:
+ dict: a dict with three keys:
+ * **id**: identifier of the snapshot
+ * **branches**: a dict of branches contained in the snapshot
+ whose keys are the branches' names.
+ * **next_branch**: the name of the first branch not returned
+ or :const:`None` if the snapshot has less than 1000
+ branches.
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/by_origin_visit')
+ def snapshot_get_by_origin_visit(self, origin, visit):
+ """Get the content, possibly partial, of a snapshot for the given origin visit
+
+ The branches of the snapshot are iterated in the lexicographical
+ order of their names.
+
+ .. warning:: At most 1000 branches contained in the snapshot will be
+ returned for performance reasons. In order to browse the whole
+ set of branches, the method :meth:`snapshot_get_branches`
+ should be used instead.
+
+ Args:
+ origin (int): the origin identifier
+ visit (int): the visit identifier
+ Returns:
+ dict: None if the snapshot does not exist;
+ a dict with three keys otherwise:
+ * **id**: identifier of the snapshot
+ * **branches**: a dict of branches contained in the snapshot
+ whose keys are the branches' names.
+ * **next_branch**: the name of the first branch not returned
+ or :const:`None` if the snapshot has less than 1000
+ branches.
+
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/latest')
+ def snapshot_get_latest(self, origin, allowed_statuses=None):
+ """Get the content, possibly partial, of the latest snapshot for the
+ given origin, optionally only from visits that have one of the given
+ allowed_statuses
+
+ The branches of the snapshot are iterated in the lexicographical
+ order of their names.
+
+ .. warning:: At most 1000 branches contained in the snapshot will be
+ returned for performance reasons. In order to browse the whole
+ set of branches, the method :meth:`snapshot_get_branches`
+ should be used instead.
+
+ Args:
+ origin (str): the origin's URL
+ allowed_statuses (list of str): list of visit statuses considered
+ to find the latest snapshot for the visit. For instance,
+ ``allowed_statuses=['full']`` will only consider visits that
+ have successfully run to completion.
+ Returns:
+ dict: a dict with three keys:
+ * **id**: identifier of the snapshot
+ * **branches**: a dict of branches contained in the snapshot
+ whose keys are the branches' names.
+ * **next_branch**: the name of the first branch not returned
+ or :const:`None` if the snapshot has less than 1000
+ branches.
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/count_branches')
+ def snapshot_count_branches(self, snapshot_id):
+ """Count the number of branches in the snapshot with the given id
+
+ Args:
+ snapshot_id (bytes): identifier of the snapshot
+
+ Returns:
+ dict: A dict whose keys are the target types of branches and
+ values their corresponding amount
+ """
+ ...
+
+ @remote_api_endpoint('snapshot/get_branches')
+ def snapshot_get_branches(self, snapshot_id, branches_from=b'',
+ branches_count=1000, target_types=None):
+ """Get the content, possibly partial, of a snapshot with the given id
+
+ The branches of the snapshot are iterated in the lexicographical
+ order of their names.
+
+ Args:
+ snapshot_id (bytes): identifier of the snapshot
+ branches_from (bytes): optional parameter used to skip branches
+ whose name is lexicographically smaller than it
+ branches_count (int): optional parameter used to limit
+ the number of returned branches
+ target_types (list): optional parameter used to filter the
+ target types of branch to return (possible values that can be
+ contained in that list are `'content', 'directory',
+ 'revision', 'release', 'snapshot', 'alias'`)
+ Returns:
+ dict: None if the snapshot does not exist;
+ a dict with three keys otherwise:
+ * **id**: identifier of the snapshot
+ * **branches**: a dict of branches contained in the snapshot
+ whose keys are the branches' names.
+ * **next_branch**: the name of the first branch not returned
+ or :const:`None` if the snapshot has fewer than
+ `branches_count` branches left, starting from `branches_from` (included).
+ """
+ ...
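+
+ # A minimal pagination sketch for snapshot_get_branches (illustration
+ # only; ``storage`` is assumed to be any StorageInterface implementation
+ # and ``snap_id`` an existing snapshot id):
+ #
+ #     name = b''
+ #     while name is not None:
+ #         part = storage.snapshot_get_branches(
+ #             snap_id, branches_from=name, branches_count=1000)
+ #         process(part['branches'])  # hypothetical consumer
+ #         name = part['next_branch']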
+
+ @remote_api_endpoint('snapshot/get_random')
+ def snapshot_get_random(self):
+ """Finds a random snapshot id.
+
+ Returns:
+ a sha1_git
+ """
+ ...
+
+ @remote_api_endpoint('origin/visit/add')
+ def origin_visit_add(self, origin, date, type):
+ """Add an origin_visit for the origin at ts with status 'ongoing'.
+
+ Args:
+ origin (str): visited origin's identifier or URL
+ date (Union[str,datetime]): timestamp of such visit
+ type (str): the type of loader used for the visit (hg, git, ...)
+
+ Returns:
+ dict: dictionary with keys origin and visit where:
+
+ - origin: origin identifier
+ - visit: the visit identifier for the new visit occurrence
+
+ """
+ ...
+
+ @remote_api_endpoint('origin/visit/update')
+ def origin_visit_update(self, origin, visit_id, status=None,
+ metadata=None, snapshot=None):
+ """Update an origin_visit's status.
+
+ Args:
+ origin (str): visited origin's URL
+ visit_id: Visit's id
+ status: Visit's new status
+ metadata: Data associated to the visit
+ snapshot (sha1_git): identifier of the snapshot to add to
+ the visit
+
+ Returns:
+ None
+
+ """
+ ...
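+
+ # Sketch of the visit add/update workflow (illustration only; ``storage``,
+ # ``snap_id`` and the example URL are assumptions, not part of this
+ # interface):
+ #
+ #     import datetime
+ #     visit = storage.origin_visit_add(
+ #         'https://example.org/repo.git',
+ #         date=datetime.datetime.now(tz=datetime.timezone.utc),
+ #         type='git')
+ #     storage.origin_visit_update(
+ #         visit['origin'], visit['visit'],
+ #         status='full', snapshot=snap_id)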
+
+ @remote_api_endpoint('origin/visit/upsert')
+ def origin_visit_upsert(self, visits):
+ """Add a origin_visits with a specific id and with all its data.
+ If there is already an origin_visit with the same
+ `(origin_id, visit_id)`, overwrites it.
+
+ Args:
+ visits: iterable of dicts with keys:
+
+ - **origin**: dict with keys either `id` or `url`
+ - **visit**: origin visit id
+ - **date**: timestamp of such visit
+ - **status**: Visit's new status
+ - **metadata**: Data associated to the visit
+ - **snapshot**: identifier of the snapshot to add to
+ the visit
+ """
+ ...
+
+ @remote_api_endpoint('origin/visit/get')
+ def origin_visit_get(self, origin, last_visit=None, limit=None):
+ """Retrieve all the origin's visit's information.
+
+ Args:
+ origin (str): The visited origin
+ last_visit: Starting point from which to list the next visits.
+ Defaults to None
+ limit (int): Number of results to return from the last visit.
+ Defaults to None
+
+ Yields:
+ List of visits.
+
+ """
+ ...
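+
+ # Possible batched iteration over all visits of an origin (a sketch;
+ # ``storage`` and the URL are placeholders):
+ #
+ #     last = None
+ #     while True:
+ #         batch = list(storage.origin_visit_get(
+ #             'https://example.org/repo.git', last_visit=last, limit=100))
+ #         if not batch:
+ #             break
+ #         last = batch[-1]['visit']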
+
+ @remote_api_endpoint('origin/visit/find_by_date')
+ def origin_visit_find_by_date(self, origin, visit_date):
+ """Retrieves the origin visit whose date is closest to the provided
+ timestamp.
+ In case of a tie, the visit with the largest id is selected.
+
+ Args:
+ origin (str): The occurrence's origin (URL).
+ visit_date (datetime): the target timestamp
+
+ Returns:
+ A visit.
+
+ """
+ ...
+
+ @remote_api_endpoint('origin/visit/getby')
+ def origin_visit_get_by(self, origin, visit):
+ """Retrieve origin visit's information.
+
+ Args:
+ origin: The occurrence's origin (identifier).
+ visit: The visit's identifier.
+
+ Returns:
+ The information on that particular (origin, visit) or None if
+ it does not exist
+
+ """
+ ...
+
+ @remote_api_endpoint('origin/visit/get_latest')
+ def origin_visit_get_latest(
+ self, origin, allowed_statuses=None, require_snapshot=False):
+ """Get the latest origin visit for the given origin, optionally
+ looking only for those with one of the given allowed_statuses
+ or for those with a known snapshot.
+
+ Args:
+ origin (str): the origin's URL
+ allowed_statuses (list of str): list of visit statuses considered
+ to find the latest visit. For instance,
+ ``allowed_statuses=['full']`` will only consider visits that
+ have successfully run to completion.
+ require_snapshot (bool): If True, only a visit with a snapshot
+ will be returned.
+
+ Returns:
+ dict: a dict with the following keys:
+
+ - **origin**: the URL of the origin
+ - **visit**: origin visit id
+ - **type**: type of loader used for the visit
+ - **date**: timestamp of such visit
+ - **status**: Visit's new status
+ - **metadata**: Data associated to the visit
+ - **snapshot** (Optional[sha1_git]): identifier of the snapshot
+ associated to the visit
+ """
+ ...
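+
+ # Typical lookup of the latest complete visit and its snapshot (a sketch,
+ # assuming ``storage`` implements this interface):
+ #
+ #     latest = storage.origin_visit_get_latest(
+ #         'https://example.org/repo.git',
+ #         allowed_statuses=['full'], require_snapshot=True)
+ #     if latest is not None:
+ #         snapshot = storage.snapshot_get(latest['snapshot'])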
+
+ @remote_api_endpoint('origin/visit/get_random')
+ def origin_visit_get_random(
+ self, type: str) -> Optional[Dict[str, Any]]:
+ """Randomly select one successful origin visit with <type>
+ made in the last 3 months.
+
+ Args:
+ type: the visit type to select (e.g., 'git')
+
+ Returns:
+ dict representing an origin visit, in the same format as
+ :py:meth:`origin_visit_get`.
+
+ """
+ ...
+
+ @remote_api_endpoint('object/find_by_sha1_git')
+ def object_find_by_sha1_git(self, ids):
+ """Return the objects found with the given ids.
+
+ Args:
+ ids: a generator of sha1_gits
+
+ Returns:
+ dict: a mapping from id to the list of objects found. Each object
+ found is itself a dict with keys:
+
+ - sha1_git: the input id
+ - type: the type of object found
+
+ """
+ ...
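+
+ # Example of checking which objects are archived (a sketch; ``ids`` is an
+ # assumed list of sha1_git identifiers):
+ #
+ #     found = storage.object_find_by_sha1_git(ids)
+ #     missing = [i for i, objs in found.items() if not objs]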
+
+ @remote_api_endpoint('origin/get')
+ def origin_get(self, origins):
+ """Return origins, either all identified by their ids or all
+ identified by tuples (type, url).
+
+ If the url is given and the type is omitted, one of the origins with
+ that url is returned.
+
+ Args:
+ origins: a list of dictionaries representing the individual
+ origins to find.
+ These dicts have the key url:
+
+ - url (bytes): the url the origin points to
+
+ Returns:
+ dict: the origin dictionary with the keys:
+
+ - id: origin's id
+ - url: origin's url
+
+ Raises:
+ ValueError: if the url or the id don't exist.
+
+ """
+ ...
+
+ @remote_api_endpoint('origin/get_sha1')
+ def origin_get_by_sha1(self, sha1s):
+ """Return origins, identified by the sha1 of their URLs.
+
+ Args:
+ sha1s (list[bytes]): a list of sha1s
+
+ Yields:
+ dicts containing origin information as returned
+ by :meth:`swh.storage.storage.Storage.origin_get`, or None if an
+ origin matching the sha1 is not found.
+
+ """
+ ...
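+
+ # Looking up an origin by the sha1 of its URL (a sketch; assumes the sha1
+ # is computed over the UTF-8 encoded URL):
+ #
+ #     import hashlib
+ #     sha1 = hashlib.sha1('https://example.org/repo.git'.encode()).digest()
+ #     origin = next(storage.origin_get_by_sha1([sha1]))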
+
+ @remote_api_endpoint('origin/get_range')
+ def origin_get_range(self, origin_from=1, origin_count=100):
+ """Retrieve ``origin_count`` origins whose ids are greater
+ or equal than ``origin_from``.
+
+ Origins are sorted by id before retrieving them.
+
+ Args:
+ origin_from (int): the minimum id of origins to retrieve
+ origin_count (int): the maximum number of origins to retrieve
+
+ Yields:
+ dicts containing origin information as returned
+ by :meth:`swh.storage.storage.Storage.origin_get`.
+ """
+ ...
+
+ @remote_api_endpoint('origin/list')
+ def origin_list(
+ self, page_token: Optional[str] = None, limit: int = 100) -> dict:
+ """Returns the list of origins
+
+ Args:
+ page_token: opaque token used for pagination.
+ limit: the maximum number of results to return
+
+ Returns:
+ dict: dict with the following keys:
+ - **next_page_token** (str, optional): opaque token to be used as
+ `page_token` for retrieving the next page. If absent, there are
+ no more pages to gather.
+ - **origins** (List[dict]): list of origins, as returned by
+ `origin_get`.
+ """
+ ...
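+
+ # Page-token based listing of all origins (a sketch; ``handle`` is a
+ # hypothetical consumer):
+ #
+ #     token = None
+ #     while True:
+ #         page = storage.origin_list(page_token=token, limit=100)
+ #         for origin in page['origins']:
+ #             handle(origin)
+ #         token = page.get('next_page_token')
+ #         if token is None:
+ #             break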
+
+ @remote_api_endpoint('origin/search')
+ def origin_search(self, url_pattern, offset=0, limit=50,
+ regexp=False, with_visit=False):
+ """Search for origins whose urls contain a provided string pattern
+ or match a provided regular expression.
+ The search is performed in a case-insensitive way.
+
+ Args:
+ url_pattern (str): the string pattern to search for in origin urls
+ offset (int): number of found origins to skip before returning
+ results
+ limit (int): the maximum number of found origins to return
+ regexp (bool): if True, consider the provided pattern as a regular
+ expression and return origins whose urls match it
+ with_visit (bool): if True, filter out origins with no visit
+
+ Yields:
+ dicts containing origin information as returned
+ by :meth:`swh.storage.storage.Storage.origin_get`.
+ """
+ ...
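+
+ # Example search (a sketch; the pattern and limit are placeholders):
+ #
+ #     for origin in storage.origin_search(
+ #             'github.com/', limit=10, with_visit=True):
+ #         print(origin['url'])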
+
+ @remote_api_endpoint('origin/count')
+ def origin_count(self, url_pattern, regexp=False,
+ with_visit=False):
+ """Count origins whose urls contain a provided string pattern
+ or match a provided regular expression.
+ The pattern search in origin urls is performed in a
+ case-insensitive way.
+
+ Args:
+ url_pattern (str): the string pattern to search for in origin urls
+ regexp (bool): if True, consider the provided pattern as a regular
+ expression and return origins whose urls match it
+ with_visit (bool): if True, filter out origins with no visit
+
+ Returns:
+ int: The number of origins matching the search criterion.
+ """
+ ...
+
+ @remote_api_endpoint('origin/add_multi')
+ def origin_add(self, origins):
+ """Add origins to the storage
+
+ Args:
+ origins: list of dictionaries representing the individual origins,
+ with the following keys:
+
+ - type: the origin type ('git', 'svn', 'deb', ...)
+ - url (bytes): the url the origin points to
+
+ Returns:
+ list: the given origins, as dicts updated with their id
+
+ """
+ ...
+
+ @remote_api_endpoint('origin/add')
+ def origin_add_one(self, origin):
+ """Add origin to the storage
+
+ Args:
+ origin: dictionary representing the individual origin to add. This
+ dict has the following keys:
+
+ - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
+ - url (bytes): the url the origin points to
+
+ Returns:
+ the id of the added origin, or of the identical one that already
+ exists.
+
+ """
+ ...
+
+ def stat_counters(self):
+ """compute statistics about the number of tuples in various tables
+
+ Returns:
+ dict: a dictionary mapping textual labels (e.g., content) to
+ integer values (e.g., the number of tuples in table content)
+
+ """
+ ...
+
+ def refresh_stat_counters(self):
+ """Recomputes the statistics for `stat_counters`."""
+ ...
+
+ @remote_api_endpoint('origin/metadata/add')
+ def origin_metadata_add(self, origin_url, ts, provider, tool, metadata):
+ """ Add an origin_metadata for the origin at ts with provenance and
+ metadata.
+
+ Args:
+ origin_url (str): the origin url for which the metadata is added
+ ts (datetime): timestamp of the found metadata
+ provider (int): the provider of metadata (e.g. 'hal')
+ tool (int): tool used to extract metadata
+ metadata (jsonb): the metadata retrieved at the time and location
+ """
+ ...
+
+ @remote_api_endpoint('origin/metadata/get')
+ def origin_metadata_get_by(self, origin_url, provider_type=None):
+ """Retrieve list of all origin_metadata entries for the origin_id
+
+ Args:
+ origin_url (str): the origin's URL
+ provider_type (str): (optional) type of provider
+
+ Returns:
+ list of dicts: the origin_metadata dictionary with the keys:
+
+ - origin_id (int): origin's id
+ - discovery_date (datetime): timestamp of discovery
+ - tool_id (int): metadata's extracting tool
+ - metadata (jsonb)
+ - provider_id (int): metadata's provider
+ - provider_name (str)
+ - provider_type (str)
+ - provider_url (str)
+
+ """
+ ...
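+
+ # Retrieving metadata entries for an origin (a sketch; the URL and
+ # provider type are placeholders):
+ #
+ #     for entry in storage.origin_metadata_get_by(
+ #             'https://example.org/repo.git',
+ #             provider_type='deposit-client'):
+ #         print(entry['discovery_date'], entry['metadata'])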
+
+ @remote_api_endpoint('tool/add')
+ def tool_add(self, tools):
+ """Add new tools to the storage.
+
+ Args:
+ tools (iterable of :class:`dict`): Tool information to add to
+ storage. Each tool is a :class:`dict` with the following keys:
+
+ - name (:class:`str`): name of the tool
+ - version (:class:`str`): version of the tool
+ - configuration (:class:`dict`): configuration of the tool,
+ must be json-encodable
+
+ Returns:
+ list of :class:`dict`: All the tools inserted in storage
+ (including the internal ``id``). The order of the list is not
+ guaranteed to match the order of the initial list.
+
+ """
+ ...
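+
+ # Registering and looking up a tool (a sketch; the tool name and version
+ # are made up):
+ #
+ #     tool = storage.tool_add([{
+ #         'name': 'swh-metadata-detector',
+ #         'version': '0.0.1',
+ #         'configuration': {},
+ #     }])[0]
+ #     assert storage.tool_get(tool)['id'] == tool['id']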
+
+ @remote_api_endpoint('tool/data')
+ def tool_get(self, tool):
+ """Retrieve tool information.
+
+ Args:
+ tool (dict): Tool information we want to retrieve from storage.
+ The dicts have the same keys as those used in :func:`tool_add`.
+
+ Returns:
+ dict: The full tool information if it exists (``id`` included),
+ None otherwise.
+
+ """
+ ...
+
+ @remote_api_endpoint('provider/add')
+ def metadata_provider_add(self, provider_name, provider_type, provider_url,
+ metadata):
+ """Add a metadata provider.
+
+ Args:
+ provider_name (str): Its name
+ provider_type (str): Its type (e.g. `'deposit-client'`)
+ provider_url (str): Its URL
+ metadata: JSON-encodable object
+
+ Returns:
+ int: an identifier of the provider
+ """
+ ...
+
+ @remote_api_endpoint('provider/get')
+ def metadata_provider_get(self, provider_id):
+ """Get a metadata provider
+
+ Args:
+ provider_id: Its identifier, as given by `metadata_provider_add`.
+
+ Returns:
+ dict: the provider's information (same keys as the arguments
+ of `metadata_provider_add`), or None if it does not exist.
+ """
+ ...
+
+ @remote_api_endpoint('provider/getby')
+ def metadata_provider_get_by(self, provider):
+ """Get a metadata provider
+
+ Args:
+ provider (dict): A dictionary with keys:
+ * provider_name: Its name
+ * provider_url: Its URL
+
+ Returns:
+ dict: the provider's information (same keys as the arguments
+ of `metadata_provider_add`), or None if it does not exist.
+ """
+ ...
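+
+ # End-to-end provider registration and lookup (a sketch; all names and
+ # URLs are illustrative):
+ #
+ #     provider_id = storage.metadata_provider_add(
+ #         'hal', 'deposit-client', 'https://hal.example.org', {})
+ #     provider = storage.metadata_provider_get(provider_id)
+ #     same = storage.metadata_provider_get_by({
+ #         'provider_name': 'hal',
+ #         'provider_url': 'https://hal.example.org'})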
+
+ @remote_api_endpoint('algos/diff_directories')
+ def diff_directories(self, from_dir, to_dir, track_renaming=False):
+ """Compute the list of file changes introduced between two arbitrary
+ directories (insertion / deletion / modification / renaming of files).
+
+ Args:
+ from_dir (bytes): identifier of the directory to compare from
+ to_dir (bytes): identifier of the directory to compare to
+ track_renaming (bool): whether or not to track file renaming
+
+ Returns:
+ A list of dicts describing the introduced file changes
+ (see :func:`swh.storage.algos.diff.diff_directories`
+ for more details).
+ """
+ ...
+
+ @remote_api_endpoint('algos/diff_revisions')
+ def diff_revisions(self, from_rev, to_rev, track_renaming=False):
+ """Compute the list of file changes introduced between two arbitrary
+ revisions (insertion / deletion / modification / renaming of files).
+
+ Args:
+ from_rev (bytes): identifier of the revision to compare from
+ to_rev (bytes): identifier of the revision to compare to
+ track_renaming (bool): whether or not to track file renaming
+
+ Returns:
+ A list of dicts describing the introduced file changes
+ (see :func:`swh.storage.algos.diff.diff_revisions`
+ for more details).
+ """
+ ...
+
+ @remote_api_endpoint('algos/diff_revision')
+ def diff_revision(self, revision, track_renaming=False):
+ """Compute the list of file changes introduced by a specific revision
+ (insertion / deletion / modification / renaming of files) by comparing
+ it against its first parent.
+
+ Args:
+ revision (bytes): identifier of the revision from which to
+ compute the list of files changes
+ track_renaming (bool): whether or not to track file renaming
+
+ Returns:
+ A list of dicts describing the introduced file changes
+ (see :func:`swh.storage.algos.diff.diff_revision`
+ for more details).
+ """
+ ...
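+
+ # Computing the changes introduced by one revision (a sketch; ``rev_id``
+ # is an assumed revision sha1_git):
+ #
+ #     changes = storage.diff_revision(rev_id, track_renaming=True)
+ #     # each change describes an insertion/deletion/modification/renaming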
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -17,7 +17,6 @@
import psycopg2
import psycopg2.pool
-from swh.core.api import remote_api_endpoint
from swh.model.model import SHA1_SIZE
from swh.model.hashutil import ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.objstorage import get_objstorage
@@ -99,11 +98,9 @@
if db:
self.put_db(db)
- @remote_api_endpoint('check_config')
@timed
@db_transaction()
def check_config(self, *, check_write, db=None, cur=None):
- """Check that the storage is configured and ready to go."""
if not self.objstorage.check_config(check_write=check_write):
return False
@@ -232,48 +229,10 @@
# move metadata in place
db.skipped_content_add_from_temp(cur)
- @remote_api_endpoint('content/add')
@timed
@process_metrics
@db_transaction()
def content_add(self, content, db=None, cur=None):
- """Add content blobs to the storage
-
- Note: in case of DB errors, objects might have already been added to
- the object storage and will not be removed. Since addition to the
- object storage is idempotent, that should not be a problem.
-
- Args:
- contents (iterable): iterable of dictionaries representing
- individual pieces of content to add. Each dictionary has the
- following keys:
-
- - data (bytes): the actual content
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
- - reason (str): if status = absent, the reason why
- - origin (int): if status = absent, the origin we saw the
- content in
-
- Raises:
-
- In case of errors, nothing is stored in the db (in the
- objstorage, it could though). The following exceptions can
- occur:
-
- - HashCollision in case of collision
- - Any other exceptions raise by the db
-
- Returns:
- Summary dict with the following key and associated values:
-
- content:add: New contents added
- content:add:bytes: Sum of the contents' length data
- skipped_content:add: New skipped contents (no data) added
- """
content = [dict(c.items()) for c in content] # semi-shallow copy
now = datetime.datetime.now(tz=datetime.timezone.utc)
for item in content:
@@ -329,35 +288,15 @@
summary['content:add:bytes'] = content_bytes_added
return summary
- @remote_api_endpoint('content/update')
@timed
@db_transaction()
def content_update(self, content, keys=[], db=None, cur=None):
- """Update content blobs to the storage. Does nothing for unknown
- contents or skipped ones.
-
- Args:
- content (iterable): iterable of dictionaries representing
- individual pieces of content to update. Each dictionary has the
- following keys:
-
- - data (bytes): the actual content
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
-
- keys (list): List of keys (str) whose values needs an update, e.g.,
- new hash column
-
- """
# TODO: Add a check on input keys. How to properly implement
# this? We don't know yet the new columns.
if self.journal_writer:
raise NotImplementedError(
- 'content_update is not yet support with a journal_writer.')
+ 'content_update is not yet supported with a journal_writer.')
db.mktemp('content', cur)
select_keys = list(set(db.content_get_metadata_keys).union(set(keys)))
@@ -365,36 +304,10 @@
db.content_update_from_temp(keys_to_update=keys,
cur=cur)
- @remote_api_endpoint('content/add_metadata')
@timed
@process_metrics
@db_transaction()
def content_add_metadata(self, content, db=None, cur=None):
- """Add content metadata to the storage (like `content_add`, but
- without inserting to the objstorage).
-
- Args:
- content (iterable): iterable of dictionaries representing
- individual pieces of content to add. Each dictionary has the
- following keys:
-
- - length (int): content length (default: -1)
- - one key for each checksum algorithm in
- :data:`swh.model.hashutil.ALGORITHMS`, mapped to the
- corresponding checksum
- - status (str): one of visible, hidden, absent
- - reason (str): if status = absent, the reason why
- - origin (int): if status = absent, the origin we saw the
- content in
- - ctime (datetime): time of insertion in the archive
-
- Returns:
- Summary dict with the following key and associated values:
-
- content:add: New contents added
- skipped_content:add: New skipped contents (no data) added
- """
-
content = [self._normalize_content(c) for c in content]
for c in content:
self._validate_content(c)
@@ -413,31 +326,8 @@
return summary
- @remote_api_endpoint('content/data')
@timed
def content_get(self, content):
- """Retrieve in bulk contents and their data.
-
- This generator yields exactly as many items than provided sha1
- identifiers, but callers should not assume this will always be true.
-
- It may also yield `None` values in case an object was not found.
-
- Args:
- content: iterables of sha1
-
- Yields:
- Dict[str, bytes]: Generates streams of contents as dict with their
- raw data:
-
- - sha1 (bytes): content id
- - data (bytes): content's raw data
-
- Raises:
- ValueError in case of too much contents are required.
- cf. BULK_BLOCK_CONTENT_LEN_MAX
-
- """
# FIXME: Make this method support slicing the `data`.
if len(content) > BULK_BLOCK_CONTENT_LEN_MAX:
raise ValueError(
@@ -452,30 +342,9 @@
yield {'sha1': obj_id, 'data': data}
- @remote_api_endpoint('content/range')
@timed
@db_transaction()
def content_get_range(self, start, end, limit=1000, db=None, cur=None):
- """Retrieve contents within range [start, end] bound by limit.
-
- Note that this function may return more than one blob per hash. The
- limit is enforced with multiplicity (ie. two blobs with the same hash
- will count twice toward the limit).
-
- Args:
- **start** (bytes): Starting identifier range (expected smaller
- than end)
- **end** (bytes): Ending identifier range (expected larger
- than start)
- **limit** (int): Limit result (default to 1000)
-
- Returns:
- a dict with keys:
- - contents [dict]: iterable of contents in between the range.
- - next (bytes): There remains content in the range
- starting from this next sha1
-
- """
if limit is None:
raise ValueError('Development error: limit should not be None')
contents = []
@@ -493,31 +362,11 @@
'next': next_content,
}
- @remote_api_endpoint('content/partition')
@timed
@db_transaction()
def content_get_partition(
self, partition_id: int, nb_partitions: int, limit: int = 1000,
page_token: str = None, db=None, cur=None):
- """Splits contents into nb_partitions, and returns one of these based on
- partition_id (which must be in [0, nb_partitions-1])
-
- There is no guarantee on how the partitioning is done, or the
- result order.
-
- Args:
- partition_id (int): index of the partition to fetch
- nb_partitions (int): total number of partitions to split into
- limit (int): Limit result (default to 1000)
- page_token (Optional[str]): opaque token used for pagination.
-
- Returns:
- a dict with keys:
- - contents (List[dict]): iterable of contents in the partition.
- - **next_page_token** (Optional[str]): opaque token to be used as
- `page_token` for retrieving the next page. if absent, there is
- no more pages to gather.
- """
if limit is None:
raise ValueError('Development error: limit should not be None')
(start, end) = get_partition_bounds_bytes(
@@ -535,53 +384,20 @@
result2['next_page_token'] = hash_to_hex(result['next'])
return result2
- @remote_api_endpoint('content/metadata')
@timed
@db_transaction(statement_timeout=500)
def content_get_metadata(
self, contents: List[bytes],
db=None, cur=None) -> Dict[bytes, List[Dict]]:
- """Retrieve content metadata in bulk
-
- Args:
- content: iterable of content identifiers (sha1)
-
- Returns:
- a dict with keys the content's sha1 and the associated value
- either the existing content's metadata or None if the content does
- not exist.
-
- """
result: Dict[bytes, List[Dict]] = {sha1: [] for sha1 in contents}
for row in db.content_get_metadata_from_sha1s(contents, cur):
content_meta = dict(zip(db.content_get_metadata_keys, row))
result[content_meta['sha1']].append(content_meta)
return result
- @remote_api_endpoint('content/missing')
@timed
@db_transaction_generator()
def content_missing(self, content, key_hash='sha1', db=None, cur=None):
- """List content missing from storage
-
- Args:
- content ([dict]): iterable of dictionaries whose keys are
- either 'length' or an item of
- :data:`swh.model.hashutil.ALGORITHMS`;
- mapped to the corresponding checksum
- (or length).
-
- key_hash (str): name of the column to use as hash id
- result (default: 'sha1')
-
- Returns:
- iterable ([bytes]): missing content ids (as per the
- key_hash column)
-
- Raises:
- TODO: an exception when we get a hash collision.
-
- """
keys = db.content_hash_keys
if key_hash not in keys:
@@ -595,77 +411,27 @@
for obj in db.content_missing_from_list(content, cur):
yield obj[key_hash_idx]
- @remote_api_endpoint('content/missing/sha1')
@timed
@db_transaction_generator()
def content_missing_per_sha1(self, contents, db=None, cur=None):
- """List content missing from storage based only on sha1.
-
- Args:
- contents: Iterable of sha1 to check for absence.
-
- Returns:
- iterable: missing ids
-
- Raises:
- TODO: an exception when we get a hash collision.
-
- """
for obj in db.content_missing_per_sha1(contents, cur):
yield obj[0]
- @remote_api_endpoint('content/missing/sha1_git')
@timed
@db_transaction_generator()
def content_missing_per_sha1_git(self, contents, db=None, cur=None):
- """List content missing from storage based only on sha1_git.
-
- Args:
- contents (Iterable): An iterable of content id (sha1_git)
-
- Yields:
- missing contents sha1_git
- """
for obj in db.content_missing_per_sha1_git(contents, cur):
yield obj[0]
- @remote_api_endpoint('content/skipped/missing')
@timed
@db_transaction_generator()
def skipped_content_missing(self, contents, db=None, cur=None):
- """List skipped_content missing from storage
-
- Args:
- content: iterable of dictionaries containing the data for each
- checksum algorithm.
-
- Returns:
- iterable: missing signatures
-
- """
for content in db.skipped_content_missing(contents, cur):
yield dict(zip(db.content_hash_keys, content))
- @remote_api_endpoint('content/present')
@timed
@db_transaction()
def content_find(self, content, db=None, cur=None):
- """Find a content hash in db.
-
- Args:
- content: a dictionary representing one content hash, mapping
- checksum algorithm names (see swh.model.hashutil.ALGORITHMS) to
- checksum values
-
- Returns:
- a triplet (sha1, sha1_git, sha256) if the content exist
- or None otherwise.
-
- Raises:
- ValueError: in case the key of the dictionary is not sha1, sha1_git
- nor sha256.
-
- """
if not set(content).intersection(ALGORITHMS):
raise ValueError('content keys must contain at least one of: '
'sha1, sha1_git, sha256, blake2s256')
@@ -678,46 +444,15 @@
return [dict(zip(db.content_find_cols, content))
for content in contents]
- @remote_api_endpoint('content/get_random')
@timed
@db_transaction()
def content_get_random(self, db=None, cur=None):
- """Finds a random content id.
-
- Returns:
- a sha1_git
- """
return db.content_get_random(cur)
- @remote_api_endpoint('directory/add')
@timed
@process_metrics
@db_transaction()
def directory_add(self, directories, db=None, cur=None):
- """Add directories to the storage
-
- Args:
- directories (iterable): iterable of dictionaries representing the
- individual directories to add. Each dict has the following
- keys:
-
- - id (sha1_git): the id of the directory to add
- - entries (list): list of dicts for each entry in the
- directory. Each dict has the following keys:
-
- - name (bytes)
- - type (one of 'file', 'dir', 'rev'): type of the
- directory entry (file, directory, revision)
- - target (sha1_git): id of the object pointed at by the
- directory entry
- - perms (int): entry permissions
-
- Returns:
- Summary dict of keys with associated count as values:
-
- directory:add: Number of directories actually added
-
- """
directories = list(directories)
summary = {'directory:add': 0}
@@ -778,39 +513,15 @@
return summary
- @remote_api_endpoint('directory/missing')
@timed
@db_transaction_generator()
def directory_missing(self, directories, db=None, cur=None):
- """List directories missing from storage
-
- Args:
- directories (iterable): an iterable of directory ids
-
- Yields:
- missing directory ids
-
- """
for obj in db.directory_missing_from_list(directories, cur):
yield obj[0]
- @remote_api_endpoint('directory/ls')
@timed
@db_transaction_generator(statement_timeout=20000)
def directory_ls(self, directory, recursive=False, db=None, cur=None):
- """Get entries for one directory.
-
- Args:
- - directory: the directory to list entries from.
- - recursive: if flag on, this list recursively from this directory.
-
- Returns:
- List of entries for such directory.
-
- If `recursive=True`, names in the path of a dir/file not at the
- root are concatenated with a slash (`/`).
-
- """
if recursive:
res_gen = db.directory_walk(directory, cur=cur)
else:
@@ -819,77 +530,22 @@
for line in res_gen:
yield dict(zip(db.directory_ls_cols, line))
- @remote_api_endpoint('directory/path')
@timed
@db_transaction(statement_timeout=2000)
def directory_entry_get_by_path(self, directory, paths, db=None, cur=None):
- """Get the directory entry (either file or dir) from directory with path.
-
- Args:
- - directory: sha1 of the top level directory
- - paths: path to lookup from the top level directory. From left
- (top) to right (bottom).
-
- Returns:
- The corresponding directory entry if found, None otherwise.
-
- """
res = db.directory_entry_get_by_path(directory, paths, cur)
if res:
return dict(zip(db.directory_ls_cols, res))
- @remote_api_endpoint('directory/get_random')
@timed
@db_transaction()
def directory_get_random(self, db=None, cur=None):
- """Finds a random directory id.
-
- Returns:
- a sha1_git
- """
return db.directory_get_random(cur)
- @remote_api_endpoint('revision/add')
@timed
@process_metrics
@db_transaction()
def revision_add(self, revisions, db=None, cur=None):
- """Add revisions to the storage
-
- Args:
- revisions (Iterable[dict]): iterable of dictionaries representing
- the individual revisions to add. Each dict has the following
- keys:
-
- - **id** (:class:`sha1_git`): id of the revision to add
- - **date** (:class:`dict`): date the revision was written
- - **committer_date** (:class:`dict`): date the revision got
- added to the origin
- - **type** (one of 'git', 'tar'): type of the
- revision added
- - **directory** (:class:`sha1_git`): the directory the
- revision points at
- - **message** (:class:`bytes`): the message associated with
- the revision
- - **author** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
- - **committer** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
- - **metadata** (:class:`jsonb`): extra information as
- dictionary
- - **synthetic** (:class:`bool`): revision's nature (tarball,
- directory creates synthetic revision`)
- - **parents** (:class:`list[sha1_git]`): the parents of
- this revision
-
- date dictionaries have the form defined in :mod:`swh.model`.
-
- Returns:
- Summary dict of keys with associated count as values
-
- revision:add: New objects actually stored in db
-
- """
revisions = list(revisions)
summary = {'revision:add': 0}
@@ -925,39 +581,18 @@
return {'revision:add': len(revisions_missing)}
- @remote_api_endpoint('revision/missing')
@timed
@db_transaction_generator()
def revision_missing(self, revisions, db=None, cur=None):
- """List revisions missing from storage
-
- Args:
- revisions (iterable): revision ids
-
- Yields:
- missing revision ids
-
- """
if not revisions:
return
for obj in db.revision_missing_from_list(revisions, cur):
yield obj[0]
- @remote_api_endpoint('revision')
@timed
@db_transaction_generator(statement_timeout=1000)
def revision_get(self, revisions, db=None, cur=None):
- """Get all revisions from storage
-
- Args:
- revisions: an iterable of revision ids
-
- Returns:
- iterable: an iterable of revisions as dictionaries (or None if the
- revision doesn't exist)
-
- """
for line in db.revision_get_from_list(revisions, cur):
data = converters.db_to_revision(
dict(zip(db.revision_get_cols, line))
@@ -967,20 +602,9 @@
continue
yield data
- @remote_api_endpoint('revision/log')
@timed
@db_transaction_generator(statement_timeout=2000)
def revision_log(self, revisions, limit=None, db=None, cur=None):
- """Fetch revision entry from the given root revisions.
-
- Args:
- revisions: array of root revision to lookup
- limit: limitation on the output result. Default to None.
-
- Yields:
- List of revision log from such revisions root.
-
- """
for line in db.revision_log(revisions, limit, cur):
data = converters.db_to_revision(
dict(zip(db.revision_get_cols, line))
@@ -990,64 +614,21 @@
continue
yield data
- @remote_api_endpoint('revision/shortlog')
@timed
@db_transaction_generator(statement_timeout=2000)
def revision_shortlog(self, revisions, limit=None, db=None, cur=None):
- """Fetch the shortlog for the given revisions
-
- Args:
- revisions: list of root revisions to lookup
- limit: depth limitation for the output
-
- Yields:
- a list of (id, parents) tuples.
-
- """
yield from db.revision_shortlog(revisions, limit, cur)
- @remote_api_endpoint('revision/get_random')
@timed
@db_transaction()
def revision_get_random(self, db=None, cur=None):
- """Finds a random revision id.
-
- Returns:
- a sha1_git
- """
return db.revision_get_random(cur)
- @remote_api_endpoint('release/add')
@timed
@process_metrics
@db_transaction()
def release_add(self, releases, db=None, cur=None):
- """Add releases to the storage
-
- Args:
- releases (Iterable[dict]): iterable of dictionaries representing
- the individual releases to add. Each dict has the following
- keys:
-
- - **id** (:class:`sha1_git`): id of the release to add
- - **revision** (:class:`sha1_git`): id of the revision the
- release points to
- - **date** (:class:`dict`): the date the release was made
- - **name** (:class:`bytes`): the name of the release
- - **comment** (:class:`bytes`): the comment associated with
- the release
- - **author** (:class:`Dict[str, bytes]`): dictionary with
- keys: name, fullname, email
-
- the date dictionary has the form defined in :mod:`swh.model`.
-
- Returns:
- Summary dict of keys with associated count as values
-
- release:add: New objects contents actually stored in db
-
- """
releases = list(releases)
summary = {'release:add': 0}
@@ -1079,90 +660,33 @@
return {'release:add': len(releases_missing)}
- @remote_api_endpoint('release/missing')
@timed
@db_transaction_generator()
def release_missing(self, releases, db=None, cur=None):
- """List releases missing from storage
-
- Args:
- releases: an iterable of release ids
-
- Returns:
- a list of missing release ids
-
- """
if not releases:
return
for obj in db.release_missing_from_list(releases, cur):
yield obj[0]
- @remote_api_endpoint('release')
@timed
@db_transaction_generator(statement_timeout=500)
def release_get(self, releases, db=None, cur=None):
- """Given a list of sha1, return the releases's information
-
- Args:
- releases: list of sha1s
-
- Yields:
- dicts with the same keys as those given to `release_add`
- (or ``None`` if a release does not exist)
-
- """
for release in db.release_get_from_list(releases, cur):
data = converters.db_to_release(
dict(zip(db.release_get_cols, release))
)
yield data if data['target_type'] else None
- @remote_api_endpoint('release/get_random')
@timed
@db_transaction()
def release_get_random(self, db=None, cur=None):
- """Finds a random release id.
-
- Returns:
- a sha1_git
- """
return db.release_get_random(cur)
- @remote_api_endpoint('snapshot/add')
@timed
@process_metrics
@db_transaction()
def snapshot_add(self, snapshots, db=None, cur=None):
- """Add snapshots to the storage.
-
- Args:
- snapshot ([dict]): the snapshots to add, containing the
- following keys:
-
- - **id** (:class:`bytes`): id of the snapshot
- - **branches** (:class:`dict`): branches the snapshot contains,
- mapping the branch name (:class:`bytes`) to the branch target,
- itself a :class:`dict` (or ``None`` if the branch points to an
- unknown object)
-
- - **target_type** (:class:`str`): one of ``content``,
- ``directory``, ``revision``, ``release``,
- ``snapshot``, ``alias``
- - **target** (:class:`bytes`): identifier of the target
- (currently a ``sha1_git`` for all object kinds, or the name
- of the target branch for aliases)
-
- Raises:
- ValueError: if the origin or visit id does not exist.
-
- Returns:
-
- Summary dict of keys with associated count as values
-
- snapshot:add: Count of object actually stored in db
-
- """
created_temp_table = False
count = 0
@@ -1195,78 +719,21 @@
return {'snapshot:add': count}
- @remote_api_endpoint('snapshot/missing')
@timed
@db_transaction_generator()
def snapshot_missing(self, snapshots, db=None, cur=None):
- """List snapshots missing from storage
-
- Args:
- snapshots (iterable): an iterable of snapshot ids
-
- Yields:
- missing snapshot ids
-
- """
for obj in db.snapshot_missing_from_list(snapshots, cur):
yield obj[0]
- @remote_api_endpoint('snapshot')
@timed
@db_transaction(statement_timeout=2000)
def snapshot_get(self, snapshot_id, db=None, cur=None):
- """Get the content, possibly partial, of a snapshot with the given id
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
- Returns:
- dict: a dict with three keys:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
- """
return self.snapshot_get_branches(snapshot_id, db=db, cur=cur)
- @remote_api_endpoint('snapshot/by_origin_visit')
@timed
@db_transaction(statement_timeout=2000)
def snapshot_get_by_origin_visit(self, origin, visit, db=None, cur=None):
- """Get the content, possibly partial, of a snapshot for the given origin visit
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- origin (int): the origin identifier
- visit (int): the visit identifier
- Returns:
- dict: None if the snapshot does not exist;
- a dict with three keys otherwise:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
-
- """
snapshot_id = db.snapshot_get_by_origin_visit(origin, visit, cur)
if snapshot_id:
@@ -1274,38 +741,10 @@
return None
- @remote_api_endpoint('snapshot/latest')
@timed
@db_transaction(statement_timeout=4000)
def snapshot_get_latest(self, origin, allowed_statuses=None, db=None,
cur=None):
- """Get the content, possibly partial, of the latest snapshot for the
- given origin, optionally only from visits that have one of the given
- allowed_statuses
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- .. warning:: At most 1000 branches contained in the snapshot will be
- returned for performance reasons. In order to browse the whole
- set of branches, the method :meth:`snapshot_get_branches`
- should be used instead.
-
- Args:
- origin (str): the origin's URL
- allowed_statuses (list of str): list of visit statuses considered
- to find the latest snapshot for the visit. For instance,
- ``allowed_statuses=['full']`` will only consider visits that
- have successfully run to completion.
- Returns:
- dict: a dict with three keys:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than 1000
- branches.
- """
if isinstance(origin, int):
origin = self.origin_get({'id': origin}, db=db, cur=cur)
if not origin:
@@ -1323,53 +762,17 @@
'last origin visit references an unknown snapshot')
return snapshot
- @remote_api_endpoint('snapshot/count_branches')
@timed
@db_transaction(statement_timeout=2000)
def snapshot_count_branches(self, snapshot_id, db=None, cur=None):
- """Count the number of branches in the snapshot with the given id
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
-
- Returns:
- dict: A dict whose keys are the target types of branches and
- values their corresponding amount
- """
return dict([bc for bc in
db.snapshot_count_branches(snapshot_id, cur)])
- @remote_api_endpoint('snapshot/get_branches')
@timed
@db_transaction(statement_timeout=2000)
def snapshot_get_branches(self, snapshot_id, branches_from=b'',
branches_count=1000, target_types=None,
db=None, cur=None):
- """Get the content, possibly partial, of a snapshot with the given id
-
- The branches of the snapshot are iterated in the lexicographical
- order of their names.
-
- Args:
- snapshot_id (bytes): identifier of the snapshot
- branches_from (bytes): optional parameter used to skip branches
- whose name is lesser than it before returning them
- branches_count (int): optional parameter used to restrain
- the amount of returned branches
- target_types (list): optional parameter used to filter the
- target types of branch to return (possible values that can be
- contained in that list are `'content', 'directory',
- 'revision', 'release', 'snapshot', 'alias'`)
- Returns:
- dict: None if the snapshot does not exist;
- a dict with three keys otherwise:
- * **id**: identifier of the snapshot
- * **branches**: a dict of branches contained in the snapshot
- whose keys are the branches' names.
- * **next_branch**: the name of the first branch not returned
- or :const:`None` if the snapshot has less than
- `branches_count` branches after `branches_from` included.
- """
if snapshot_id == EMPTY_SNAPSHOT_ID:
return {
'id': snapshot_id,
@@ -1406,36 +809,15 @@
return None
- @remote_api_endpoint('snapshot/get_random')
@timed
@db_transaction()
def snapshot_get_random(self, db=None, cur=None):
- """Finds a random snapshot id.
-
- Returns:
- a sha1_git
- """
return db.snapshot_get_random(cur)
- @remote_api_endpoint('origin/visit/add')
@timed
@db_transaction()
def origin_visit_add(self, origin, date, type,
db=None, cur=None):
- """Add an origin_visit for the origin at ts with status 'ongoing'.
-
- Args:
- origin (str): visited origin's identifier or URL
- date (Union[str,datetime]): timestamp of such visit
- type (str): the type of loader used for the visit (hg, git, ...)
-
- Returns:
- dict: dictionary with keys origin and visit where:
-
- - origin: origin identifier
- - visit: the visit identifier for the new visit occurrence
-
- """
origin_url = origin
if isinstance(date, str):
@@ -1458,26 +840,11 @@
'visit': visit_id,
}
- @remote_api_endpoint('origin/visit/update')
@timed
@db_transaction()
def origin_visit_update(self, origin, visit_id, status=None,
metadata=None, snapshot=None,
db=None, cur=None):
- """Update an origin_visit's status.
-
- Args:
- origin (str): visited origin's URL
- visit_id: Visit's id
- status: Visit's new status
- metadata: Data associated to the visit
- snapshot (sha1_git): identifier of the snapshot to add to
- the visit
-
- Returns:
- None
-
- """
if not isinstance(origin, str):
raise TypeError('origin must be a string, not %r' % (origin,))
origin_url = origin
@@ -1503,25 +870,9 @@
db.origin_visit_update(origin_url, visit_id, updates, cur)
- @remote_api_endpoint('origin/visit/upsert')
@timed
@db_transaction()
def origin_visit_upsert(self, visits, db=None, cur=None):
- """Add a origin_visits with a specific id and with all its data.
- If there is already an origin_visit with the same
- `(origin_id, visit_id)`, overwrites it.
-
- Args:
- visits: iterable of dicts with keys:
-
- - **origin**: dict with keys either `id` or `url`
- - **visit**: origin visit id
- - **date**: timestamp of such visit
- - **status**: Visit's new status
- - **metadata**: Data associated to the visit
- - **snapshot**: identifier of the snapshot to add to
- the visit
- """
visits = copy.deepcopy(visits)
for visit in visits:
if isinstance(visit['date'], str):
@@ -1538,142 +889,55 @@
# TODO: upsert them all in a single query
db.origin_visit_upsert(**visit, cur=cur)
- @remote_api_endpoint('origin/visit/get')
@timed
@db_transaction_generator(statement_timeout=500)
def origin_visit_get(self, origin, last_visit=None, limit=None, db=None,
cur=None):
- """Retrieve all the origin's visit's information.
-
- Args:
- origin (str): The visited origin
- last_visit: Starting point from which listing the next visits
- Default to None
- limit (int): Number of results to return from the last visit.
- Default to None
-
- Yields:
- List of visits.
-
- """
for line in db.origin_visit_get_all(
origin, last_visit=last_visit, limit=limit, cur=cur):
data = dict(zip(db.origin_visit_get_cols, line))
yield data
- @remote_api_endpoint('origin/visit/find_by_date')
@timed
@db_transaction(statement_timeout=500)
def origin_visit_find_by_date(self, origin, visit_date, db=None, cur=None):
- """Retrieves the origin visit whose date is closest to the provided
- timestamp.
- In case of a tie, the visit with largest id is selected.
-
- Args:
- origin (str): The occurrence's origin (URL).
- target (datetime): target timestamp
-
- Returns:
- A visit.
-
- """
line = db.origin_visit_find_by_date(origin, visit_date, cur=cur)
if line:
return dict(zip(db.origin_visit_get_cols, line))
- @remote_api_endpoint('origin/visit/getby')
@timed
@db_transaction(statement_timeout=500)
def origin_visit_get_by(self, origin, visit, db=None, cur=None):
- """Retrieve origin visit's information.
-
- Args:
- origin: The occurrence's origin (identifier).
-
- Returns:
- The information on that particular (origin, visit) or None if
- it does not exist
-
- """
ori_visit = db.origin_visit_get(origin, visit, cur)
if not ori_visit:
return None
return dict(zip(db.origin_visit_get_cols, ori_visit))
- @remote_api_endpoint('origin/visit/get_latest')
@timed
@db_transaction(statement_timeout=4000)
def origin_visit_get_latest(
self, origin, allowed_statuses=None, require_snapshot=False,
db=None, cur=None):
- """Get the latest origin visit for the given origin, optionally
- looking only for those with one of the given allowed_statuses
- or for those with a known snapshot.
-
- Args:
- origin (str): the origin's URL
- allowed_statuses (list of str): list of visit statuses considered
- to find the latest visit. For instance,
- ``allowed_statuses=['full']`` will only consider visits that
- have successfully run to completion.
- require_snapshot (bool): If True, only a visit with a snapshot
- will be returned.
-
- Returns:
- dict: a dict with the following keys:
-
- - **origin**: the URL of the origin
- - **visit**: origin visit id
- - **type**: type of loader used for the visit
- - **date**: timestamp of such visit
- - **status**: Visit's new status
- - **metadata**: Data associated to the visit
- - **snapshot** (Optional[sha1_git]): identifier of the snapshot
- associated to the visit
- """
origin_visit = db.origin_visit_get_latest(
origin, allowed_statuses=allowed_statuses,
require_snapshot=require_snapshot, cur=cur)
if origin_visit:
return dict(zip(db.origin_visit_get_cols, origin_visit))
- @remote_api_endpoint('origin/visit/get_random')
@timed
@db_transaction()
def origin_visit_get_random(
self, type: str, db=None, cur=None) -> Optional[Dict[str, Any]]:
- """Randomly select one successful origin visit with <type>
- made in the last 3 months.
-
- Returns:
- dict representing an origin visit, in the same format as
- :py:meth:`origin_visit_get`.
-
- """
result = db.origin_visit_get_random(type, cur)
if result:
return dict(zip(db.origin_visit_get_cols, result))
else:
return None
- @remote_api_endpoint('object/find_by_sha1_git')
@timed
@db_transaction(statement_timeout=2000)
def object_find_by_sha1_git(self, ids, db=None, cur=None):
- """Return the objects found with the given ids.
-
- Args:
- ids: a generator of sha1_gits
-
- Returns:
- dict: a mapping from id to the list of objects found. Each object
- found is itself a dict with keys:
-
- - sha1_git: the input id
- - type: the type of object found
-
- """
ret = {id: [] for id in ids}
for retval in db.object_find_by_sha1_git(ids, cur=cur):
@@ -1683,33 +947,9 @@
return ret
- @remote_api_endpoint('origin/get')
@timed
@db_transaction(statement_timeout=500)
def origin_get(self, origins, db=None, cur=None):
- """Return origins, either all identified by their ids or all
- identified by tuples (type, url).
-
- If the url is given and the type is omitted, one of the origins with
- that url is returned.
-
- Args:
- origin: a list of dictionaries representing the individual
- origins to find.
- These dicts have the key url:
-
- - url (bytes): the url the origin points to
-
- Returns:
- dict: the origin dictionary with the keys:
-
- - id: origin's id
- - url: origin's url
-
- Raises:
- ValueError: if the url or the id don't exist.
-
- """
if isinstance(origins, dict):
# Old API
return_single = True
@@ -1733,67 +973,26 @@
else:
return [None if res['url'] is None else res for res in results]
- @remote_api_endpoint('origin/get_sha1')
@timed
@db_transaction_generator(statement_timeout=500)
def origin_get_by_sha1(self, sha1s, db=None, cur=None):
- """Return origins, identified by the sha1 of their URLs.
-
- Args:
- sha1s (list[bytes]): a list of sha1s
-
- Yields:
- dicts containing origin information as returned
- by :meth:`swh.storage.storage.Storage.origin_get`, or None if an
- origin matching the sha1 is not found.
-
- """
for line in db.origin_get_by_sha1(sha1s, cur):
if line[0] is not None:
yield dict(zip(db.origin_cols, line))
else:
yield None
- @remote_api_endpoint('origin/get_range')
@timed
@db_transaction_generator()
def origin_get_range(self, origin_from=1, origin_count=100,
db=None, cur=None):
- """Retrieve ``origin_count`` origins whose ids are greater
- or equal than ``origin_from``.
-
- Origins are sorted by id before retrieving them.
-
- Args:
- origin_from (int): the minimum id of origins to retrieve
- origin_count (int): the maximum number of origins to retrieve
-
- Yields:
- dicts containing origin information as returned
- by :meth:`swh.storage.storage.Storage.origin_get`.
- """
for origin in db.origin_get_range(origin_from, origin_count, cur):
yield dict(zip(db.origin_get_range_cols, origin))
- @remote_api_endpoint('origin/list')
@timed
@db_transaction()
def origin_list(self, page_token: Optional[str] = None, limit: int = 100,
*, db=None, cur=None) -> dict:
- """Returns the list of origins
-
- Args:
- page_token: opaque token used for pagination.
- limit: the maximum number of results to return
-
- Returns:
- dict: dict with the following keys:
- - **next_page_token** (str, optional): opaque token to be used as
- `page_token` for retrieving the next page. if absent, there is
- no more pages to gather.
- - **origins** (List[dict]): list of origins, as returned by
- `origin_get`.
- """
page_token = page_token or '0'
if not isinstance(page_token, str):
raise TypeError('page_token must be a string.')
@@ -1814,70 +1013,23 @@
return result
- @remote_api_endpoint('origin/search')
@timed
@db_transaction_generator()
def origin_search(self, url_pattern, offset=0, limit=50,
regexp=False, with_visit=False, db=None, cur=None):
- """Search for origins whose urls contain a provided string pattern
- or match a provided regular expression.
- The search is performed in a case insensitive way.
-
- Args:
- url_pattern (str): the string pattern to search for in origin urls
- offset (int): number of found origins to skip before returning
- results
- limit (int): the maximum number of found origins to return
- regexp (bool): if True, consider the provided pattern as a regular
- expression and return origins whose urls match it
- with_visit (bool): if True, filter out origins with no visit
-
- Yields:
- dicts containing origin information as returned
- by :meth:`swh.storage.storage.Storage.origin_get`.
- """
for origin in db.origin_search(url_pattern, offset, limit,
regexp, with_visit, cur):
yield dict(zip(db.origin_cols, origin))
- @remote_api_endpoint('origin/count')
@timed
@db_transaction()
def origin_count(self, url_pattern, regexp=False,
with_visit=False, db=None, cur=None):
- """Count origins whose urls contain a provided string pattern
- or match a provided regular expression.
- The pattern search in origin urls is performed in a case insensitive
- way.
-
- Args:
- url_pattern (str): the string pattern to search for in origin urls
- regexp (bool): if True, consider the provided pattern as a regular
- expression and return origins whose urls match it
- with_visit (bool): if True, filter out origins with no visit
-
- Returns:
- int: The number of origins matching the search criterion.
- """
return db.origin_count(url_pattern, regexp, with_visit, cur)
- @remote_api_endpoint('origin/add_multi')
@timed
@db_transaction()
def origin_add(self, origins, db=None, cur=None):
- """Add origins to the storage
-
- Args:
- origins: list of dictionaries representing the individual origins,
- with the following keys:
-
- - type: the origin type ('git', 'svn', 'deb', ...)
- - url (bytes): the url the origin points to
-
- Returns:
- list: given origins as dict updated with their id
-
- """
origins = copy.deepcopy(list(origins))
for origin in origins:
self.origin_add_one(origin, db=db, cur=cur)
@@ -1885,24 +1037,9 @@
send_metric('origin:add', count=len(origins), method_name='origin_add')
return origins
- @remote_api_endpoint('origin/add')
@timed
@db_transaction()
def origin_add_one(self, origin, db=None, cur=None):
- """Add origin to the storage
-
- Args:
- origin: dictionary representing the individual origin to add. This
- dict has the following keys:
-
- - type (FIXME: enum TBD): the origin type ('git', 'wget', ...)
- - url (bytes): the url the origin points to
-
- Returns:
- the id of the added origin, or of the identical one that already
- exists.
-
- """
origin_row = list(db.origin_get_by_url([origin['url']], cur))[0]
origin_url = dict(zip(db.origin_cols, origin_row))['url']
if origin_url:
@@ -1917,18 +1054,10 @@
@db_transaction(statement_timeout=500)
def stat_counters(self, db=None, cur=None):
- """compute statistics about the number of tuples in various tables
-
- Returns:
- dict: a dictionary mapping textual labels (e.g., content) to
- integer values (e.g., the number of tuples in table content)
-
- """
return {k: v for (k, v) in db.stat_counters()}
@db_transaction()
def refresh_stat_counters(self, db=None, cur=None):
- """Recomputes the statistics for `stat_counters`."""
keys = [
'content',
'directory',
@@ -1947,21 +1076,10 @@
for key in keys:
cur.execute('select * from swh_update_counter(%s)', (key,))
- @remote_api_endpoint('origin/metadata/add')
@timed
@db_transaction()
def origin_metadata_add(self, origin_url, ts, provider, tool, metadata,
db=None, cur=None):
- """ Add an origin_metadata for the origin at ts with provenance and
- metadata.
-
- Args:
- origin_url (str): the origin url for which the metadata is added
- ts (datetime): timestamp of the found metadata
- provider (int): the provider of metadata (ex:'hal')
- tool (int): tool used to extract metadata
- metadata (jsonb): the metadata retrieved at the time and location
- """
if isinstance(ts, str):
ts = dateutil.parser.parse(ts)
@@ -1970,54 +1088,16 @@
send_metric(
'origin_metadata:add', count=1, method_name='origin_metadata_add')
- @remote_api_endpoint('origin/metadata/get')
@timed
@db_transaction_generator(statement_timeout=500)
def origin_metadata_get_by(self, origin_url, provider_type=None, db=None,
cur=None):
- """Retrieve list of all origin_metadata entries for the origin_id
-
- Args:
- origin_url (str): the origin's URL
- provider_type (str): (optional) type of provider
-
- Returns:
- list of dicts: the origin_metadata dictionary with the keys:
-
- - origin_id (int): origin's id
- - discovery_date (datetime): timestamp of discovery
- - tool_id (int): metadata's extracting tool
- - metadata (jsonb)
- - provider_id (int): metadata's provider
- - provider_name (str)
- - provider_type (str)
- - provider_url (str)
-
- """
for line in db.origin_metadata_get_by(origin_url, provider_type, cur):
yield dict(zip(db.origin_metadata_get_cols, line))
- @remote_api_endpoint('tool/add')
@timed
@db_transaction()
def tool_add(self, tools, db=None, cur=None):
- """Add new tools to the storage.
-
- Args:
- tools (iterable of :class:`dict`): Tool information to add to
- storage. Each tool is a :class:`dict` with the following keys:
-
- - name (:class:`str`): name of the tool
- - version (:class:`str`): version of the tool
- - configuration (:class:`dict`): configuration of the tool,
- must be json-encodable
-
- Returns:
- :class:`dict`: All the tools inserted in storage
- (including the internal ``id``). The order of the list is not
- guaranteed to match the order of the initial list.
-
- """
db.mktemp_tool(cur)
db.copy_to(tools, 'tmp_tool',
['name', 'version', 'configuration'],
@@ -2028,21 +1108,9 @@
send_metric('tool:add', count=len(results), method_name='tool_add')
return results
- @remote_api_endpoint('tool/data')
@timed
@db_transaction(statement_timeout=500)
def tool_get(self, tool, db=None, cur=None):
- """Retrieve tool information.
-
- Args:
- tool (dict): Tool information we want to retrieve from storage.
- The dicts have the same keys as those used in :func:`tool_add`.
-
- Returns:
- dict: The full tool information if it exists (``id`` included),
- None otherwise.
-
- """
tool_conf = tool['configuration']
if isinstance(tool_conf, dict):
tool_conf = json.dumps(tool_conf)
@@ -2054,118 +1122,41 @@
return None
return dict(zip(db.tool_cols, idx))
- @remote_api_endpoint('provider/add')
@timed
@db_transaction()
def metadata_provider_add(self, provider_name, provider_type, provider_url,
metadata, db=None, cur=None):
- """Add a metadata provider.
-
- Args:
- provider_name (str): Its name
- provider_type (str): Its type (eg. `'deposit-client'`)
- provider_url (str): Its URL
- metadata: JSON-encodable object
-
- Returns:
- int: an identifier of the provider
- """
result = db.metadata_provider_add(provider_name, provider_type,
provider_url, metadata, cur)
send_metric(
'metadata_provider:add', count=1, method_name='metadata_provider')
return result
- @remote_api_endpoint('provider/get')
@timed
@db_transaction()
def metadata_provider_get(self, provider_id, db=None, cur=None):
- """Get a metadata provider
-
- Args:
- provider_id: Its identifier, as given by `metadata_provider_add`.
-
- Returns:
- dict: same as `metadata_provider_add`;
- or None if it does not exist.
- """
result = db.metadata_provider_get(provider_id)
if not result:
return None
return dict(zip(db.metadata_provider_cols, result))
- @remote_api_endpoint('provider/getby')
@timed
@db_transaction()
def metadata_provider_get_by(self, provider, db=None, cur=None):
- """Get a metadata provider
-
- Args:
- provider (dict): A dictionary with keys:
- * provider_name: Its name
- * provider_url: Its URL
-
- Returns:
- dict: same as `metadata_provider_add`;
- or None if it does not exist.
- """
result = db.metadata_provider_get_by(provider['provider_name'],
provider['provider_url'])
if not result:
return None
return dict(zip(db.metadata_provider_cols, result))
- @remote_api_endpoint('algos/diff_directories')
@timed
def diff_directories(self, from_dir, to_dir, track_renaming=False):
- """Compute the list of file changes introduced between two arbitrary
- directories (insertion / deletion / modification / renaming of files).
-
- Args:
- from_dir (bytes): identifier of the directory to compare from
- to_dir (bytes): identifier of the directory to compare to
- track_renaming (bool): whether or not to track files renaming
-
- Returns:
- A list of dict describing the introduced file changes
- (see :func:`swh.storage.algos.diff.diff_directories`
- for more details).
- """
return diff.diff_directories(self, from_dir, to_dir, track_renaming)
- @remote_api_endpoint('algos/diff_revisions')
@timed
def diff_revisions(self, from_rev, to_rev, track_renaming=False):
- """Compute the list of file changes introduced between two arbitrary
- revisions (insertion / deletion / modification / renaming of files).
-
- Args:
- from_rev (bytes): identifier of the revision to compare from
- to_rev (bytes): identifier of the revision to compare to
- track_renaming (bool): whether or not to track files renaming
-
- Returns:
- A list of dict describing the introduced file changes
- (see :func:`swh.storage.algos.diff.diff_directories`
- for more details).
- """
return diff.diff_revisions(self, from_rev, to_rev, track_renaming)
- @remote_api_endpoint('algos/diff_revision')
@timed
def diff_revision(self, revision, track_renaming=False):
- """Compute the list of file changes introduced by a specific revision
- (insertion / deletion / modification / renaming of files) by comparing
- it against its first parent.
-
- Args:
- revision (bytes): identifier of the revision from which to
- compute the list of files changes
- track_renaming (bool): whether or not to track files renaming
-
- Returns:
- A list of dict describing the introduced file changes
- (see :func:`swh.storage.algos.diff.diff_directories`
- for more details).
- """
return diff.diff_revision(self, revision, track_renaming)
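
The decorators and docstrings deleted in the hunks above are not lost: per this revision's intent, they move onto the new swh.storage.interface.StorageInterface class, which the RPC layer and the concrete backends are then pointed at. The interface module itself is not included in the hunks shown here, so the following is only a hedged sketch of its likely shape, with method names, signatures, and endpoint paths copied from the deletions above:

# Illustrative sketch of swh/storage/interface.py -- the real module is not
# part of the hunks shown in this diff; only two methods are sketched here.
from swh.core.api import remote_api_endpoint


class StorageInterface:
    @remote_api_endpoint('origin/add')
    def origin_add_one(self, origin):
        """Add origin to the storage.

        (Docstring moved here verbatim from Storage.origin_add_one.)
        """
        ...

    @remote_api_endpoint('algos/diff_directories')
    def diff_directories(self, from_dir, to_dir, track_renaming=False):
        """Compute the list of file changes introduced between two
        arbitrary directories.

        (Docstring likewise moved from Storage.diff_directories.)
        """
        ...

With the documentation and endpoint paths on the interface, each backend (local, remote, in-memory) only carries its implementation, and the docstrings no longer have to be kept in sync across them.
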
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -6,6 +6,7 @@
import copy
from contextlib import contextmanager
import datetime
+import inspect
import itertools
import math
import queue
@@ -28,6 +29,7 @@
from swh.model.hypothesis_strategies import objects
from swh.storage import HashCollision
from swh.storage.converters import origin_url_to_sha1 as sha1
+from swh.storage.interface import StorageInterface
from .storage_data import data
@@ -95,6 +97,34 @@
"""
maxDiff = None # type: ClassVar[Optional[int]]
+ def test_types(self, swh_storage):
+ """Checks all methods of StorageInterface are implemented by this
+ backend, and that they have the same signature."""
+ # Create an instance of the protocol (which cannot be instantiated
+ # directly, so this creates a subclass, then instantiates it)
+ interface = type('_', (StorageInterface,), {})()
+
+ assert 'content_add' in dir(interface)
+
+ missing_methods = []
+
+ for meth_name in dir(interface):
+ if meth_name.startswith('_'):
+ continue
+ interface_meth = getattr(interface, meth_name)
+ try:
+ concrete_meth = getattr(swh_storage, meth_name)
+ except AttributeError:
+ missing_methods.append(meth_name)
+ continue
+
+ expected_signature = inspect.signature(interface_meth)
+ actual_signature = inspect.signature(concrete_meth)
+
+ assert expected_signature == actual_signature, meth_name
+
+ assert missing_methods == []
+
def test_check_config(self, swh_storage):
assert swh_storage.check_config(check_write=True)
assert swh_storage.check_config(check_write=False)
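
The new test_types check relies on StorageInterface behaving like a typing Protocol, as its comment suggests: the class cannot be instantiated directly, but a trivial concrete subclass can be, and inspect.signature can then compare every public interface method against the backend's implementation. A self-contained sketch of the same technique (the Example and ExampleImpl names are invented for illustration):

import inspect
from typing import Protocol  # typing_extensions.Protocol on Python < 3.8


class Example(Protocol):
    def get(self, key: str) -> bytes:
        ...


class ExampleImpl:
    def get(self, key: str) -> bytes:
        return b''


# A Protocol cannot be instantiated directly, but a plain subclass can;
# this mirrors the type('_', (StorageInterface,), {})() trick above.
interface = type('_', (Example,), {})()
impl = ExampleImpl()

for meth_name in dir(interface):
    if meth_name.startswith('_'):
        continue
    expected = inspect.signature(getattr(interface, meth_name))
    actual = inspect.signature(getattr(impl, meth_name))
    assert expected == actual, meth_name

Because inspect.signature is applied to bound methods, self is stripped on both sides, so the comparison checks exactly the caller-visible signatures.
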
D2587: Move Storage documentation and endpoint paths to a new StorageInterface class