Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/interface.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
from typing import Any, Dict, Iterable, List, Optional, Union | from typing import Any, Dict, Iterable, List, Optional, Union | ||||
from swh.core.api import remote_api_endpoint | from swh.core.api import remote_api_endpoint | ||||
from swh.model.identifiers import SWHID | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Revision, | Revision, | ||||
Release, | Release, | ||||
Snapshot, | Snapshot, | ||||
SkippedContent, | SkippedContent, | ||||
MetadataAuthority, | |||||
MetadataAuthorityType, | |||||
MetadataFetcher, | |||||
MetadataTargetType, | |||||
RawExtrinsicMetadata, | |||||
) | ) | ||||
def deprecated(f): | def deprecated(f): | ||||
f.deprecated_endpoint = True | f.deprecated_endpoint = True | ||||
return f | return f | ||||
▲ Show 20 Lines • Show All 1,073 Lines • ▼ Show 20 Lines | def stat_counters(self): | ||||
""" | """ | ||||
... | ... | ||||
def refresh_stat_counters(self): | def refresh_stat_counters(self): | ||||
"""Recomputes the statistics for `stat_counters`.""" | """Recomputes the statistics for `stat_counters`.""" | ||||
... | ... | ||||
@remote_api_endpoint("content/metadata/add") | @remote_api_endpoint("object_metadata/add") | ||||
def content_metadata_add( | def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: | ||||
self, | """Add extrinsic metadata on objects (contents, directories, ...). | ||||
id: str, | |||||
context: Dict[str, Union[str, bytes, int]], | |||||
discovery_date: datetime.datetime, | |||||
authority: Dict[str, Any], | |||||
fetcher: Dict[str, Any], | |||||
format: str, | |||||
metadata: bytes, | |||||
) -> None: | |||||
"""Add a content_metadata for the content at discovery_date, | |||||
obtained using the `fetcher` from the `authority`. | |||||
The authority and fetcher must be known to the storage before | |||||
using this endpoint. | |||||
If there is already content metadata for the same content, authority, | |||||
fetcher, and at the same date; the new one will be either dropped or | |||||
will replace the existing one | |||||
(it is unspecified which one of these two behaviors happens). | |||||
Args: | |||||
discovery_date: when the metadata was fetched. | |||||
authority: a dict containing keys `type` and `url`. | |||||
fetcher: a dict containing keys `name` and `version`. | |||||
format: text field indicating the format of the content of the | |||||
metadata: blob of raw metadata | |||||
""" | |||||
... | |||||
@remote_api_endpoint("content/metadata/get") | |||||
def content_metadata_get( | |||||
self, | |||||
id: str, | |||||
authority: Dict[str, str], | |||||
after: Optional[datetime.datetime] = None, | |||||
page_token: Optional[bytes] = None, | |||||
limit: int = 1000, | |||||
) -> Dict[str, Any]: | |||||
"""Retrieve list of all content_metadata entries for the id | |||||
Args: | |||||
id: the content's SWHID | |||||
authority: a dict containing keys `type` and `url`. | |||||
after: minimum discovery_date for a result to be returned | |||||
page_token: opaque token, used to get the next page of results | |||||
limit: maximum number of results to be returned | |||||
Returns: | |||||
dict with keys `next_page_token` and `results`. | |||||
`next_page_token` is an opaque token that is used to get the | |||||
next page of results, or `None` if there are no more results. | |||||
`results` is a list of dicts in the format: | |||||
.. code-block: python | |||||
{ | |||||
'authority': {'type': ..., 'url': ...}, | |||||
'fetcher': {'name': ..., 'version': ...}, | |||||
'discovery_date': ..., | |||||
'format': '...', | |||||
'metadata': b'...', | |||||
'context': { ... }, | |||||
} | |||||
""" | |||||
... | |||||
@remote_api_endpoint("origin/metadata/add") | |||||
def origin_metadata_add( | |||||
self, | |||||
origin_url: str, | |||||
discovery_date: datetime.datetime, | |||||
authority: Dict[str, Any], | |||||
fetcher: Dict[str, Any], | |||||
format: str, | |||||
metadata: bytes, | |||||
) -> None: | |||||
"""Add an origin_metadata for the origin at discovery_date, | |||||
obtained using the `fetcher` from the `authority`. | |||||
The authority and fetcher must be known to the storage before | The authority and fetcher must be known to the storage before | ||||
using this endpoint. | using this endpoint. | ||||
If there is already origin metadata for the same origin, authority, | If there is already metadata for the same object, authority, | ||||
ardumont (inline comment): `content` might be a bit overloaded here, why not skip it.
Would
```
if there is already…
```
be correct enough? | |||||
vlorentz (inline comment, done): yeah, I forgot to replace it (copied from content_metadata_add, not origin_metadata_add like Phabricator believes) | |||||
fetcher, and at the same date; the new one will be either dropped or | fetcher, and at the same date; the new one will be either dropped or | ||||
will replace the existing one | will replace the existing one | ||||
(it is unspecified which one of these two behaviors happens). | (it is unspecified which one of these two behaviors happens). | ||||
Args: | Args: | ||||
discovery_date: when the metadata was fetched. | metadata: iterable of RawExtrinsicMetadata objects to be inserted. | ||||
authority: a dict containing keys `type` and `url`. | |||||
fetcher: a dict containing keys `name` and `version`. | |||||
format: text field indicating the format of the content of the | |||||
metadata: blob of raw metadata | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("origin/metadata/get") | @remote_api_endpoint("object_metadata/get") | ||||
def origin_metadata_get( | def object_metadata_get( | ||||
self, | self, | ||||
origin_url: str, | object_type: MetadataTargetType, | ||||
authority: Dict[str, str], | id: Union[str, SWHID], | ||||
authority: MetadataAuthority, | |||||
after: Optional[datetime.datetime] = None, | after: Optional[datetime.datetime] = None, | ||||
page_token: Optional[bytes] = None, | page_token: Optional[bytes] = None, | ||||
limit: int = 1000, | limit: int = 1000, | ||||
) -> Dict[str, Any]: | ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: | ||||
"""Retrieve list of all origin_metadata entries for the origin_url | """Retrieve list of all object_metadata entries for the id | ||||
Args: | Args: | ||||
origin_url: the origin's URL | object_type: one of the values of swh.model.model.MetadataTargetType | ||||
id: a URL if object_type is 'origin', else a core SWHID | |||||
authority: a dict containing keys `type` and `url`. | authority: a MetadataAuthority object (identified by its `type` and `url`). | ||||
after: minimum discovery_date for a result to be returned | after: minimum discovery_date for a result to be returned | ||||
page_token: opaque token, used to get the next page of results | page_token: opaque token, used to get the next page of results | ||||
limit: maximum number of results to be returned | limit: maximum number of results to be returned | ||||
Returns: | Returns: | ||||
dict with keys `next_page_token` and `results`. | dict with keys `next_page_token` and `results`. | ||||
`next_page_token` is an opaque token that is used to get the | `next_page_token` is an opaque token that is used to get the | ||||
next page of results, or `None` if there are no more results. | next page of results, or `None` if there are no more results. | ||||
`results` is a list of dicts in the format: | `results` is a list of RawExtrinsicMetadata objects: | ||||
ardumont (inline comment, not done): Drop the extra "." ;)
```
... objects:
``` | |||||
.. code-block: python | |||||
{ | |||||
'authority': {'type': ..., 'url': ...}, | |||||
'fetcher': {'name': ..., 'version': ...}, | |||||
'discovery_date': ..., | |||||
'format': '...', | |||||
'metadata': b'...' | |||||
} | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("fetcher/add") | @remote_api_endpoint("metadata_fetcher/add") | ||||
def metadata_fetcher_add( | def metadata_fetcher_add(self, fetchers: Iterable[MetadataFetcher],) -> None: | ||||
self, name: str, version: str, metadata: Dict[str, Any] | """Add new metadata fetchers to the storage. | ||||
) -> None: | |||||
"""Add a new metadata fetcher to the storage. | |||||
`name` and `version` together are a unique identifier of this | Their `name` and `version` together are unique identifiers of this | ||||
fetcher; and `metadata` is an arbitrary dict of JSONable data | fetcher; and `metadata` is an arbitrary dict of JSONable data | ||||
with information about this fetcher. | with information about this fetcher, which must not be `None` | ||||
(but may be empty). | |||||
Args: | Args: | ||||
name: the name of the fetcher | fetchers: iterable of MetadataFetcher to be inserted | ||||
version: version of the fetcher | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("fetcher/get") | @remote_api_endpoint("metadata_fetcher/get") | ||||
def metadata_fetcher_get(self, name: str, version: str) -> Optional[Dict[str, Any]]: | def metadata_fetcher_get( | ||||
self, name: str, version: str | |||||
) -> Optional[MetadataFetcher]: | |||||
"""Retrieve information about a fetcher | """Retrieve information about a fetcher | ||||
Args: | Args: | ||||
name: the name of the fetcher | name: the name of the fetcher | ||||
version: version of the fetcher | version: version of the fetcher | ||||
Returns: | Returns: | ||||
dictionary with keys `name`, `version`, and `metadata`; or None | a MetadataFetcher object (with a non-None metadata field) if it is known, | ||||
if the fetcher is not known | else None. | ||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("authority/add") | @remote_api_endpoint("metadata_authority/add") | ||||
def metadata_authority_add( | def metadata_authority_add(self, authorities: Iterable[MetadataAuthority]) -> None: | ||||
self, type: str, url: str, metadata: Dict[str, Any] | """Add new metadata authorities to the storage. | ||||
) -> None: | |||||
ardumont (inline comment, not done): s/of/for ? | |||||
vlorentz (inline comment, done): I prefer "of" | |||||
"""Add a metadata authority | Their `type` and `url` together are unique identifiers of this | ||||
authority; and `metadata` is an arbitrary dict of JSONable data | |||||
ardumont (inline comment, done):
```
which must exist (could be empty but not None though)
```
What's empty though ;) ? | |||||
vlorentz (inline comment, done): `{}` | |||||
ardumont (inline comment, done): ack, thx. | |||||
with information about this authority, which must not be `None` | |||||
(but may be empty). | |||||
Args: | Args: | ||||
type: one of "deposit", "forge", or "registry" | authorities: iterable of MetadataAuthority to be inserted | ||||
url: unique URI identifying the authority | |||||
metadata: JSON-encodable object | |||||
""" | """ | ||||
... | ... | ||||
@remote_api_endpoint("authority/get") | @remote_api_endpoint("metadata_authority/get") | ||||
def metadata_authority_get(self, type: str, url: str) -> Optional[Dict[str, Any]]: | def metadata_authority_get( | ||||
self, type: MetadataAuthorityType, url: str | |||||
) -> Optional[MetadataAuthority]: | |||||
"""Retrieve information about an authority | """Retrieve information about an authority | ||||
Args: | Args: | ||||
type: one of "deposit", "forge", or "registry" | type: one of "deposit", "forge", or "registry" | ||||
url: unique URI identifying the authority | url: unique URI identifying the authority | ||||
Returns: | Returns: | ||||
dictionary with keys `type`, `url`, and `metadata`; or None | a MetadataAuthority object (with a non-None metadata field) if it is known, | ||||
if the authority is not known | else None. | ||||
""" | """ | ||||
... | ... | ||||
@deprecated | @deprecated | ||||
@remote_api_endpoint("algos/diff_directories") | @remote_api_endpoint("algos/diff_directories") | ||||
def diff_directories(self, from_dir, to_dir, track_renaming=False): | def diff_directories(self, from_dir, to_dir, track_renaming=False): | ||||
"""Compute the list of file changes introduced between two arbitrary | """Compute the list of file changes introduced between two arbitrary | ||||
directories (insertion / deletion / modification / renaming of files). | directories (insertion / deletion / modification / renaming of files). | ||||
▲ Show 20 Lines • Show All 65 Lines • Show Last 20 Lines |
ardumont: `content` might be a bit overloaded here, why not skip it.
Would "if there is already…" (suggested wording, truncated in this view)
be correct enough?