Changeset View
Changeset View
Standalone View
Standalone View
swh/web/api/views/metadata.py
Show All 38 Lines | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target) | ||||
``<type> <IRI>``. Required. | ``<type> <IRI>``. Required. | ||||
:query string after: ISO8601 representation of the minimum timestamp of metadata | :query string after: ISO8601 representation of the minimum timestamp of metadata | ||||
to fetch. Defaults to allowing all metadata. | to fetch. Defaults to allowing all metadata. | ||||
:query int limit: Maximum number of metadata objects to return. | :query int limit: Maximum number of metadata objects to return. | ||||
{common_headers} | {common_headers} | ||||
:>jsonarr string target: SWHID of the object described by this metadata | :>jsonarr string target: SWHID of the object described by this metadata | ||||
(absent when ``target`` is not a core SWHID, i.e. it does not have type | |||||
``cnt``/``dir``/``rev``/``rel``/``snp``) | |||||
:>jsonarr string discovery_date: ISO8601/RFC3339 timestamp of the moment this | :>jsonarr string discovery_date: ISO8601/RFC3339 timestamp of the moment this | ||||
metadata was collected. | metadata was collected. | ||||
:>jsonarr object authority: authority this metadata is coming from | :>jsonarr object authority: authority this metadata is coming from | ||||
:>jsonarr object fetcher: tool used to fetch the metadata | :>jsonarr object fetcher: tool used to fetch the metadata | ||||
:>jsonarr string format: short identifier of the format of the metadata | :>jsonarr string format: short identifier of the format of the metadata | ||||
:>jsonarr string metadata_url: link to download the metadata "blob" itself | :>jsonarr string metadata_url: link to download the metadata "blob" itself | ||||
:>jsonarr string origin: URL of the origin in whose context | :>jsonarr string origin: URL of the origin in whose context | ||||
the metadata is valid, if any | the metadata is valid, if any | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | def api_raw_extrinsic_metadata_swhid(request: Request, target: str): | ||||
try: | try: | ||||
limit = int(limit_str) | limit = int(limit_str) | ||||
except ValueError: | except ValueError: | ||||
raise BadInputExc("'limit' parameter must be an integer.") from None | raise BadInputExc("'limit' parameter must be an integer.") from None | ||||
limit = min(limit, 10000) | limit = min(limit, 10000) | ||||
try: | try: | ||||
parsed_target = swhids.CoreSWHID.from_string(target).to_extended() | parsed_target = swhids.ExtendedSWHID.from_string(target) | ||||
except swhids.ValidationError as e: | except swhids.ValidationError as e: | ||||
raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | raise BadInputExc(f"Invalid target SWHID: {e}") from None | ||||
try: | |||||
swhids.CoreSWHID.from_string(target) | |||||
except swhids.ValidationError: | |||||
# Can be parsed as an extended SWHID, but not as a core SWHID | |||||
extended_swhid = True | |||||
else: | |||||
extended_swhid = False | |||||
if page_token_str is not None: | if page_token_str is not None: | ||||
page_token = base64.urlsafe_b64decode(page_token_str) | page_token = base64.urlsafe_b64decode(page_token_str) | ||||
else: | else: | ||||
page_token = None | page_token = None | ||||
result_page = archive.storage.raw_extrinsic_metadata_get( | result_page = archive.storage.raw_extrinsic_metadata_get( | ||||
target=parsed_target, | target=parsed_target, | ||||
authority=authority, | authority=authority, | ||||
after=after, | after=after, | ||||
page_token=page_token, | page_token=page_token, | ||||
limit=limit, | limit=limit, | ||||
) | ) | ||||
filename = None | |||||
if parsed_target.object_type == swhids.ExtendedObjectType.ORIGIN: | |||||
origin_sha1 = hashutil.hash_to_hex(parsed_target.object_id) | |||||
anlambert: Why a tuple here ? | |||||
Done Inline ActionsWhy not? It is guaranteed to have a single result vlorentz: Why not? It is guaranteed to have a single result | |||||
Not Done Inline ActionsOh I see, this is for extracting the first element of the list, I am not used to this syntax so my confusion here. anlambert: Oh I see, this is for extracting the first element of the list, I am not used to this syntax so… | |||||
Done Inline Actionsonly element of the list (to extract the first, it would be (first, *rest) = ...) vlorentz: only element of the list (to extract the first, it would be `(first, *rest) = ...`) | |||||
(origin_info,) = list(archive.lookup_origins_by_sha1s([origin_sha1])) | |||||
if origin_info is not None: | |||||
filename = re.sub("[:/_.]+", "_", origin_info["url"]) + "_metadata" | |||||
if filename is None: | |||||
filename = f"{target}_metadata" | |||||
results = [] | results = [] | ||||
for metadata in result_page.results: | for metadata in result_page.results: | ||||
result = converters.from_raw_extrinsic_metadata(metadata) | result = converters.from_raw_extrinsic_metadata(metadata) | ||||
if extended_swhid: | |||||
# Keep extended SWHIDs away from the public API as much as possible. | |||||
Not Done Inline ActionsHATEOAS ? anlambert: HATEOAS ? | |||||
Done Inline Actionsby following the URL from the other API endpoint, instead of computing the URL directly vlorentz: by following the URL from the other API endpoint, instead of computing the URL directly | |||||
# (It is part of the URL, but not documented, and only accessed via | |||||
# the link in the response of api-1-origin) | |||||
del result["target"] | |||||
# We can't reliably send metadata directly, because it is a bytestring, | # We can't reliably send metadata directly, because it is a bytestring, | ||||
# and we have to return JSON documents. | # and we have to return JSON documents. | ||||
result["metadata_url"] = reverse( | result["metadata_url"] = reverse( | ||||
"api-1-raw-extrinsic-metadata-get", | "api-1-raw-extrinsic-metadata-get", | ||||
url_args={"id": hashutil.hash_to_hex(metadata.id)}, | url_args={"id": hashutil.hash_to_hex(metadata.id)}, | ||||
query_params={"filename": f"{target}_metadata"}, | query_params={"filename": filename}, | ||||
request=request, | request=request, | ||||
) | ) | ||||
results.append(result) | results.append(result) | ||||
headers: Dict[str, str] = {} | headers: Dict[str, str] = {} | ||||
if result_page.next_page_token is not None: | if result_page.next_page_token is not None: | ||||
headers["link-next"] = reverse( | headers["link-next"] = reverse( | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | def api_raw_extrinsic_metadata_swhid_authorities(request: Request, target: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)/authorities/ | .. http:get:: /api/1/raw-extrinsic-metadata/swhid/(target)/authorities/ | ||||
Returns a list of metadata authorities that provided metadata on | Returns a list of metadata authorities that provided metadata on | ||||
the given target. | the given target. | ||||
They can then be used to get the raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on | They can then be used to get the raw `extrinsic metadata <https://docs.softwareheritage.org/devel/glossary.html#term-extrinsic-metadata>`__ collected on | ||||
that object from each of the authorities. | that object from each of the authorities. | ||||
This endpoint should only be used directly to retrieve metadata from | |||||
core SWHIDs (with type ``cnt``, ``dir``, ``rev``, ``rel``, and ``snp``). | |||||
For "extended" SWHIDs such as origins, the URL in the | |||||
``origin_metadata_authorities_url`` field of | |||||
:http:get:`/api/1/origin/(origin_url)/get/` should be used instead of building | |||||
this URL directly. | |||||
:param string target: The core SWHID of the object whose metadata-providing | :param string target: The core SWHID of the object whose metadata-providing | ||||
authorities should be returned | authorities should be returned | ||||
{common_headers} | {common_headers} | ||||
:>jsonarr string type: Type of authority (deposit_client, forge, registry) | :>jsonarr string type: Type of authority (deposit_client, forge, registry) | ||||
:>jsonarr string url: Unique IRI identifying the authority | :>jsonarr string url: Unique IRI identifying the authority | ||||
:>jsonarr object metadata_list_url: URL to get the list of metadata objects | :>jsonarr object metadata_list_url: URL to get the list of metadata objects | ||||
on the given object from this authority | on the given object from this authority | ||||
:statuscode 200: no error | :statuscode 200: no error | ||||
**Example:** | **Example:** | ||||
.. parsed-literal:: | .. parsed-literal:: | ||||
:swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/authorities/` | :swh_web_api:`raw-extrinsic-metadata/swhid/swh:1:dir:a2faa28028657859c16ff506924212b33f0e1307/authorities/` | ||||
""" # noqa | """ # noqa | ||||
try: | try: | ||||
parsed_target = swhids.CoreSWHID.from_string(target).to_extended() | parsed_target = swhids.ExtendedSWHID.from_string(target) | ||||
except swhids.ValidationError as e: | except swhids.ValidationError as e: | ||||
raise BadInputExc(f"Invalid target SWHID: {e.args[0]}") from None | raise BadInputExc(f"Invalid target SWHID: {e}") from None | ||||
authorities = archive.storage.raw_extrinsic_metadata_get_authorities( | authorities = archive.storage.raw_extrinsic_metadata_get_authorities( | ||||
target=parsed_target | target=parsed_target | ||||
) | ) | ||||
results = [ | results = [ | ||||
{ | { | ||||
**authority.to_dict(), | **authority.to_dict(), | ||||
"metadata_list_url": reverse( | "metadata_list_url": reverse( | ||||
Show All 13 Lines |
Why a tuple here ?