Changeset View
Changeset View
Standalone View
Standalone View
swh/web/api/views/content.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import functools | import functools | ||||
from typing import Optional | |||||
from django.http import HttpResponse | from django.http import HttpResponse | ||||
from rest_framework.request import Request | |||||
from swh.web.api import utils | from swh.web.api import utils | ||||
from swh.web.api.apidoc import api_doc, format_docstring | from swh.web.api.apidoc import api_doc, format_docstring | ||||
from swh.web.api.apiurls import api_route | from swh.web.api.apiurls import api_route | ||||
from swh.web.api.views.utils import api_lookup | from swh.web.api.views.utils import api_lookup | ||||
from swh.web.common import archive | from swh.web.common import archive | ||||
from swh.web.common.exc import NotFoundExc | from swh.web.common.exc import NotFoundExc | ||||
from swh.web.common.utils import reverse | from swh.web.common.utils import reverse | ||||
@api_route( | @api_route( | ||||
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/", | r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/", | ||||
"api-1-content-filetype", | "api-1-content-filetype", | ||||
checksum_args=["q"], | checksum_args=["q"], | ||||
) | ) | ||||
@api_doc("/content/filetype/") | @api_doc("/content/filetype/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_content_filetype(request, q): | def api_content_filetype(request: Request, q: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/[(hash_type):](hash)/filetype/ | .. http:get:: /api/1/content/[(hash_type):](hash)/filetype/ | ||||
Get information about the detected MIME type of a content object. | Get information about the detected MIME type of a content object. | ||||
:param string hash_type: optional parameter specifying which hashing algorithm | :param string hash_type: optional parameter specifying which hashing algorithm | ||||
has been used to compute the content checksum. It can be either ``sha1``, | has been used to compute the content checksum. It can be either ``sha1``, | ||||
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ||||
Show All 33 Lines | |||||
@api_route( | @api_route( | ||||
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/", | r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/", | ||||
"api-1-content-language", | "api-1-content-language", | ||||
checksum_args=["q"], | checksum_args=["q"], | ||||
) | ) | ||||
@api_doc("/content/language/") | @api_doc("/content/language/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_content_language(request, q): | def api_content_language(request: Request, q: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/[(hash_type):](hash)/language/ | .. http:get:: /api/1/content/[(hash_type):](hash)/language/ | ||||
Get information about the programming language used in a content object. | Get information about the programming language used in a content object. | ||||
Note: this endpoint currently returns no data. | Note: this endpoint currently returns no data. | ||||
:param string hash_type: optional parameter specifying which hashing algorithm | :param string hash_type: optional parameter specifying which hashing algorithm | ||||
Show All 34 Lines | |||||
@api_route( | @api_route( | ||||
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/license/", | r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/license/", | ||||
"api-1-content-license", | "api-1-content-license", | ||||
checksum_args=["q"], | checksum_args=["q"], | ||||
) | ) | ||||
@api_doc("/content/license/") | @api_doc("/content/license/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_content_license(request, q): | def api_content_license(request: Request, q: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/[(hash_type):](hash)/license/ | .. http:get:: /api/1/content/[(hash_type):](hash)/license/ | ||||
Get information about the license of a content object. | Get information about the license of a content object. | ||||
:param string hash_type: optional parameter specifying which hashing algorithm | :param string hash_type: optional parameter specifying which hashing algorithm | ||||
has been used to compute the content checksum. It can be either ``sha1``, | has been used to compute the content checksum. It can be either ``sha1``, | ||||
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ||||
Show All 26 Lines | return api_lookup( | ||||
notfound_msg="No license information found for content {}.".format(q), | notfound_msg="No license information found for content {}.".format(q), | ||||
enrich_fn=utils.enrich_metadata_endpoint, | enrich_fn=utils.enrich_metadata_endpoint, | ||||
request=request, | request=request, | ||||
) | ) | ||||
@api_route(r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/ctags/", "api-1-content-ctags") | @api_route(r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/ctags/", "api-1-content-ctags") | ||||
@api_doc("/content/ctags/", tags=["hidden"]) | @api_doc("/content/ctags/", tags=["hidden"]) | ||||
def api_content_ctags(request, q): | def api_content_ctags(request: Request, q: str): | ||||
""" | """ | ||||
Get information about all `Ctags <http://ctags.sourceforge.net/>`_-style | Get information about all `Ctags <http://ctags.sourceforge.net/>`_-style | ||||
symbols defined in a content object. | symbols defined in a content object. | ||||
""" | """ | ||||
return api_lookup( | return api_lookup( | ||||
archive.lookup_content_ctags, | archive.lookup_content_ctags, | ||||
q, | q, | ||||
notfound_msg="No ctags symbol found for content {}.".format(q), | notfound_msg="No ctags symbol found for content {}.".format(q), | ||||
enrich_fn=utils.enrich_metadata_endpoint, | enrich_fn=utils.enrich_metadata_endpoint, | ||||
request=request, | request=request, | ||||
) | ) | ||||
@api_route( | @api_route( | ||||
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/raw/", | r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/raw/", | ||||
"api-1-content-raw", | "api-1-content-raw", | ||||
checksum_args=["q"], | checksum_args=["q"], | ||||
) | ) | ||||
@api_doc("/content/raw/") | @api_doc("/content/raw/") | ||||
def api_content_raw(request, q): | def api_content_raw(request: Request, q: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/[(hash_type):](hash)/raw/ | .. http:get:: /api/1/content/[(hash_type):](hash)/raw/ | ||||
Get the raw content of a content object (aka a "blob"), as a byte sequence. | Get the raw content of a content object (aka a "blob"), as a byte sequence. | ||||
:param string hash_type: optional parameter specifying which hashing algorithm | :param string hash_type: optional parameter specifying which hashing algorithm | ||||
has been used to compute the content checksum. It can be either ``sha1``, | has been used to compute the content checksum. It can be either ``sha1``, | ||||
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not | ||||
Show All 31 Lines | response = HttpResponse( | ||||
generate(content_raw), content_type="application/octet-stream" | generate(content_raw), content_type="application/octet-stream" | ||||
) | ) | ||||
response["Content-disposition"] = "attachment; filename=%s" % filename | response["Content-disposition"] = "attachment; filename=%s" % filename | ||||
return response | return response | ||||
@api_route(r"/content/symbol/(?P<q>.+)/", "api-1-content-symbol") | @api_route(r"/content/symbol/(?P<q>.+)/", "api-1-content-symbol") | ||||
@api_doc("/content/symbol/", tags=["hidden"]) | @api_doc("/content/symbol/", tags=["hidden"]) | ||||
def api_content_symbol(request, q=None): | def api_content_symbol(request: Request, q: str): | ||||
"""Search content objects by `Ctags <http://ctags.sourceforge.net/>`_-style | """Search content objects by `Ctags <http://ctags.sourceforge.net/>`_-style | ||||
symbol (e.g., function name, data type, method, ...). | symbol (e.g., function name, data type, method, ...). | ||||
""" | """ | ||||
result = {} | result = {} | ||||
last_sha1 = request.query_params.get("last_sha1", None) | last_sha1 = request.query_params.get("last_sha1", None) | ||||
per_page = int(request.query_params.get("per_page", "10")) | per_page = int(request.query_params.get("per_page", "10")) | ||||
Show All 29 Lines | if symbols: | ||||
} | } | ||||
result.update({"results": symbols}) | result.update({"results": symbols}) | ||||
return result | return result | ||||
@api_route(r"/content/known/search/", "api-1-content-known", methods=["POST"]) | @api_route(r"/content/known/search/", "api-1-content-known", methods=["POST"]) | ||||
@api_route(r"/content/known/(?P<q>(?!search).*)/", "api-1-content-known") | @api_route(r"/content/known/(?P<q>(?!search).+)/", "api-1-content-known") | ||||
@api_doc("/content/known/", tags=["hidden"]) | @api_doc("/content/known/", tags=["hidden"]) | ||||
@format_docstring() | @format_docstring() | ||||
def api_check_content_known(request, q=None): | def api_check_content_known(request: Request, q: Optional[str] = None): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ | .. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ | ||||
Check whether some content(s) (aka "blob(s)") is present in the archive | Check whether some content(s) (aka "blob(s)") is present in the archive | ||||
based on its **sha1** checksum. | based on its **sha1** checksum. | ||||
:param string sha1: hexadecimal representation of the **sha1** checksum value | :param string sha1: hexadecimal representation of the **sha1** checksum value | ||||
for the content to check existence. Multiple values can be provided | for the content to check existence. Multiple values can be provided | ||||
Show All 10 Lines | .. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/ | ||||
:statuscode 400: an invalid **sha1** has been provided | :statuscode 400: an invalid **sha1** has been provided | ||||
**Example:** | **Example:** | ||||
.. parsed-literal:: | .. parsed-literal:: | ||||
:swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/` | :swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/` | ||||
""" | """ | ||||
response = {"search_res": None, "search_stats": None} | |||||
search_stats = {"nbfiles": 0, "pct": 0} | search_stats = {"nbfiles": 0, "pct": 0} | ||||
search_res = None | search_res = None | ||||
queries = [] | queries = [] | ||||
# GET: Many hash separated values request | # GET: Many hash separated values request | ||||
if q: | if q: | ||||
hashes = q.split(",") | hashes = q.split(",") | ||||
for v in hashes: | for v in hashes: | ||||
Show All 17 Lines | if queries: | ||||
for el in lookup: | for el in lookup: | ||||
res_d = {"sha1": el["sha1"], "found": el["found"]} | res_d = {"sha1": el["sha1"], "found": el["found"]} | ||||
if "filename" in el and el["filename"]: | if "filename" in el and el["filename"]: | ||||
res_d["filename"] = el["filename"] | res_d["filename"] = el["filename"] | ||||
result.append(res_d) | result.append(res_d) | ||||
search_res = result | search_res = result | ||||
nbfound = len([x for x in lookup if x["found"]]) | nbfound = len([x for x in lookup if x["found"]]) | ||||
search_stats["nbfiles"] = nb_queries | search_stats["nbfiles"] = nb_queries | ||||
search_stats["pct"] = (nbfound / nb_queries) * 100 | search_stats["pct"] = int((nbfound / nb_queries) * 100) | ||||
response["search_res"] = search_res | return {"search_res": search_res, "search_stats": search_stats} | ||||
response["search_stats"] = search_stats | |||||
return response | |||||
@api_route( | @api_route( | ||||
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/", "api-1-content", checksum_args=["q"] | r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/", "api-1-content", checksum_args=["q"] | ||||
) | ) | ||||
@api_doc("/content/") | @api_doc("/content/") | ||||
@format_docstring() | @format_docstring() | ||||
def api_content_metadata(request, q): | def api_content_metadata(request: Request, q: str): | ||||
""" | """ | ||||
.. http:get:: /api/1/content/[(hash_type):](hash)/ | .. http:get:: /api/1/content/[(hash_type):](hash)/ | ||||
Get information about a content (aka a "blob") object. | Get information about a content (aka a "blob") object. | ||||
In the archive, a content object is identified based on checksum | In the archive, a content object is identified based on checksum | ||||
values computed using various hashing algorithms. | values computed using various hashing algorithms. | ||||
:param string hash_type: optional parameter specifying which hashing algorithm | :param string hash_type: optional parameter specifying which hashing algorithm | ||||
▲ Show 20 Lines • Show All 41 Lines • Show Last 20 Lines |