Page MenuHomeSoftware Heritage

origin.py
No OneTemporary

origin.py

# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from distutils.util import strtobool
from functools import partial
from swh.search.exc import SearchQuerySyntaxError
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
from swh.web.api.utils import (
enrich_origin,
enrich_origin_search_result,
enrich_origin_visit,
)
from swh.web.api.views.utils import api_lookup
from swh.web.common import archive
from swh.web.common.exc import BadInputExc
from swh.web.common.origin_visits import get_origin_visits
from swh.web.common.utils import reverse
# Shared reST field lists (sphinxcontrib-httpdomain syntax) injected into the
# endpoint docstrings below through ``format_docstring``.  Lines are indented
# so that, once substituted, they sit inside the ``.. http:get::`` directive.

# Fields of a single origin object.
DOC_RETURN_ORIGIN = """
        :>json string origin_visits_url: link to
            :http:get:`/api/1/origin/(origin_url)/visits/` in order to get
            information about the visits for that origin
        :>json string url: the origin canonical url
"""

# Same fields, documented as members of each element of a JSON array.
DOC_RETURN_ORIGIN_ARRAY = DOC_RETURN_ORIGIN.replace(":>json", ":>jsonarr")

# Fields of a single origin visit object.  Every field must use ``:>json``
# here: the array variant below is derived with a plain text substitution, so
# a stray ``:>jsonarr`` in this template would become ``:>jsonarrarr``.
DOC_RETURN_ORIGIN_VISIT = """
        :>json string date: ISO8601/RFC3339 representation of the visit date (in UTC)
        :>json string origin: the origin canonical url
        :>json string origin_url: link to get information about the origin
        :>json string snapshot: the snapshot identifier of the visit
            (may be null if status is not **full**).
        :>json string snapshot_url: link to
            :http:get:`/api/1/snapshot/(snapshot_id)/` in order to get
            information about the snapshot of the visit
            (may be null if status is not **full**).
        :>json string status: status of the visit (either **full**,
            **partial** or **ongoing**)
        :>json number visit: the unique identifier of the visit
"""

# Array variant of the visit fields, plus the fields only present in listings.
DOC_RETURN_ORIGIN_VISIT_ARRAY = DOC_RETURN_ORIGIN_VISIT.replace(":>json", ":>jsonarr")
DOC_RETURN_ORIGIN_VISIT_ARRAY += """
        :>jsonarr number id: the unique identifier of the origin
        :>jsonarr string origin_visit_url: link to
            :http:get:`/api/1/origin/(origin_url)/visit/(visit_id)/`
            in order to get information about the visit
"""
@api_route(r"/origins/", "api-1-origins")
@api_doc("/origins/", noargs=True)
@format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
def api_origins(request):
    """
    .. http:get:: /api/1/origins/

        Get list of archived software origins.

        .. warning::

            This endpoint used to provide an ``origin_from`` query parameter,
            and guarantee an order on results. This is no longer true,
            and only the Link header should be used for paginating through
            results.

        :query int origin_count: The maximum number of origins to return
            (default to 100, can not exceed 10000)

        {return_origin_array}

        {common_headers}
        {resheader_link}

        :statuscode 200: no error
        :statuscode 400: the removed ``origin_from`` parameter was provided
            or ``origin_count`` is not an integer

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origins?origin_count=500`
    """
    # ``origin_from`` is no longer supported: reject it explicitly instead of
    # silently ignoring it.
    if request.query_params.get("origin_from"):
        raise BadInputExc("Please use the Link header to browse through result")

    # A malformed count is a client error, not a server failure.
    raw_count = request.query_params.get("origin_count", "100")
    try:
        limit = min(int(raw_count), 10000)
    except ValueError as exc:
        raise BadInputExc(
            f"origin_count must be an integer, got {raw_count!r}"
        ) from exc

    page_token = request.query_params.get("page_token", None)
    page_result = archive.lookup_origins(page_token, limit)
    origins = [enrich_origin(o, request=request) for o in page_result.results]

    response = {"results": origins, "headers": {}}
    next_page_token = page_result.next_page_token
    if next_page_token is not None:
        # Expose the follow-up page through the Link header only.
        response["headers"]["link-next"] = reverse(
            "api-1-origins",
            query_params={"page_token": next_page_token, "origin_count": limit},
            request=request,
        )
    return response
@api_route(r"/origin/(?P<origin_url>.+)/get/", "api-1-origin")
@api_doc("/origin/")
@format_docstring(return_origin=DOC_RETURN_ORIGIN)
def api_origin(request, origin_url):
    """
    .. http:get:: /api/1/origin/(origin_url)/get/

        Get information about a software origin.

        :param string origin_url: the origin url

        {return_origin}

        {common_headers}

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/https://github.com/python/cpython/get/`
    """
    # The lookup function is handed the origin as a dict keyed by "url".
    lookup_arg = {"url": origin_url}
    missing_msg = "Origin with url %s not found." % origin_url
    return api_lookup(
        archive.lookup_origin,
        lookup_arg,
        notfound_msg=missing_msg,
        enrich_fn=enrich_origin,
        request=request,
    )
@api_route(
    r"/origin/search/(?P<url_pattern>.+)/",
    "api-1-origin-search",
    throttle_scope="swh_api_origin_search",
)
@api_doc("/origin/search/")
@format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
def api_origin_search(request, url_pattern):
    """
    .. http:get:: /api/1/origin/search/(url_pattern)/

        Search for software origins whose urls contain a provided string
        pattern or match a provided regular expression.
        The search is performed in a case insensitive way.

        .. warning::

            This endpoint used to provide an ``offset`` query parameter,
            and guarantee an order on results. This is no longer true,
            and only the Link header should be used for paginating through
            results.

        :param string url_pattern: a string pattern
        :query boolean use_ql: whether to use swh search query language or not
        :query int limit: the maximum number of found origins to return
            (bounded to 1000)
        :query boolean with_visit: if true, only return origins with at least
            one visit by Software heritage
        :query string visit_type: visit type to filter the search on
            (forwarded to the search backend)

        {return_origin_array}

        {common_headers}
        {resheader_link}

        :statuscode 200: no error
        :statuscode 400: a query parameter has an invalid value or the
            search query has a syntax error

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/search/python/?limit=2`
    """
    params = request.query_params

    # Malformed query parameters are client errors: report them as bad input
    # rather than letting ValueError escape from int() / strtobool().
    raw_limit = params.get("limit", "70")
    try:
        limit = min(int(raw_limit), 1000)
    except ValueError as exc:
        raise BadInputExc(f"limit must be an integer, got {raw_limit!r}") from exc
    try:
        use_ql = bool(strtobool(params.get("use_ql", "false")))
        with_visit = bool(strtobool(params.get("with_visit", "false")))
    except ValueError as exc:
        raise BadInputExc(f"Invalid boolean query parameter: {exc}") from exc

    page_token = params.get("page_token")
    visit_type = params.get("visit_type")

    try:
        (results, next_page_token) = api_lookup(
            archive.search_origin,
            url_pattern,
            use_ql,
            limit,
            with_visit,
            [visit_type] if visit_type else None,
            page_token,
            enrich_fn=enrich_origin_search_result,
            request=request,
        )
    except SearchQuerySyntaxError as e:
        raise BadInputExc(f"Syntax error in search query: {e.args[0]}") from e

    result = {}
    if next_page_token is not None:
        # Re-use the caller's query string for the next page, swapping in the
        # new token; ``dict()`` already returns a fresh, mutable mapping.
        query_params = request.GET.dict()
        query_params["page_token"] = next_page_token
        result["headers"] = {
            "link-next": reverse(
                "api-1-origin-search",
                url_args={"url_pattern": url_pattern},
                query_params=query_params,
                request=request,
            )
        }
    result["results"] = results
    return result
@api_route(r"/origin/metadata-search/", "api-1-origin-metadata-search")
@api_doc("/origin/metadata-search/", noargs=True)
@format_docstring(return_origin_array=DOC_RETURN_ORIGIN_ARRAY)
def api_origin_metadata_search(request):
    """
    .. http:get:: /api/1/origin/metadata-search/

        Search for software origins whose metadata (expressed as a
        JSON-LD/CodeMeta dictionary) match the provided criteria.
        For now, only full-text search on this dictionary is supported.

        :query str fulltext: a string that will be matched against origin
            metadata; results are ranked and ordered starting with the best
            ones.
        :query int limit: the maximum number of found origins to return
            (bounded to 100)

        {return_origin_array}

        {common_headers}

        :statuscode 200: no error
        :statuscode 400: ``fulltext`` is missing or empty, or ``limit`` is
            not an integer

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
    """
    fulltext = request.query_params.get("fulltext", None)

    # A malformed limit is a client error, not a server failure.
    raw_limit = request.query_params.get("limit", "70")
    try:
        limit = min(int(raw_limit), 100)
    except ValueError as exc:
        raise BadInputExc(f"limit must be an integer, got {raw_limit!r}") from exc

    if not fulltext:
        raise BadInputExc('"fulltext" must be provided and non-empty.')

    results = api_lookup(
        archive.search_origin_metadata, fulltext, limit, request=request
    )
    return {
        "results": results,
    }
@api_route(r"/origin/(?P<origin_url>.*)/visits/", "api-1-origin-visits")
@api_doc("/origin/visits/")
@format_docstring(return_origin_visit_array=DOC_RETURN_ORIGIN_VISIT_ARRAY)
def api_origin_visits(request, origin_url):
    """
    .. http:get:: /api/1/origin/(origin_url)/visits/

        Get information about all visits of a software origin.
        Visits are returned sorted in descending order according
        to their date.

        :param str origin_url: a software origin URL
        :query int per_page: specify the number of visits to list, for
            pagination purposes
        :query int last_visit: visit to start listing from, for pagination
            purposes

        {common_headers}
        {resheader_link}

        {return_origin_visit_array}

        :statuscode 200: no error
        :statuscode 400: ``per_page`` or ``last_visit`` is not an integer
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/https://github.com/hylang/hy/visits/`
    """
    origin_query = {"url": origin_url}
    notfound_msg = "No origin {} found".format(origin_url)
    url_args_next = {"origin_url": origin_url}

    # Malformed pagination parameters are client errors: report them as bad
    # input rather than letting ValueError escape from int().
    raw_per_page = request.query_params.get("per_page", "10")
    raw_last_visit = request.query_params.get("last_visit")
    try:
        per_page = int(raw_per_page)
        last_visit = int(raw_last_visit) if raw_last_visit else None
    except ValueError as exc:
        raise BadInputExc("per_page and last_visit must be integers") from exc

    def _lookup_origin_visits(origin_query, last_visit=last_visit, per_page=per_page):
        """Yield one page of visits, most recent first."""
        # Work on a reversed copy so the list returned by get_origin_visits
        # is never mutated.
        all_visits = list(reversed(get_origin_visits(origin_query)))
        if not last_visit:
            yield from all_visits[:per_page]
            return
        # Resume right after the visit the previous page ended on; yield
        # nothing when that visit is unknown.
        for i, v in enumerate(all_visits):
            if v["visit"] == last_visit:
                yield from all_visits[i + 1 : i + 1 + per_page]
                return

    results = api_lookup(
        _lookup_origin_visits,
        origin_query,
        notfound_msg=notfound_msg,
        enrich_fn=partial(
            enrich_origin_visit, with_origin_link=False, with_origin_visit_link=True
        ),
        request=request,
    )

    result = {}
    # A full page suggests more visits may follow: advertise the next page.
    if results and len(results) == per_page:
        query_params = {"last_visit": results[-1]["visit"]}
        # Only echo per_page back when the caller set it explicitly.
        if request.query_params.get("per_page"):
            query_params["per_page"] = per_page
        result["headers"] = {
            "link-next": reverse(
                "api-1-origin-visits",
                url_args=url_args_next,
                query_params=query_params,
                request=request,
            )
        }
    result["results"] = results
    return result
@api_route(
    r"/origin/(?P<origin_url>.*)/visit/latest/",
    "api-1-origin-visit-latest",
    throttle_scope="swh_api_origin_visit_latest",
)
@api_doc("/origin/visit/latest/")
@format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT)
def api_origin_visit_latest(request, origin_url=None):
    """
    .. http:get:: /api/1/origin/(origin_url)/visit/latest/

        Get information about the latest visit of a software origin.

        :param str origin_url: a software origin URL
        :query boolean require_snapshot: if true, only return a visit
            with a snapshot

        {common_headers}

        {return_origin_visit}

        :statuscode 200: no error
        :statuscode 400: ``require_snapshot`` is not a valid boolean value
        :statuscode 404: requested origin or visit can not be found in the
            archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/`
    """
    # strtobool raises ValueError on unknown spellings; that is a client
    # error, so surface it as bad input.
    raw_flag = request.query_params.get("require_snapshot", "false")
    try:
        require_snapshot = bool(strtobool(raw_flag))
    except ValueError as exc:
        raise BadInputExc(
            f"require_snapshot must be a boolean, got {raw_flag!r}"
        ) from exc

    return api_lookup(
        archive.lookup_origin_visit_latest,
        origin_url,
        require_snapshot,
        notfound_msg=("No visit for origin {} found".format(origin_url)),
        enrich_fn=partial(
            enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False
        ),
        request=request,
    )
@api_route(
    r"/origin/(?P<origin_url>.*)/visit/(?P<visit_id>[0-9]+)/", "api-1-origin-visit"
)
@api_doc("/origin/visit/")
@format_docstring(return_origin_visit=DOC_RETURN_ORIGIN_VISIT)
def api_origin_visit(request, visit_id, origin_url):
    """
    .. http:get:: /api/1/origin/(origin_url)/visit/(visit_id)/

        Get information about a specific visit of a software origin.

        :param str origin_url: a software origin URL
        :param int visit_id: a visit identifier

        {common_headers}

        {return_origin_visit}

        :statuscode 200: no error
        :statuscode 404: requested origin or visit can not be found in the
            archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/`
    """
    # The route regex only lets digits through, so this conversion is safe.
    visit_number = int(visit_id)
    missing_msg = "No visit {} for origin {} found".format(visit_id, origin_url)
    # Link back to the origin, but not to the visit itself.
    enricher = partial(
        enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False
    )
    return api_lookup(
        archive.lookup_origin_visit,
        origin_url,
        visit_number,
        notfound_msg=missing_msg,
        enrich_fn=enricher,
        request=request,
    )
@api_route(
    r"/origin/(?P<origin_url>.+)" "/intrinsic-metadata",
    "api-origin-intrinsic-metadata",
)
@api_doc("/origin/intrinsic-metadata/")
@format_docstring()
def api_origin_intrinsic_metadata(request, origin_url):
    """
    .. http:get:: /api/1/origin/(origin_url)/intrinsic-metadata

        Get intrinsic metadata of a software origin (as a JSON-LD/CodeMeta dictionary).

        :param string origin_url: the origin url

        :>json string ???: intrinsic metadata field of the origin

        {common_headers}

        :statuscode 200: no error
        :statuscode 404: requested origin can not be found in the archive

        **Example:**

        .. parsed-literal::

            :swh_web_api:`origin/https://github.com/python/cpython/intrinsic-metadata`
    """
    missing_msg = f"Origin with url {origin_url} not found"
    # Delegate to the shared lookup helper, passing the not-found message
    # and the origin enrichment function through.
    return api_lookup(
        archive.lookup_origin_intrinsic_metadata,
        origin_url,
        notfound_msg=missing_msg,
        enrich_fn=enrich_origin,
        request=request,
    )

File Metadata

Mime Type
text/x-python
Expires
Sat, Jun 21, 6:35 PM (1 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3238136

Event Timeline