Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9124343
content.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
content.py
View Options
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
functools
from
django.http
import
HttpResponse
from
swh.web.api
import
utils
from
swh.web.api.apidoc
import
api_doc
,
format_docstring
from
swh.web.api.apiurls
import
api_route
from
swh.web.api.views.utils
import
api_lookup
from
swh.web.common
import
archive
from
swh.web.common.exc
import
NotFoundExc
from
swh.web.common.utils
import
reverse
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/filetype/"
,
"api-1-content-filetype"
,
checksum_args
=
[
"q"
],
)
@api_doc
(
"/content/filetype/"
)
@format_docstring
()
def
api_content_filetype
(
request
,
q
):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/filetype/
Get information about the detected MIME type of a content object.
:param string hash_type: optional parameter specifying which hashing algorithm
has been used to compute the content checksum. It can be either ``sha1``,
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
provided, it is assumed that the hashing algorithm used is ``sha1``.
:param string hash: hexadecimal representation of the checksum value computed
with the specified hashing algorithm.
:>json object content_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
about the content
:>json string encoding: the detected content encoding
:>json string id: the **sha1** identifier of the content
:>json string mimetype: the detected MIME type of the content
:>json object tool: information about the tool used to detect the content
filetype
{common_headers}
:statuscode 200: no error
:statuscode 400: an invalid **hash_type** or **hash** has been provided
:statuscode 404: requested content can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/filetype/`
"""
return
api_lookup
(
archive
.
lookup_content_filetype
,
q
,
notfound_msg
=
"No filetype information found for content {}."
.
format
(
q
),
enrich_fn
=
utils
.
enrich_metadata_endpoint
,
request
=
request
,
)
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/language/"
,
"api-1-content-language"
,
checksum_args
=
[
"q"
],
)
@api_doc
(
"/content/language/"
)
@format_docstring
()
def
api_content_language
(
request
,
q
):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/language/
Get information about the programming language used in a content object.
Note: this endpoint currently returns no data.
:param string hash_type: optional parameter specifying which hashing algorithm
has been used to compute the content checksum. It can be either ``sha1``,
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
provided, it is assumed that the hashing algorithm used is ``sha1``.
:param string hash: hexadecimal representation of the checksum value computed
with the specified hashing algorithm.
:>json object content_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
about the content
:>json string id: the **sha1** identifier of the content
:>json string lang: the detected programming language if any
:>json object tool: information about the tool used to detect the programming
language
{common_headers}
:statuscode 200: no error
:statuscode 400: an invalid **hash_type** or **hash** has been provided
:statuscode 404: requested content can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/language/`
"""
return
api_lookup
(
archive
.
lookup_content_language
,
q
,
notfound_msg
=
"No language information found for content {}."
.
format
(
q
),
enrich_fn
=
utils
.
enrich_metadata_endpoint
,
request
=
request
,
)
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/license/"
,
"api-1-content-license"
,
checksum_args
=
[
"q"
],
)
@api_doc
(
"/content/license/"
)
@format_docstring
()
def
api_content_license
(
request
,
q
):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/license/
Get information about the license of a content object.
:param string hash_type: optional parameter specifying which hashing algorithm
has been used to compute the content checksum. It can be either ``sha1``,
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
provided, it is assumed that the hashing algorithm used is ``sha1``.
:param string hash: hexadecimal representation of the checksum value computed
with the specified hashing algorithm.
:>json object content_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/` for getting information
about the content
:>json string id: the **sha1** identifier of the content
:>json array licenses: array of strings containing the detected license names
:>json object tool: information about the tool used to detect the license
{common_headers}
:statuscode 200: no error
:statuscode 400: an invalid **hash_type** or **hash** has been provided
:statuscode 404: requested content can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/license/`
"""
return
api_lookup
(
archive
.
lookup_content_license
,
q
,
notfound_msg
=
"No license information found for content {}."
.
format
(
q
),
enrich_fn
=
utils
.
enrich_metadata_endpoint
,
request
=
request
,
)
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/ctags/"
,
"api-1-content-ctags"
)
@api_doc
(
"/content/ctags/"
,
tags
=
[
"hidden"
])
def
api_content_ctags
(
request
,
q
):
"""
Get information about all `Ctags <http://ctags.sourceforge.net/>`_-style
symbols defined in a content object.
"""
return
api_lookup
(
archive
.
lookup_content_ctags
,
q
,
notfound_msg
=
"No ctags symbol found for content {}."
.
format
(
q
),
enrich_fn
=
utils
.
enrich_metadata_endpoint
,
request
=
request
,
)
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/raw/"
,
"api-1-content-raw"
,
checksum_args
=
[
"q"
],
)
@api_doc
(
"/content/raw/"
)
def
api_content_raw
(
request
,
q
):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/raw/
Get the raw content of a content object (aka a "blob"), as a byte sequence.
:param string hash_type: optional parameter specifying which hashing algorithm
has been used to compute the content checksum. It can be either ``sha1``,
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
provided, it is assumed that the hashing algorithm used is ``sha1``.
:param string hash: hexadecimal representation of the checksum value computed
with the specified hashing algorithm.
:query string filename: if provided, the downloaded content will get that
filename
:resheader Content-Type: application/octet-stream
:statuscode 200: no error
:statuscode 400: an invalid **hash_type** or **hash** has been provided
:statuscode 404: requested content can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1:dc2830a9e72f23c1dfebef4413003221baa5fb62/raw/`
"""
def
generate
(
content
):
yield
content
[
"data"
]
content_raw
=
archive
.
lookup_content_raw
(
q
)
if
not
content_raw
:
raise
NotFoundExc
(
"Content
%s
is not found."
%
q
)
filename
=
request
.
query_params
.
get
(
"filename"
)
if
not
filename
:
filename
=
"content_
%s
_raw"
%
q
.
replace
(
":"
,
"_"
)
response
=
HttpResponse
(
generate
(
content_raw
),
content_type
=
"application/octet-stream"
)
response
[
"Content-disposition"
]
=
"attachment; filename=
%s
"
%
filename
return
response
@api_route
(
r"/content/symbol/(?P<q>.+)/"
,
"api-1-content-symbol"
)
@api_doc
(
"/content/symbol/"
,
tags
=
[
"hidden"
])
def
api_content_symbol
(
request
,
q
=
None
):
"""Search content objects by `Ctags <http://ctags.sourceforge.net/>`_-style
symbol (e.g., function name, data type, method, ...).
"""
result
=
{}
last_sha1
=
request
.
query_params
.
get
(
"last_sha1"
,
None
)
per_page
=
int
(
request
.
query_params
.
get
(
"per_page"
,
"10"
))
def
lookup_exp
(
exp
,
last_sha1
=
last_sha1
,
per_page
=
per_page
):
exp
=
list
(
archive
.
lookup_expression
(
exp
,
last_sha1
,
per_page
))
return
exp
if
exp
else
None
symbols
=
api_lookup
(
lookup_exp
,
q
,
notfound_msg
=
"No indexed raw content match expression '{}'."
.
format
(
q
),
enrich_fn
=
functools
.
partial
(
utils
.
enrich_content
,
top_url
=
True
),
request
=
request
,
)
if
symbols
:
nb_symbols
=
len
(
symbols
)
if
nb_symbols
==
per_page
:
query_params
=
{}
new_last_sha1
=
symbols
[
-
1
][
"sha1"
]
query_params
[
"last_sha1"
]
=
new_last_sha1
if
request
.
query_params
.
get
(
"per_page"
):
query_params
[
"per_page"
]
=
per_page
result
[
"headers"
]
=
{
"link-next"
:
reverse
(
"api-1-content-symbol"
,
url_args
=
{
"q"
:
q
},
query_params
=
query_params
,
request
=
request
,
)
}
result
.
update
({
"results"
:
symbols
})
return
result
@api_route
(
r"/content/known/search/"
,
"api-1-content-known"
,
methods
=
[
"POST"
])
@api_route
(
r"/content/known/(?P<q>(?!search).*)/"
,
"api-1-content-known"
)
@api_doc
(
"/content/known/"
,
tags
=
[
"hidden"
])
@format_docstring
()
def
api_check_content_known
(
request
,
q
=
None
):
"""
.. http:get:: /api/1/content/known/(sha1)[,(sha1), ...,(sha1)]/
Check whether some content(s) (aka "blob(s)") is present in the archive
based on its **sha1** checksum.
:param string sha1: hexadecimal representation of the **sha1** checksum value
for the content to check existence. Multiple values can be provided
separated by ','.
{common_headers}
:>json array search_res: array holding the search result for each provided
**sha1**
:>json object search_stats: some statistics regarding the number of **sha1**
provided and the percentage of those found in the archive
:statuscode 200: no error
:statuscode 400: an invalid **sha1** has been provided
**Example:**
.. parsed-literal::
:swh_web_api:`content/known/dc2830a9e72f23c1dfebef4413003221baa5fb62,0c3f19cb47ebfbe643fb19fa94c874d18fa62d12/`
"""
response
=
{
"search_res"
:
None
,
"search_stats"
:
None
}
search_stats
=
{
"nbfiles"
:
0
,
"pct"
:
0
}
search_res
=
None
queries
=
[]
# GET: Many hash separated values request
if
q
:
hashes
=
q
.
split
(
","
)
for
v
in
hashes
:
queries
.
append
({
"filename"
:
None
,
"sha1"
:
v
})
# POST: Many hash requests in post form submission
elif
request
.
method
==
"POST"
:
data
=
request
.
data
# Remove potential inputs with no associated value
for
k
,
v
in
data
.
items
():
if
v
is
not
None
:
if
k
==
"q"
and
len
(
v
)
>
0
:
queries
.
append
({
"filename"
:
None
,
"sha1"
:
v
})
elif
v
!=
""
:
queries
.
append
({
"filename"
:
k
,
"sha1"
:
v
})
if
queries
:
lookup
=
archive
.
lookup_multiple_hashes
(
queries
)
result
=
[]
nb_queries
=
len
(
queries
)
for
el
in
lookup
:
res_d
=
{
"sha1"
:
el
[
"sha1"
],
"found"
:
el
[
"found"
]}
if
"filename"
in
el
and
el
[
"filename"
]:
res_d
[
"filename"
]
=
el
[
"filename"
]
result
.
append
(
res_d
)
search_res
=
result
nbfound
=
len
([
x
for
x
in
lookup
if
x
[
"found"
]])
search_stats
[
"nbfiles"
]
=
nb_queries
search_stats
[
"pct"
]
=
(
nbfound
/
nb_queries
)
*
100
response
[
"search_res"
]
=
search_res
response
[
"search_stats"
]
=
search_stats
return
response
@api_route
(
r"/content/(?P<q>[0-9a-z_:]*[0-9a-f]+)/"
,
"api-1-content"
,
checksum_args
=
[
"q"
]
)
@api_doc
(
"/content/"
)
@format_docstring
()
def
api_content_metadata
(
request
,
q
):
"""
.. http:get:: /api/1/content/[(hash_type):](hash)/
Get information about a content (aka a "blob") object.
In the archive, a content object is identified based on checksum
values computed using various hashing algorithms.
:param string hash_type: optional parameter specifying which hashing algorithm
has been used to compute the content checksum. It can be either ``sha1``,
``sha1_git``, ``sha256`` or ``blake2s256``. If that parameter is not
provided, it is assumed that the hashing algorithm used is ``sha1``.
:param string hash: hexadecimal representation of the checksum value computed
with the specified hashing algorithm.
{common_headers}
:>json object checksums: object holding the computed checksum values for the
requested content
:>json string data_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/raw/`
for downloading the content raw bytes
:>json string filetype_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/filetype/`
for getting information about the content MIME type
:>json string language_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/language/`
for getting information about the programming language used in the content
:>json number length: length of the content in bytes
:>json string license_url: link to
:http:get:`/api/1/content/[(hash_type):](hash)/license/`
for getting information about the license of the content
:statuscode 200: no error
:statuscode 400: an invalid **hash_type** or **hash** has been provided
:statuscode 404: requested content can not be found in the archive
**Example:**
.. parsed-literal::
:swh_web_api:`content/sha1_git:fe95a46679d128ff167b7c55df5d02356c5a1ae1/`
"""
return
api_lookup
(
archive
.
lookup_content
,
q
,
notfound_msg
=
"Content with {} not found."
.
format
(
q
),
enrich_fn
=
functools
.
partial
(
utils
.
enrich_content
,
query_string
=
q
),
request
=
request
,
)
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Sat, Jun 21, 7:00 PM (2 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3239285
Attached To
rDWAPPS Web applications
Event Timeline
Log In to Comment