utils.py
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import copy
import functools
import itertools
import logging
import os
import re
from typing import Callable, Dict, Optional, Tuple, TypeVar
from urllib.parse import unquote, urlsplit
from urllib.request import urlopen

import requests

from swh.loader.exception import NotFound
from swh.loader.package import DEFAULT_PARAMS
from swh.model.hashutil import HASH_BLOCK_SIZE, MultiHash
from swh.model.model import Person

logger = logging.getLogger(__name__)

DOWNLOAD_HASHES = set(["sha1", "sha256", "length"])

EMPTY_AUTHOR = Person(fullname=b"", name=None, email=None)


def api_info(url: str, **extra_params) -> bytes:
    """Basic API client to retrieve information on a project. This deals with
    fetching JSON metadata about packages (e.g. from PyPI, npm, ...).

    Args:
        url (str): The API url (e.g. PyPI, npm, etc.)

    Raises:
        NotFound in case of query failure (for any reason: 404, ...)

    Returns:
        The associated response's information

    """
    response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params})
    if response.status_code != 200:
        raise NotFound(f"Fail to query '{url}'. Reason: {response.status_code}")
    return response.content
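

# Illustrative usage sketch (not part of the original module): api_info
# returns the raw response bytes and raises NotFound on any non-200 status,
# so callers decode the payload themselves. The PyPI URL below is an
# assumption used only for demonstration.
def _demo_api_info() -> None:
    import json

    raw = api_info("https://pypi.org/pypi/requests/json")  # hypothetical URL
    project = json.loads(raw)
    print(project["info"]["name"], project["info"]["version"])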


def _content_disposition_filename(header: str) -> Optional[str]:
    fname = None
    fnames = re.findall(r"filename[\*]?=([^;]+)", header)
    if fnames and "utf-8''" in fnames[0].lower():
        # RFC 5987
        fname = re.sub("utf-8''", "", fnames[0], flags=re.IGNORECASE)
        fname = unquote(fname)
    elif fnames:
        fname = fnames[0]
    if fname:
        fname = os.path.basename(fname.strip().strip('"'))
    return fname
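

# Illustrative usage sketch (not part of the original module): the helper
# extracts a safe basename from both plain (filename=) and RFC 5987
# (filename*=) Content-Disposition header values; both values below are
# made up for demonstration.
def _demo_content_disposition_filename() -> None:
    plain = _content_disposition_filename('attachment; filename="foo-1.0.tar.gz"')
    encoded = _content_disposition_filename(
        "attachment; filename*=UTF-8''foo%20bar.tar.gz"
    )
    assert plain == "foo-1.0.tar.gz"
    assert encoded == "foo bar.tar.gz"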


def download(
    url: str,
    dest: str,
    hashes: Dict = {},
    filename: Optional[str] = None,
    auth: Optional[Tuple[str, str]] = None,
    extra_request_headers: Optional[Dict[str, str]] = None,
) -> Tuple[str, Dict]:
    """Download a remote file from url, write it to dest and compute swh
    hashes on it.

    Args:
        url: Artifact uri to fetch and hash
        dest: Directory to write the archive to
        hashes: Dict of expected hashes (key is the hash algo) for the artifact
            to download (those hashes are expected to be hex strings)
        auth: Optional tuple of login/password (for http authenticated
            services, e.g. deposit)

    Raises:
        ValueError in case of any error when fetching/computing (length,
        checksum mismatch, ...)

    Returns:
        Tuple of local (filepath, hashes of filepath)

    """
    params = copy.deepcopy(DEFAULT_PARAMS)
    if auth is not None:
        params["auth"] = auth
    if extra_request_headers is not None:
        params["headers"].update(extra_request_headers)
    # so the connection does not hang indefinitely (read/connection timeout)
    timeout = params.get("timeout", 60)

    if url.startswith("ftp://"):
        response = urlopen(url, timeout=timeout)
        chunks = (response.read(HASH_BLOCK_SIZE) for _ in itertools.count())
        response_data = itertools.takewhile(bool, chunks)
    else:
        response = requests.get(url, **params, timeout=timeout, stream=True)
        if response.status_code != 200:
            raise ValueError(
                "Fail to query '%s'. Reason: %s" % (url, response.status_code)
            )
        # update URL to the response one as requests follows redirections by
        # default on GET requests
        url = response.url
        # try to extract filename from content-disposition header if available
        if filename is None and "content-disposition" in response.headers:
            filename = _content_disposition_filename(
                response.headers["content-disposition"]
            )
        response_data = response.iter_content(chunk_size=HASH_BLOCK_SIZE)

    filename = filename if filename else os.path.basename(urlsplit(url).path)
    logger.debug("filename: %s", filename)
    filepath = os.path.join(dest, filename)
    logger.debug("filepath: %s", filepath)

    h = MultiHash(hash_names=DOWNLOAD_HASHES | set(hashes.keys()))
    with open(filepath, "wb") as f:
        for chunk in response_data:
            h.update(chunk)
            f.write(chunk)

    response.close()

    # Also check the expected hashes if provided
    if hashes:
        actual_hashes = h.hexdigest()
        for algo_hash in hashes.keys():
            actual_digest = actual_hashes[algo_hash]
            expected_digest = hashes[algo_hash]
            if actual_digest != expected_digest:
                raise ValueError(
                    "Failure when fetching %s. "
                    "Checksum mismatched: %s != %s"
                    % (url, expected_digest, actual_digest)
                )

    computed_hashes = h.hexdigest()
    length = computed_hashes.pop("length")
    extrinsic_metadata = {
        "length": length,
        "filename": filename,
        "checksums": computed_hashes,
        "url": url,
    }

    logger.debug("extrinsic_metadata: %s", extrinsic_metadata)

    return filepath, extrinsic_metadata
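

# Illustrative usage sketch (not part of the original module): download an
# artifact into a temporary directory and verify a known sha256 digest. The
# URL and digest are assumptions used only for demonstration; on a checksum
# mismatch, download raises ValueError.
def _demo_download() -> None:
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        filepath, metadata = download(
            "https://example.org/foo-1.0.tar.gz",  # hypothetical artifact URL
            dest=tmpdir,
            hashes={"sha256": "0" * 64},  # hypothetical expected digest
        )
        print(filepath, metadata["length"], metadata["checksums"]["sha256"])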


def release_name(version: str, filename: Optional[str] = None) -> str:
    if filename:
        return "releases/%s/%s" % (version, filename)
    return "releases/%s" % version
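
# For example (values derived directly from the code above):
#   release_name("1.0.2") == "releases/1.0.2"
#   release_name("1.0.2", "foo-1.0.2.tar.gz") == "releases/1.0.2/foo-1.0.2.tar.gz"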


TReturn = TypeVar("TReturn")
TSelf = TypeVar("TSelf")

_UNDEFINED = object()


def cached_method(f: Callable[[TSelf], TReturn]) -> Callable[[TSelf], TReturn]:
    """Memoize the result of a zero-argument method on the instance itself."""
    cache_name = f"_cached_{f.__name__}"

    @functools.wraps(f)
    def newf(self):
        value = getattr(self, cache_name, _UNDEFINED)
        if value is _UNDEFINED:
            value = f(self)
            setattr(self, cache_name, value)
        return value

    return newf
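

# Illustrative usage sketch (not part of the original module): a method
# decorated with cached_method runs once per instance; subsequent calls
# return the value stored on the instance under "_cached_<name>".
def _demo_cached_method() -> None:
    class Loader:
        calls = 0

        @cached_method
        def api_data(self) -> dict:
            Loader.calls += 1
            return {"fetched": True}

    loader = Loader()
    assert loader.api_data() == {"fetched": True}
    assert loader.api_data() == {"fetched": True}  # second call hits the cache
    assert Loader.calls == 1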