Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9123628
http.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
http.py
View Options
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import
logging
from
typing
import
Iterator
,
Optional
from
urllib.parse
import
urljoin
import
requests
from
swh.model
import
hashutil
from
swh.objstorage
import
exc
from
swh.objstorage.interface
import
CompositeObjId
,
ObjId
from
swh.objstorage.objstorage
import
(
DEFAULT_LIMIT
,
ObjStorage
,
compute_hash
,
decompressors
,
objid_to_default_hex
,
)
# Module-level logger named after this module. Its threshold is pinned to
# ERROR at import time, so lower-severity records emitted through it are
# dropped regardless of the parent loggers' configuration.
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.ERROR)
class HTTPReadOnlyObjStorage(ObjStorage):
    """Simple ObjStorage retrieving objects from an HTTP server.

    For example, can be used to retrieve objects from S3:

    objstorage:
      cls: http
      url: https://softwareheritage.s3.amazonaws.com/content/

    Every mutating operation (``add``, ``delete``, ``restore``) raises
    ``exc.ReadOnlyObjStorage`` and every enumeration operation
    (``__iter__``, ``__len__``, ``list_content``) raises
    ``exc.NonIterableObjStorage``.
    """

    def __init__(self, url=None, compression=None, **kwargs):
        """Set up the HTTP session and the base URL of the object store.

        Args:
            url: base URL under which objects are published. A trailing
                ``/`` is appended when missing so that ``urljoin`` appends
                to the last path segment instead of replacing it.
            compression: key into ``decompressors`` selecting how fetched
                payloads are decompressed; a falsy value means the payload
                is returned as received.
            **kwargs: forwarded unchanged to ``ObjStorage.__init__``.

        Raises:
            ValueError: if ``url`` is None.
        """
        super().__init__(**kwargs)
        if url is None:
            # Fail with an explicit message rather than an AttributeError
            # on ``None.endswith`` below.
            raise ValueError("HTTPReadOnlyObjStorage requires a 'url' argument")
        self.session = requests.sessions.Session()
        self.root_path = url
        if not self.root_path.endswith("/"):
            self.root_path += "/"
        self.compression = compression

    def check_config(self, *, check_write):
        """Check the configuration for this object storage"""
        # No validation is performed; ``check_write`` is ignored.
        return True

    def __contains__(self, obj_id: ObjId) -> bool:
        """Return True when a HEAD request for the object answers 200."""
        resp = self.session.head(self._path(obj_id))
        return resp.status_code == 200

    def __iter__(self) -> Iterator[CompositeObjId]:
        """Unsupported: always raises ``exc.NonIterableObjStorage``."""
        raise exc.NonIterableObjStorage("__iter__")

    def __len__(self):
        """Unsupported: always raises ``exc.NonIterableObjStorage``."""
        raise exc.NonIterableObjStorage("__len__")

    def add(self, content: bytes, obj_id: ObjId, check_presence: bool = True) -> None:
        """Unsupported: always raises ``exc.ReadOnlyObjStorage``."""
        raise exc.ReadOnlyObjStorage("add")

    def delete(self, obj_id: ObjId):
        """Unsupported: always raises ``exc.ReadOnlyObjStorage``."""
        raise exc.ReadOnlyObjStorage("delete")

    def restore(self, content: bytes, obj_id: ObjId) -> None:
        """Unsupported: always raises ``exc.ReadOnlyObjStorage``."""
        raise exc.ReadOnlyObjStorage("restore")

    def list_content(
        self,
        last_obj_id: Optional[ObjId] = None,
        limit: int = DEFAULT_LIMIT,
    ) -> Iterator[CompositeObjId]:
        """Unsupported: always raises ``exc.NonIterableObjStorage``."""
        # Report the operation that was actually attempted.
        raise exc.NonIterableObjStorage("list_content")

    def get(self, obj_id: ObjId) -> bytes:
        """Fetch an object over HTTP and return its (decompressed) bytes.

        Args:
            obj_id: identifier of the object to fetch.

        Returns:
            The response body, passed through the configured decompressor
            when ``self.compression`` is set.

        Raises:
            exc.ObjNotFoundError: if building the URL, performing the
                request, or the response status check raises anything.
            exc.Error: if the decompressor reports trailing data after the
                compressed stream.
        """
        try:
            resp = self.session.get(self._path(obj_id))
            resp.raise_for_status()
        except Exception as err:
            # Keep the underlying failure attached as the explicit cause.
            raise exc.ObjNotFoundError(obj_id) from err

        ret: bytes = resp.content
        if self.compression:
            d = decompressors[self.compression]()
            ret = d.decompress(ret)
            if d.unused_data:
                # Bytes left over after the stream ended: treat as corrupt.
                hex_obj_id = objid_to_default_hex(obj_id)
                raise exc.Error("Corrupt object %s: trailing data found" % hex_obj_id)
        return ret

    def check(self, obj_id: ObjId) -> None:
        """Fetch the object and verify that its hash matches ``obj_id``.

        Raises:
            exc.Error: if ``compute_hash`` of the fetched content differs
                from ``obj_id``.
        """
        # Check the content integrity
        obj_content = self.get(obj_id)
        content_obj_id = compute_hash(obj_content)
        # NOTE(review): direct comparison assumes obj_id has the same form
        # as compute_hash's return value -- confirm for composite ids.
        if content_obj_id != obj_id:
            raise exc.Error(obj_id)

    def _path(self, obj_id):
        """Return the URL of ``obj_id``: its hex form joined to the root."""
        # NOTE(review): presumably expects a raw hash accepted by
        # hashutil.hash_to_hex -- confirm behaviour for composite ids.
        return urljoin(self.root_path, hashutil.hash_to_hex(obj_id))
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Sat, Jun 21, 5:49 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3238975
Attached To
rDOBJS Object storage
Event Timeline
Log In to Comment