Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9337634
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 72728c20..d6fed104 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,9 +1,9 @@
swh.auth[django] >= 0.5.3
swh.core >= 0.0.95
swh.counters >= 0.5.1
swh.indexer >= 0.4.1
-swh.model >= 2.6.3
+swh.model >= 6.2.0
swh.scheduler >= 0.7.0
swh.search >= 0.2.0
-swh.storage >= 0.31.0
+swh.storage >= 1.4.0
swh.vault >= 1.0.0
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
index 831e16f1..4124955a 100644
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -1,22 +1,23 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.api.apiurls import APIUrls
import swh.web.api.views.add_forge_now # noqa
import swh.web.api.views.content # noqa
import swh.web.api.views.directory # noqa
import swh.web.api.views.graph # noqa
import swh.web.api.views.identifiers # noqa
import swh.web.api.views.metadata # noqa
import swh.web.api.views.origin # noqa
import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
+import swh.web.api.views.raw # noqa
import swh.web.api.views.release # noqa
import swh.web.api.views.revision # noqa
import swh.web.api.views.snapshot # noqa
import swh.web.api.views.stat # noqa
import swh.web.api.views.vault # noqa
urlpatterns = APIUrls.get_url_patterns()
diff --git a/swh/web/api/views/raw.py b/swh/web/api/views/raw.py
new file mode 100644
index 00000000..10bb38d8
--- /dev/null
+++ b/swh/web/api/views/raw.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.http import HttpResponse
+
+from swh.model.git_objects import (
+ content_git_object,
+ directory_git_object,
+ revision_git_object,
+ release_git_object,
+ snapshot_git_object,
+)
+from swh.model import model
+from swh.model.hashutil import hash_to_hex
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.algos.directory import directory_get
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive
+from swh.web.common.exc import NotFoundExc
+from swh.web.common.utils import SWHID_RE
+
+
+@api_route(f"/raw/(?P<swhid>{SWHID_RE})/", "api-1-raw-object")
+@api_doc("/raw/")
+@format_docstring()
+def api_raw_object(request, swhid):
+    """
+    .. http:get:: /api/1/raw/<swhid>/
+
+        Get the object corresponding to the SWHID in raw form.
+
+        This endpoint exposes the internal representation (see the
+        ``*_git_object`` functions in :mod:`swh.model.git_objects`), and
+        so can be used to fetch a binary blob which hashes to the same
+        identifier.
+
+        :param string swhid: the object's SWHID
+
+        :resheader Content-Type: application/octet-stream
+
+        :statuscode 200: no error
+        :statuscode 404: the requested object can not be found in the archive
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
+    """
+
+    swhid = CoreSWHID.from_string(swhid)
+    object_id = swhid.object_id
+    object_type = swhid.object_type
+
+    def not_found():
+        return NotFoundExc(f"Object with id {swhid} not found.")
+
+    if object_type == ObjectType.CONTENT:
+        results = archive.storage.content_find({"sha1_git": object_id})
+        if not results:
+            raise not_found()
+        cnt = results[0]
+        # `cnt.with_data()` unfortunately doesn't seem to work.
+        if cnt.data is None:
+            d = cnt.to_dict()
+            d["data"] = archive.storage.content_get_data(cnt.sha1)
+            cnt = model.Content.from_dict(d)
+        # Compare against None, not truthiness: an empty blob (b"") is a
+        # valid content and must still be served; a missing one is a 404.
+        if cnt.data is None:
+            raise NotFoundExc(f"Content {hash_to_hex(cnt.sha1)} ceased to exist")
+        result = content_git_object(cnt)
+
+    elif object_type == ObjectType.DIRECTORY:
+        result = directory_get(archive.storage, object_id)
+        if result is None:
+            raise not_found()
+        result = directory_git_object(result)
+
+    elif object_type == ObjectType.REVISION:
+        result = archive.storage.revision_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = revision_git_object(result)
+
+    elif object_type == ObjectType.RELEASE:
+        result = archive.storage.release_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = release_git_object(result)
+
+    elif object_type == ObjectType.SNAPSHOT:
+        result = snapshot_get_all_branches(archive.storage, object_id)
+        if result is None:
+            raise not_found()
+        result = snapshot_git_object(result)
+
+    else:
+        raise ValueError(f"Unexpected object type variant: {object_type}")
+
+    response = HttpResponse(result, content_type="application/octet-stream")
+    filename = str(swhid).replace(":", "_") + "_raw"
+    response["Content-disposition"] = f"attachment; filename={filename}"
+
+    return response
diff --git a/swh/web/tests/api/views/test_raw.py b/swh/web/tests/api/views/test_raw.py
new file mode 100644
index 00000000..73369fdb
--- /dev/null
+++ b/swh/web/tests/api/views/test_raw.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+
+from swh.model.hashutil import hash_to_bytes
+from swh.web.common.utils import reverse
+from swh.web.tests.utils import (
+ check_api_get_responses,
+ check_http_get_response,
+)
+
+
+def test_api_raw_not_found(api_client, unknown_core_swhid):
+    """Requesting an unknown core SWHID must return a 404 NotFoundExc payload."""
+    target = reverse("api-1-raw-object", url_args={"swhid": str(unknown_core_swhid)})
+    # The error message is expected to embed the SWHID exactly as requested.
+    reason = f"Object with id {unknown_core_swhid} not found."
+    response = check_api_get_responses(api_client, target, status_code=404)
+    assert response.data == {"exception": "NotFoundExc", "reason": reason}
+
+
+def _test_api_raw_hash(api_client, archive_data, object_id, object_ty):
+    """Fetch one object through the raw endpoint and check headers and hash.
+
+    The response body must hash (SHA-1) to ``object_id``; ``archive_data`` is
+    accepted but not used here.
+    """
+    swhid = f"swh:1:{object_ty}:{object_id}"
+    disposition = f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+    url = reverse("api-1-raw-object", url_args={"swhid": swhid})
+
+    response = check_http_get_response(api_client, url, status_code=200)
+    assert response["Content-Type"] == "application/octet-stream"
+    assert response["Content-disposition"] == disposition
+    assert hashlib.sha1(response.content).digest() == hash_to_bytes(object_id)
+
+
+def test_api_raw_content(api_client, archive_data, content):
+    """Run the shared raw-endpoint check for a content, keyed by its sha1_git."""
+    sha1_git = content["sha1_git"]
+    _test_api_raw_hash(api_client, archive_data, object_id=sha1_git, object_ty="cnt")
+
+
+def test_api_raw_directory(api_client, archive_data, directory):
+    """Run the shared raw-endpoint check for a directory (``dir`` SWHID)."""
+    _test_api_raw_hash(api_client, archive_data, object_id=directory, object_ty="dir")
+
+
+def test_api_raw_revision(api_client, archive_data, revision):
+    """Run the shared raw-endpoint check for a revision (``rev`` SWHID)."""
+    _test_api_raw_hash(api_client, archive_data, object_id=revision, object_ty="rev")
+
+
+def test_api_raw_release(api_client, archive_data, release):
+    """Run the shared raw-endpoint check for a release (``rel`` SWHID)."""
+    _test_api_raw_hash(api_client, archive_data, object_id=release, object_ty="rel")
+
+
+def test_api_raw_snapshot(api_client, archive_data, snapshot):
+    """Run the shared raw-endpoint check for a snapshot (``snp`` SWHID)."""
+    _test_api_raw_hash(api_client, archive_data, object_id=snapshot, object_ty="snp")
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Jul 4 2025, 8:11 AM (10 w, 15 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3294160
Attached To
rDWAPPS Web applications
Event Timeline
Log In to Comment