Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/requirements-swh.txt b/requirements-swh.txt
index 72728c20..d6fed104 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,9 +1,9 @@
swh.auth[django] >= 0.5.3
swh.core >= 0.0.95
swh.counters >= 0.5.1
swh.indexer >= 0.4.1
-swh.model >= 2.6.3
+swh.model >= 6.2.0
swh.scheduler >= 0.7.0
swh.search >= 0.2.0
-swh.storage >= 0.31.0
+swh.storage >= 1.4.0
swh.vault >= 1.0.0
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
index 831e16f1..4124955a 100644
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -1,22 +1,23 @@
# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.web.api.apiurls import APIUrls
import swh.web.api.views.add_forge_now # noqa
import swh.web.api.views.content # noqa
import swh.web.api.views.directory # noqa
import swh.web.api.views.graph # noqa
import swh.web.api.views.identifiers # noqa
import swh.web.api.views.metadata # noqa
import swh.web.api.views.origin # noqa
import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
+import swh.web.api.views.raw # noqa
import swh.web.api.views.release # noqa
import swh.web.api.views.revision # noqa
import swh.web.api.views.snapshot # noqa
import swh.web.api.views.stat # noqa
import swh.web.api.views.vault # noqa
urlpatterns = APIUrls.get_url_patterns()
diff --git a/swh/web/api/views/raw.py b/swh/web/api/views/raw.py
new file mode 100644
index 00000000..10bb38d8
--- /dev/null
+++ b/swh/web/api/views/raw.py
@@ -0,0 +1,109 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.http import HttpResponse
+
+from swh.model.git_objects import (
+ content_git_object,
+ directory_git_object,
+ revision_git_object,
+ release_git_object,
+ snapshot_git_object,
+)
+from swh.model import model
+from swh.model.hashutil import hash_to_hex
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.algos.directory import directory_get
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive
+from swh.web.common.exc import NotFoundExc
+from swh.web.common.utils import SWHID_RE
+
+
+@api_route(
+    f"/raw/(?P<swhid>{SWHID_RE})/",
+    "api-1-raw-object",
+)
+@api_doc("/raw/")
+@format_docstring()
+def api_raw_object(request, swhid):
+    """
+    .. http:get:: /api/1/raw/<swhid>/
+
+        Get the object corresponding to the SWHID in raw form.
+
+        This endpoint exposes the internal representation (see the
+        ``*_git_object`` functions in :mod:`swh.model.git_objects`), and
+        so can be used to fetch a binary blob which hashes to the same
+        identifier.
+
+        :param string swhid: the object's SWHID
+
+        :resheader Content-Type: application/octet-stream
+
+        :statuscode 200: no error
+        :statuscode 404: the requested object can not be found in the archive
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
+    """
+
+    swhid = CoreSWHID.from_string(swhid)
+    object_id = swhid.object_id
+    object_type = swhid.object_type
+
+    def not_found():
+        return NotFoundExc(f"Object with id {swhid} not found.")
+
+    if object_type == ObjectType.CONTENT:
+        results = archive.storage.content_find({"sha1_git": object_id})
+        if not results:
+            raise not_found()
+        cnt = results[0]
+        if cnt.data is None:
+            # `cnt.with_data()` unfortunately doesn't seem to work.
+            data = archive.storage.content_get_data(cnt.sha1)
+            # Compare to None, not truthiness: empty content (b"") is valid.
+            assert data is not None, f"Content {hash_to_hex(cnt.sha1)} ceased to exist"
+            cnt = model.Content.from_dict({**cnt.to_dict(), "data": data})
+        result = content_git_object(cnt)
+
+    elif object_type == ObjectType.DIRECTORY:
+        directory = directory_get(archive.storage, object_id)
+        if directory is None:
+            raise not_found()
+        result = directory_git_object(directory)
+
+    elif object_type == ObjectType.REVISION:
+        revision = archive.storage.revision_get([object_id])[0]
+        if revision is None:
+            raise not_found()
+        result = revision_git_object(revision)
+
+    elif object_type == ObjectType.RELEASE:
+        release = archive.storage.release_get([object_id])[0]
+        if release is None:
+            raise not_found()
+        result = release_git_object(release)
+
+    elif object_type == ObjectType.SNAPSHOT:
+        snapshot = snapshot_get_all_branches(archive.storage, object_id)
+        if snapshot is None:
+            raise not_found()
+        result = snapshot_git_object(snapshot)
+
+    else:
+        raise ValueError(f"Unexpected object type variant: {object_type}")
+
+    response = HttpResponse(result, content_type="application/octet-stream")
+    filename = str(swhid).replace(":", "_") + "_raw"
+    response["Content-disposition"] = f"attachment; filename={filename}"
+
+    return response
diff --git a/swh/web/tests/api/views/test_raw.py b/swh/web/tests/api/views/test_raw.py
new file mode 100644
index 00000000..73369fdb
--- /dev/null
+++ b/swh/web/tests/api/views/test_raw.py
@@ -0,0 +1,58 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+
+from swh.model.hashutil import hash_to_bytes
+from swh.web.common.utils import reverse
+from swh.web.tests.utils import (
+ check_api_get_responses,
+ check_http_get_response,
+)
+
+
+def test_api_raw_not_found(api_client, unknown_core_swhid):
+    """An unknown SWHID must yield a 404 carrying a ``NotFoundExc`` payload."""
+    reason = f"Object with id {unknown_core_swhid} not found."
+    url_args = {"swhid": str(unknown_core_swhid)}
+    url = reverse("api-1-raw-object", url_args=url_args)
+    response = check_api_get_responses(api_client, url, status_code=404)
+    assert response.data == {"exception": "NotFoundExc", "reason": reason}
+
+
+def _test_api_raw_hash(api_client, archive_data, object_id, object_ty):
+    """Fetch the raw object and check its headers and its SHA-1 checksum."""
+    swhid = f"swh:1:{object_ty}:{object_id}"
+    url = reverse("api-1-raw-object", url_args={"swhid": swhid})
+    expected_disposition = f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+
+    resp = check_http_get_response(api_client, url, status_code=200)
+
+    assert resp["Content-Type"] == "application/octet-stream"
+    assert resp["Content-disposition"] == expected_disposition
+    # The SHA-1 of the returned bytes must be the identifier that was
+    # requested in the SWHID.
+    digest = hashlib.sha1(resp.content).digest()
+    assert digest == hash_to_bytes(object_id)
+
+
+def test_api_raw_content(api_client, archive_data, content):
+    _test_api_raw_hash(api_client, archive_data, content["sha1_git"], object_ty="cnt")
+
+
+def test_api_raw_directory(api_client, archive_data, directory):
+    _test_api_raw_hash(api_client, archive_data, directory, object_ty="dir")
+
+
+def test_api_raw_revision(api_client, archive_data, revision):
+    _test_api_raw_hash(api_client, archive_data, revision, object_ty="rev")
+
+
+def test_api_raw_release(api_client, archive_data, release):
+    _test_api_raw_hash(api_client, archive_data, release, object_ty="rel")
+
+
+def test_api_raw_snapshot(api_client, archive_data, snapshot):
+    _test_api_raw_hash(api_client, archive_data, snapshot, object_ty="snp")

File Metadata

Mime Type
text/x-diff
Expires
Jul 4 2025, 8:11 AM (10 w, 15 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3294160

Event Timeline