Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123825
D7192.id27890.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
D7192.id27890.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,7 +2,7 @@
swh.core >= 0.0.95
swh.counters >= 0.5.1
swh.indexer >= 0.4.1
-swh.model >= 2.6.3
+swh.model >= 6.2.0
swh.scheduler >= 0.7.0
swh.search >= 0.2.0
swh.storage >= 0.31.0
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -13,6 +13,7 @@
import swh.web.api.views.origin # noqa
import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
+import swh.web.api.views.raw # noqa
import swh.web.api.views.release # noqa
import swh.web.api.views.revision # noqa
import swh.web.api.views.snapshot # noqa
diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
--- a/swh/web/api/views/metadata.py
+++ b/swh/web/api/views/metadata.py
@@ -16,9 +16,7 @@
from swh.web.api.apiurls import api_route
from swh.web.common import archive, converters
from swh.web.common.exc import BadInputExc, NotFoundExc
-from swh.web.common.utils import reverse
-
-SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+from swh.web.common.utils import SWHID_RE, reverse
@api_route(
diff --git a/swh/web/api/views/raw.py b/swh/web/api/views/raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/api/views/raw.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.http import HttpResponse
+from typing import Optional, Iterable
+
+from swh.core.api.classes import stream_results_optional
+from swh.model.git_objects import (
+ content_git_object,
+ directory_git_object,
+ revision_git_object,
+ release_git_object,
+ snapshot_git_object,
+)
+from swh.model import model
+from swh.model.hashutil import hash_to_hex
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive
+from swh.web.common.exc import NotFoundExc
+from swh.web.common.utils import SWHID_RE
+
+
+@api_route(
+    f"/raw/(?P<swhid>{SWHID_RE})/",
+    "api-1-raw-object",
+)
+@api_doc("/raw/")
+@format_docstring()
+def api_raw_object(request, swhid):
+    """
+    .. http:get:: /api/1/raw/<swhid>/
+
+        Get the object corresponding to the SWHID in raw form.
+
+        This endpoint exposes the internal representation (see the
+        ``*_git_object`` functions in :mod:`swh.model.git_objects`), and
+        so can be used to fetch a binary blob which hashes to the same
+        identifier.
+
+        :param string swhid: the object's SWHID
+
+        :resheader Content-Type: application/octet-stream
+
+        :statuscode 200: no error
+        :statuscode 400: an invalid SWHID has been provided
+        :statuscode 404: the requested object can not be found in the archive
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
+    """
+    # Imported locally so this view does not rely on names missing from the
+    # module-level import list.
+    from swh.model.exceptions import ValidationError
+    from swh.web.common.exc import BadInputExc
+
+    # SWHID_RE accepts any three-letter type and any alphanumeric id, so a URL
+    # that matched the route may still fail to parse as a core SWHID. Report
+    # that as bad input (the documented 400) rather than an unhandled error.
+    try:
+        swhid = CoreSWHID.from_string(swhid)
+    except ValidationError as e:
+        raise BadInputExc(f"Invalid SWHID: {e}") from None
+    object_id = swhid.object_id
+    object_type = swhid.object_type
+
+    def not_found():
+        # Build (not raise) the exception so every branch can `raise not_found()`.
+        return NotFoundExc(f"Object with id {swhid} not found.")
+
+    if object_type == ObjectType.CONTENT:
+        results = archive.storage.content_find({"sha1_git": object_id})
+        if not results:
+            raise not_found()
+        cnt = results[0]
+        # `cnt.with_data()` unfortunately doesn't seem to work.
+        if cnt.data is None:
+            data = archive.storage.content_get_data(cnt.sha1)
+            # Compare against None rather than truthiness: the empty content
+            # (b"") is a legitimate object and must still be served.
+            if data is None:
+                raise NotFoundExc(f"Content {hash_to_hex(cnt.sha1)} ceased to exist")
+            cnt = model.Content.from_dict({**cnt.to_dict(), "data": data})
+        result = content_git_object(cnt)
+
+    elif object_type == ObjectType.DIRECTORY:
+        entries: Optional[Iterable[model.DirectoryEntry]] = stream_results_optional(
+            archive.storage.directory_get_entries,
+            directory_id=object_id,
+        )
+        if entries is None:
+            raise not_found()
+        result = directory_git_object(
+            model.Directory(
+                id=object_id,
+                entries=tuple(entries),
+            )
+        )
+
+    elif object_type == ObjectType.REVISION:
+        result = archive.storage.revision_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = revision_git_object(result)
+
+    elif object_type == ObjectType.RELEASE:
+        result = archive.storage.release_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = release_git_object(result)
+
+    elif object_type == ObjectType.SNAPSHOT:
+        result = snapshot_get_all_branches(archive.storage, object_id)
+        if result is None:
+            raise not_found()
+        result = snapshot_git_object(result)
+
+    else:
+        raise ValueError(f"Unexpected object type variant: {object_type}")
+
+    response = HttpResponse(result, content_type="application/octet-stream")
+    # Colons of the SWHID are replaced by underscores in the download name.
+    filename = str(swhid).replace(":", "_") + "_raw"
+    response["Content-disposition"] = f"attachment; filename={filename}"
+
+    return response
diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py
--- a/swh/web/api/views/vault.py
+++ b/swh/web/api/views/vault.py
@@ -15,13 +15,12 @@
from swh.web.api.views.utils import api_lookup
from swh.web.common import archive, query
from swh.web.common.exc import BadInputExc
-from swh.web.common.utils import reverse
+from swh.web.common.utils import SWHID_RE, reverse
+
######################################################
# Common
-SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
-
# XXX: a bit spaghetti. Would be better with class-based views.
def _dispatch_cook_progress(request, bundle_type: str, swhid: CoreSWHID):
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -39,6 +39,8 @@
SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True)
+SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+
swh_object_icons = {
"alias": "mdi mdi-star",
"branch": "mdi mdi-source-branch",
diff --git a/swh/web/tests/api/views/test_raw.py b/swh/web/tests/api/views/test_raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/api/views/test_raw.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2015-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+import pytest
+
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.model.hashutil import hash_to_bytes
+from swh.web.common.utils import reverse
+from swh.web.tests.data import random_sha1_bytes
+from swh.web.tests.utils import (
+ check_api_get_responses,
+ check_http_get_response,
+)
+
+
+def test_api_raw_not_found(api_client, unknown_core_swhid):
+    """Requesting an unknown SWHID answers 404 with the NotFoundExc payload."""
+    expected_payload = {
+        "exception": "NotFoundExc",
+        "reason": f"Object with id {unknown_core_swhid} not found.",
+    }
+    swhid_arg = {"swhid": str(unknown_core_swhid)}
+    url = reverse("api-1-raw-object", url_args=swhid_arg)
+    response = check_api_get_responses(api_client, url, status_code=404)
+    assert response.data == expected_payload
+
+
+def _test_api_raw_hash(api_client, archive_data, object_id, object_ty):
+    """Fetch a raw object and check its headers and that its SHA1 is ``object_id``.
+
+    ``object_ty`` is the three-letter SWHID type tag and ``object_id`` the
+    hexadecimal identifier; together they form the requested SWHID.
+    """
+    swhid_arg = {"swhid": f"swh:1:{object_ty}:{object_id}"}
+    url = reverse("api-1-raw-object", url_args=swhid_arg)
+
+    response = check_http_get_response(api_client, url, status_code=200)
+
+    expected_disposition = f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+    assert response["Content-Type"] == "application/octet-stream"
+    assert response["Content-disposition"] == expected_disposition
+
+    # The body must hash back to the identifier it was requested under.
+    expected_digest = hash_to_bytes(object_id)
+    actual_digest = hashlib.new("sha1", response.content).digest()
+    assert actual_digest == expected_digest
+
+
+def test_api_raw_content(api_client, archive_data, content):
+    """Check the raw endpoint for a content object, looked up by ``sha1_git``."""
+    sha1_git = content["sha1_git"]
+    _test_api_raw_hash(api_client, archive_data, object_id=sha1_git, object_ty="cnt")
+
+
+def test_api_raw_directory(api_client, archive_data, directory):
+    """Check the raw endpoint for a directory object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=directory, object_ty="dir"
+    )
+
+
+def test_api_raw_revision(api_client, archive_data, revision):
+    """Check the raw endpoint for a revision object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=revision, object_ty="rev"
+    )
+
+
+def test_api_raw_release(api_client, archive_data, release):
+    """Check the raw endpoint for a release object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=release, object_ty="rel"
+    )
+
+
+def test_api_raw_snapshot(api_client, archive_data, snapshot):
+    """Check the raw endpoint for a snapshot object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=snapshot, object_ty="snp"
+    )
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -31,7 +31,7 @@
hash_to_hex,
)
from swh.model.model import Content, Directory
-from swh.model.swhids import ObjectType
+from swh.model.swhids import CoreSWHID, ObjectType
from swh.scheduler.tests.common import TASK_TYPES
from swh.storage.algos.origin import origin_get_latest_visit_status
from swh.storage.algos.revisions_walker import get_revisions_walker
@@ -52,6 +52,7 @@
override_storages,
random_content,
random_sha1,
+ random_sha1_bytes,
random_sha256,
)
from swh.web.tests.utils import create_django_permission
@@ -815,6 +816,18 @@
return random.choice(_object_type_swhid(ObjectType.SNAPSHOT))
+@pytest.fixture(scope="function", params=list(ObjectType))
+def unknown_core_swhid(request) -> CoreSWHID:
+    """Fixture returning an unknown core SWHID.
+
+    Tests using this will be called once per object type.
+    """
+    # A parametrized fixture reads its current parameter from `request.param`;
+    # any other argument name would be looked up as a separate fixture.
+    return CoreSWHID(
+        object_type=request.param,
+        object_id=random_sha1_bytes(),
+    )
+
+
# Fixture to manipulate data from a sample archive used in the tests
@pytest.fixture(scope="function")
def archive_data(tests_data):
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -59,18 +59,33 @@
}
-def random_sha1():
- return hash_to_hex(bytes(random.randint(0, 255) for _ in range(20)))
+def random_sha1_bytes() -> Sha1:
+ return bytes(random.randint(0, 255) for _ in range(20))
-def random_sha256():
+def random_sha1() -> str:
+ return hash_to_hex(random_sha1_bytes())
+
+
+def random_sha256() -> str:
return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32)))
-def random_blake2s256():
+def random_blake2s256() -> str:
return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32)))
+def random_object_type() -> ObjectType:
+    """Return one member of ``ObjectType`` picked at random."""
+    candidates = list(ObjectType)
+    return random.choice(candidates)
+
+
+def random_swhid() -> CoreSWHID:
+    """Return a core SWHID with a random object type and a random 20-byte id."""
+    # Draw the type before the id, matching the order of the random calls
+    # in the keyword-argument form.
+    object_type = random_object_type()
+    object_id = random_sha1_bytes()
+    return CoreSWHID(object_type=object_type, object_id=object_id)
+
+
def random_content():
return {
"sha1": random_sha1(),
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 3:11 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230677
Attached To
D7192: Route for fetching Git-encoded objects
Event Timeline
Log In to Comment