Page MenuHomeSoftware Heritage

D7192.id27890.diff
No OneTemporary

D7192.id27890.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -2,7 +2,7 @@
swh.core >= 0.0.95
swh.counters >= 0.5.1
swh.indexer >= 0.4.1
-swh.model >= 2.6.3
+swh.model >= 6.2.0
swh.scheduler >= 0.7.0
swh.search >= 0.2.0
swh.storage >= 0.31.0
diff --git a/swh/web/api/urls.py b/swh/web/api/urls.py
--- a/swh/web/api/urls.py
+++ b/swh/web/api/urls.py
@@ -13,6 +13,7 @@
import swh.web.api.views.origin # noqa
import swh.web.api.views.origin_save # noqa
import swh.web.api.views.ping # noqa
+import swh.web.api.views.raw # noqa
import swh.web.api.views.release # noqa
import swh.web.api.views.revision # noqa
import swh.web.api.views.snapshot # noqa
diff --git a/swh/web/api/views/metadata.py b/swh/web/api/views/metadata.py
--- a/swh/web/api/views/metadata.py
+++ b/swh/web/api/views/metadata.py
@@ -16,9 +16,7 @@
from swh.web.api.apiurls import api_route
from swh.web.common import archive, converters
from swh.web.common.exc import BadInputExc, NotFoundExc
-from swh.web.common.utils import reverse
-
-SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+from swh.web.common.utils import SWHID_RE, reverse
@api_route(
diff --git a/swh/web/api/views/raw.py b/swh/web/api/views/raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/api/views/raw.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2018-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from django.http import HttpResponse
+from typing import Optional, Iterable
+
+from swh.core.api.classes import stream_results_optional
+from swh.model.git_objects import (
+ content_git_object,
+ directory_git_object,
+ revision_git_object,
+ release_git_object,
+ snapshot_git_object,
+)
+from swh.model import model
+from swh.model.hashutil import hash_to_hex
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.web.api.apidoc import api_doc, format_docstring
+from swh.web.api.apiurls import api_route
+from swh.web.common import archive
+from swh.web.common.exc import NotFoundExc
+from swh.web.common.utils import SWHID_RE
+
+
+@api_route(
+    f"/raw/(?P<swhid>{SWHID_RE})/",
+    "api-1-raw-object",
+)
+@api_doc("/raw/")
+@format_docstring()
+def api_raw_object(request, swhid):
+    """
+    .. http:get:: /api/1/raw/<swhid>/
+
+        Get the object corresponding to the SWHID in raw form.
+
+        This endpoint exposes the internal representation (see the
+        ``*_git_object`` functions in :mod:`swh.model.git_objects`), and
+        so can be used to fetch a binary blob which hashes to the same
+        identifier.
+
+        :param string swhid: the object's SWHID
+
+        :resheader Content-Type: application/octet-stream
+
+        :statuscode 200: no error
+        :statuscode 400: an invalid SWHID has been provided
+        :statuscode 404: the requested object can not be found in the archive
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`raw/swh:1:snp:6a3a2cf0b2b90ce7ae1cf0a221ed68035b686f5a`
+    """
+
+    # NOTE(review): SWHID_RE is looser than a valid core SWHID (any three
+    # letters, non-hex characters), so parsing can still fail here -- confirm
+    # that such a failure is reported as the documented 400.
+    swhid = CoreSWHID.from_string(swhid)
+    object_id = swhid.object_id
+    object_type = swhid.object_type
+
+    def not_found():
+        # Built lazily so every branch raises the exact same message.
+        return NotFoundExc(f"Object with id {swhid} not found.")
+
+    if object_type == ObjectType.CONTENT:
+        results = archive.storage.content_find({"sha1_git": object_id})
+        if not results:
+            raise not_found()
+        cnt = results[0]
+        # `cnt.with_data()` unfortunately doesn't seem to work.
+        if cnt.data is None:
+            content_dict = cnt.to_dict()
+            content_dict["data"] = archive.storage.content_get_data(cnt.sha1)
+            cnt = model.Content.from_dict(content_dict)
+            # Compare against None instead of testing truthiness: the empty
+            # blob has ``data == b""``, which is falsy yet perfectly valid.
+            assert (
+                cnt.data is not None
+            ), f"Content {hash_to_hex(cnt.sha1)} ceased to exist"
+        result = content_git_object(cnt)
+
+    elif object_type == ObjectType.DIRECTORY:
+        entries: Optional[Iterable[model.DirectoryEntry]] = stream_results_optional(
+            archive.storage.directory_get_entries,
+            directory_id=object_id,
+        )
+        if entries is None:
+            raise not_found()
+        result = directory_git_object(
+            model.Directory(
+                id=object_id,
+                entries=tuple(entries),
+            )
+        )
+
+    elif object_type == ObjectType.REVISION:
+        result = archive.storage.revision_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = revision_git_object(result)
+
+    elif object_type == ObjectType.RELEASE:
+        result = archive.storage.release_get([object_id])[0]
+        if result is None:
+            raise not_found()
+        result = release_git_object(result)
+
+    elif object_type == ObjectType.SNAPSHOT:
+        result = snapshot_get_all_branches(archive.storage, object_id)
+        if result is None:
+            raise not_found()
+        result = snapshot_git_object(result)
+
+    else:
+        raise ValueError(f"Unexpected object type variant: {object_type}")
+
+    # Serve the serialized object as a download named after its SWHID, with
+    # ":" replaced by "_" in the file name.
+    response = HttpResponse(result, content_type="application/octet-stream")
+    filename = str(swhid).replace(":", "_") + "_raw"
+    response["Content-disposition"] = f"attachment; filename={filename}"
+
+    return response
diff --git a/swh/web/api/views/vault.py b/swh/web/api/views/vault.py
--- a/swh/web/api/views/vault.py
+++ b/swh/web/api/views/vault.py
@@ -15,13 +15,12 @@
from swh.web.api.views.utils import api_lookup
from swh.web.common import archive, query
from swh.web.common.exc import BadInputExc
-from swh.web.common.utils import reverse
+from swh.web.common.utils import SWHID_RE, reverse
+
######################################################
# Common
-SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
-
# XXX: a bit spaghetti. Would be better with class-based views.
def _dispatch_cook_progress(request, bundle_type: str, swhid: CoreSWHID):
diff --git a/swh/web/common/utils.py b/swh/web/common/utils.py
--- a/swh/web/common/utils.py
+++ b/swh/web/common/utils.py
@@ -39,6 +39,8 @@
SWH_WEB_METRICS_REGISTRY = CollectorRegistry(auto_describe=True)
+# Loose URL-routing pattern for core SWHIDs: it accepts any three lowercase
+# letters as object type and any 40 lowercase alphanumerics as id, so a match
+# does not guarantee a well-formed SWHID.
+SWHID_RE = "swh:1:[a-z]{3}:[0-9a-z]{40}"
+
swh_object_icons = {
"alias": "mdi mdi-star",
"branch": "mdi mdi-source-branch",
diff --git a/swh/web/tests/api/views/test_raw.py b/swh/web/tests/api/views/test_raw.py
new file mode 100644
--- /dev/null
+++ b/swh/web/tests/api/views/test_raw.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2015-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import hashlib
+import pytest
+
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.model.hashutil import hash_to_bytes
+from swh.web.common.utils import reverse
+from swh.web.tests.data import random_sha1_bytes
+from swh.web.tests.utils import (
+ check_api_get_responses,
+ check_http_get_response,
+)
+
+
+def test_api_raw_not_found(api_client, unknown_core_swhid):
+    """Requesting a SWHID absent from the archive yields a 404 NotFoundExc."""
+    swhid_str = str(unknown_core_swhid)
+    url = reverse("api-1-raw-object", url_args={"swhid": swhid_str})
+    expected_payload = {
+        "exception": "NotFoundExc",
+        "reason": f"Object with id {unknown_core_swhid} not found.",
+    }
+
+    rv = check_api_get_responses(api_client, url, status_code=404)
+
+    assert rv.data == expected_payload
+
+
+def _test_api_raw_hash(api_client, archive_data, object_id, object_ty):
+    """Fetch an object through the raw endpoint and check it hashes to its id.
+
+    Verifies the response headers, then that the SHA1 of the response body
+    equals ``object_id`` (given in hexadecimal form).
+    """
+    swhid = f"swh:1:{object_ty}:{object_id}"
+    url = reverse("api-1-raw-object", url_args={"swhid": swhid})
+
+    rv = check_http_get_response(api_client, url, status_code=200)
+
+    assert rv["Content-Type"] == "application/octet-stream"
+    expected_disposition = f"attachment; filename=swh_1_{object_ty}_{object_id}_raw"
+    assert rv["Content-disposition"] == expected_disposition
+
+    body_digest = hashlib.sha1(rv.content).digest()
+    assert body_digest == hash_to_bytes(object_id)
+
+
+def test_api_raw_content(api_client, archive_data, content):
+    """The raw form of a content object must hash back to its ``sha1_git``."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=content["sha1_git"], object_ty="cnt"
+    )
+
+
+def test_api_raw_directory(api_client, archive_data, directory):
+    """Run the raw-endpoint hash check on a directory (``dir``) object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=directory, object_ty="dir"
+    )
+
+
+def test_api_raw_revision(api_client, archive_data, revision):
+    """Run the raw-endpoint hash check on a revision (``rev``) object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=revision, object_ty="rev"
+    )
+
+
+def test_api_raw_release(api_client, archive_data, release):
+    """Run the raw-endpoint hash check on a release (``rel``) object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=release, object_ty="rel"
+    )
+
+
+def test_api_raw_snapshot(api_client, archive_data, snapshot):
+    """Run the raw-endpoint hash check on a snapshot (``snp``) object."""
+    _test_api_raw_hash(
+        api_client, archive_data, object_id=snapshot, object_ty="snp"
+    )
diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py
--- a/swh/web/tests/conftest.py
+++ b/swh/web/tests/conftest.py
@@ -31,7 +31,7 @@
hash_to_hex,
)
from swh.model.model import Content, Directory
-from swh.model.swhids import ObjectType
+from swh.model.swhids import CoreSWHID, ObjectType
from swh.scheduler.tests.common import TASK_TYPES
from swh.storage.algos.origin import origin_get_latest_visit_status
from swh.storage.algos.revisions_walker import get_revisions_walker
@@ -52,6 +52,7 @@
override_storages,
random_content,
random_sha1,
+ random_sha1_bytes,
random_sha256,
)
from swh.web.tests.utils import create_django_permission
@@ -815,6 +816,18 @@
return random.choice(_object_type_swhid(ObjectType.SNAPSHOT))
+@pytest.fixture(scope="function", params=list(ObjectType))
+def unknown_core_swhid(request) -> CoreSWHID:
+    """Fixture returning an unknown core SWHID.
+
+    Tests using this will be called once per object type: the current
+    ``ObjectType`` is read from ``request.param``, and the object id is
+    freshly generated random bytes.
+    """
+    # A parametrized fixture receives its current parameter through the
+    # built-in ``request`` fixture, not through an argument of its own.
+    return CoreSWHID(
+        object_type=request.param,
+        object_id=random_sha1_bytes(),
+    )
+
+
# Fixture to manipulate data from a sample archive used in the tests
@pytest.fixture(scope="function")
def archive_data(tests_data):
diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py
--- a/swh/web/tests/data.py
+++ b/swh/web/tests/data.py
@@ -59,18 +59,33 @@
}
-def random_sha1():
- return hash_to_hex(bytes(random.randint(0, 255) for _ in range(20)))
+def random_sha1_bytes() -> Sha1:
+    """Return 20 random bytes, i.e. a value the size of a SHA1 digest."""
+    return bytes([random.randint(0, 255) for _ in range(20)])
-def random_sha256():
+def random_sha1() -> str:
+    """Return ``random_sha1_bytes()`` output passed through ``hash_to_hex``."""
+    raw_id = random_sha1_bytes()
+    return hash_to_hex(raw_id)
+
+
+# Return 32 random bytes (drawn with the ``random`` module) passed through
+# ``hash_to_hex``.
+def random_sha256() -> str:
return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32)))
-def random_blake2s256():
+# Return 32 random bytes (drawn with the ``random`` module) passed through
+# ``hash_to_hex``; same construction as ``random_sha256``.
+def random_blake2s256() -> str:
return hash_to_hex(bytes(random.randint(0, 255) for _ in range(32)))
+def random_object_type() -> ObjectType:
+    """Pick one member of ``ObjectType`` at random."""
+    candidates = list(ObjectType)
+    return random.choice(candidates)
+
+
+def random_swhid() -> CoreSWHID:
+    """Build a ``CoreSWHID`` with a random object type and a random 20-byte id."""
+    object_type = random_object_type()
+    object_id = random_sha1_bytes()
+    return CoreSWHID(object_type=object_type, object_id=object_id)
+
+
def random_content():
return {
"sha1": random_sha1(),

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 3:11 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230677

Event Timeline