Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123217
D4013.id14193.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
20 KB
Subscribers
None
D4013.id14193.diff
View Options
diff --git a/swh/deposit/__init__.py b/swh/deposit/__init__.py
--- a/swh/deposit/__init__.py
+++ b/swh/deposit/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pkg_resources
+
+try:
+ __version__ = pkg_resources.get_distribution("swh.deposit").version
+except pkg_resources.DistributionNotFound:
+ __version__ = "devel"
diff --git a/swh/deposit/api/__init__.py b/swh/deposit/api/__init__.py
--- a/swh/deposit/api/__init__.py
+++ b/swh/deposit/api/__init__.py
@@ -2,10 +2,3 @@
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-
-import pkg_resources
-
-try:
- __version__ = pkg_resources.get_distribution("swh.deposit").version
-except pkg_resources.DistributionNotFound:
- __version__ = "devel"
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -114,6 +114,8 @@
on_behalf_of = meta.get("HTTP_ON_BEHALF_OF")
metadata_relevant = meta.get("HTTP_METADATA_RELEVANT")
+ swhid = meta.get("X_Check_SWHID")
+
return {
"content-type": content_type,
"content-length": content_length,
@@ -124,6 +126,7 @@
"slug": slug,
"on-behalf-of": on_behalf_of,
"metadata-relevant": metadata_relevant,
+ "swhid": swhid,
}
def _compute_md5(self, filehandler) -> bytes:
@@ -610,7 +613,7 @@
"""Atom entry deposit.
Args:
- request (Request): the request holding information to parse
+ request: the request holding information to parse
and inject in db
headers: request headers formatted
collection_name: the associated client
@@ -785,6 +788,7 @@
NOT_FOUND, f"Deposit with id {deposit_id} does not exist"
)
+ assert deposit is not None
checks = self.restrict_access(request, deposit)
if checks:
return checks
@@ -799,18 +803,19 @@
return {"headers": headers}
- def restrict_access(
- self, request: Request, deposit: Optional[Deposit] = None
- ) -> Dict[str, Any]:
- if deposit:
- if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL:
- summary = "You can only act on deposit with status '%s'" % (
- DEPOSIT_STATUS_PARTIAL,
- )
- description = f"This deposit has status '{deposit.status}'"
- return make_error_dict(
- BAD_REQUEST, summary=summary, verbose_description=description
- )
+ def restrict_access(self, request: Request, deposit: Deposit) -> Dict[str, Any]:
+ """Add further checks on a deposit to prevent an update on a deposit not in partial
+ state.
+
+ """
+ if request.method != "GET" and deposit.status != DEPOSIT_STATUS_PARTIAL:
+ summary = "You can only act on deposit with status '%s'" % (
+ DEPOSIT_STATUS_PARTIAL,
+ )
+ description = f"This deposit has status '{deposit.status}'"
+ return make_error_dict(
+ BAD_REQUEST, summary=summary, verbose_description=description
+ )
return {}
def _basic_not_allowed_method(self, request: Request, method: str):
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -3,12 +3,36 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import datetime, timezone
+import json
from typing import Any, Dict, Optional, Tuple
from rest_framework import status
-
-from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI
-from ..errors import BAD_REQUEST, make_error_dict
+from rest_framework.request import Request
+
+from swh.deposit.models import Deposit
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import parse_swhid
+from swh.model.model import (
+ MetadataAuthority,
+ MetadataAuthorityType,
+ MetadataFetcher,
+ MetadataTargetType,
+ RawExtrinsicMetadata,
+)
+from swh.storage import get_storage
+from swh.storage.interface import StorageInterface
+
+from ..config import (
+ CONT_FILE_IRI,
+ DEPOSIT_STATUS_LOAD_SUCCESS,
+ EDIT_SE_IRI,
+ EM_IRI,
+ METADATA_KEY,
+ RAW_METADATA_KEY,
+ SWH_METADATA_AUTHORITY,
+)
+from ..errors import BAD_REQUEST, ParserError, make_error_dict
from ..parsers import (
SWHAtomEntryParser,
SWHFileUploadTarParser,
@@ -103,8 +127,28 @@
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
+ def __init__(self):
+ super().__init__()
+ self.storage: StorageInterface = get_storage(**self.config["storage"])
+
+ def restrict_access(self, request: Request, deposit: Deposit) -> Dict[str, Any]:
+ """For the metadata-only update of a deposit, we need to allow some checks on "done"
+ deposits. This overrides allow for such case to occur.
+
+ """
+ headers = self._read_headers(request)
+ if (
+ request.method == "PUT"
+ and headers["swhid"] is not None
+ and deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS
+ ):
+ # Update only metadata is authorized for deposit ingested
+ return {}
+ # otherwise, let the standard access restriction occur
+ return super().restrict_access(request, deposit)
+
def process_put(
- self, req, headers: Dict, collection_name: str, deposit_id: int
+ self, request, headers: Dict, collection_name: str, deposit_id: int
) -> Dict[str, Any]:
"""Replace existing deposit's metadata/archive with new ones.
@@ -116,19 +160,113 @@
204 No content
"""
- if req.content_type.startswith("multipart/"):
+ if request.content_type.startswith("multipart/"):
return self._multipart_upload(
- req,
+ request,
headers,
collection_name,
deposit_id=deposit_id,
replace_archives=True,
replace_metadata=True,
)
- return self._atom_entry(
- req, headers, collection_name, deposit_id=deposit_id, replace_metadata=True
+ swhid = headers.get("swhid")
+ if swhid is None:
+ return self._atom_entry(
+ request,
+ headers,
+ collection_name,
+ deposit_id=deposit_id,
+ replace_metadata=True,
+ )
+
+ # Update metadata on a deposit already ingested
+ # Only write to the metadata storage
+
+ try:
+ raw_metadata, metadata = self._read_metadata(request.data)
+ except ParserError:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Malformed xml metadata",
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.",
+ )
+ if not metadata:
+ return make_error_dict(
+ BAD_REQUEST,
+ "Empty body request is not supported",
+ "Atom entry deposit is supposed to send for metadata. "
+ "If the body is empty, there is no metadata.",
+ )
+
+ metadata_authority = MetadataAuthority(
+ type=MetadataAuthorityType.DEPOSIT_CLIENT,
+ url=self.provider["provider_url"],
+ metadata={
+ "name": self.provider["provider_name"],
+ **(self.provider["metadata"] or {}),
+ },
+ )
+
+ metadata_fetcher = MetadataFetcher(
+ name=self.tool["name"],
+ version=self.tool["version"],
+ metadata=self.tool["configuration"],
)
+ deposit_swhid = parse_swhid(swhid)
+
+ assert deposit_swhid.object_type == "directory"
+ directory_id = hash_to_bytes(deposit_swhid.object_id)
+
+ # check the swhid exists in the archive
+ directories_missing = list(self.storage.directory_missing([directory_id]))
+ if len(directories_missing) > 0:
+ return make_error_dict(
+ BAD_REQUEST,
+ f"Unknown directory SWHID {swhid} reference",
+ "The SWHID provided is not a know directory SWHID in SWH archive. "
+ "Please provide an existing SWHID.",
+ )
+
+ discovery_date = datetime.now(tz=timezone.utc)
+
+ # QUESTION: Do we also put the raw xml?
+ # A priori, no since we don't do this originally in the loader.
+ metadata_object = RawExtrinsicMetadata(
+ type=MetadataTargetType.DIRECTORY,
+ id=deposit_swhid,
+ discovery_date=discovery_date,
+ authority=SWH_METADATA_AUTHORITY,
+ fetcher=metadata_fetcher,
+ format="sword-v2-atom-codemeta-v2-in-json",
+ metadata=json.dumps(metadata).encode(),
+ )
+
+ # write to metadata storage
+ self.storage.metadata_authority_add([metadata_authority])
+ self.storage.metadata_fetcher_add([metadata_fetcher])
+ self.storage.raw_extrinsic_metadata_add([metadata_object])
+
+ # replace metadata within the deposit backend
+ deposit = Deposit.objects.get(pk=deposit_id)
+
+ deposit_request_data = {
+ METADATA_KEY: metadata,
+ RAW_METADATA_KEY: raw_metadata,
+ }
+
+ self._deposit_request_put(
+ deposit, deposit_request_data, replace_metadata=True,
+ )
+
+ return {
+ "deposit_id": deposit_id,
+ "deposit_date": discovery_date,
+ "status": deposit.status,
+ "archive": None,
+ }
+
def process_post(
self,
request,
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -12,7 +12,6 @@
from rest_framework import status
from swh.core import tarball
-from swh.deposit.api import __version__
from swh.deposit.utils import normalize_date
from swh.model import identifiers
@@ -106,12 +105,6 @@
def __init__(self):
super().__init__()
- self.provider = self.config["provider"]
- self.tool = {
- "name": "swh-deposit",
- "version": __version__,
- "configuration": {"sword_version": "2"},
- }
def _normalize_dates(self, deposit, metadata):
"""Normalize the date to use as a tuple of author date, committer date
diff --git a/swh/deposit/config.py b/swh/deposit/config.py
--- a/swh/deposit/config.py
+++ b/swh/deposit/config.py
@@ -7,6 +7,8 @@
from typing import Any, Dict
from swh.core import config
+from swh.deposit import __version__
+from swh.model.model import MetadataAuthority, MetadataAuthorityType
from swh.scheduler import get_scheduler
from swh.scheduler.interface import SchedulerInterface
@@ -53,6 +55,12 @@
"checks": True,
}
+SWH_METADATA_AUTHORITY = MetadataAuthority(
+ type=MetadataAuthorityType.REGISTRY,
+ url="https://softwareheritage.org/",
+ metadata={},
+)
+
def setup_django_for(platform=None, config_file=None):
"""Setup function for command line tools (swh.deposit.create_user) to
@@ -97,3 +105,9 @@
conf = config.read_raw_config(config.config_basepath(config_file))
self.config: Dict[str, Any] = config.merge_configs(DEFAULT_CONFIG, conf)
self.scheduler: SchedulerInterface = get_scheduler(**self.config["scheduler"])
+ self.provider = self.config["provider"]
+ self.tool = {
+ "name": "swh-deposit",
+ "version": __version__,
+ "configuration": {"sword_version": "2"},
+ }
diff --git a/swh/deposit/settings/production.py b/swh/deposit/settings/production.py
--- a/swh/deposit/settings/production.py
+++ b/swh/deposit/settings/production.py
@@ -58,7 +58,7 @@
"disable_existing_loggers": False,
"formatters": {
"standard": {
- "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s", # noqa
+ "format": "[%(asctime)s] %(levelname)s [%(name)s:%(lineno)s] %(message)s",
"datefmt": "%d/%b/%Y %H:%M:%S",
},
},
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -6,7 +6,7 @@
from django.urls import reverse
from rest_framework import status
-from swh.deposit.api import __version__
+from swh.deposit import __version__
from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
from swh.deposit.models import Deposit
diff --git a/swh/deposit/tests/api/test_deposit_update.py b/swh/deposit/tests/api/test_deposit_update.py
--- a/swh/deposit/tests/api/test_deposit_update.py
+++ b/swh/deposit/tests/api/test_deposit_update.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
+# Copyright (C) 2017-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -10,6 +10,8 @@
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import check_archive, create_arborescence_archive
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import parse_swhid, swhid
def test_replace_archive_to_deposit_is_possible(
@@ -124,6 +126,103 @@
assert set(requests_archive0) == set(requests_archive1)
+def test_put_update_metadata_on_done_deposit(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ sample_data,
+ swh_storage,
+):
+ """Update all metadata on a deposit whose status is done is possible, 204 response
+
+ """
+ deposit = complete_deposit
+
+ directory_id = hash_to_bytes(parse_swhid(deposit.swh_id).object_id)
+ # directory targeted by the complete_deposit does not exist in the storage
+ assert list(swh_storage.directory_missing([directory_id])) == [directory_id]
+
+ # so let's create a directory reference in the storage (current deposit targets an
+ # unknown swhid)
+ directory = sample_data.directory
+ swh_storage.directory_add([directory])
+ assert list(swh_storage.directory_missing([directory.id])) == []
+ # and patch one complete deposit swh_id so it targets said reference
+ deposit.swh_id = swhid("directory", directory.id)
+ deposit.save()
+
+ raw_metadata0 = atom_dataset["entry-data0"] % deposit.external_id.encode("utf-8")
+
+ requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
+ assert len(requests_archive0) == 1
+
+ update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
+
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data1"],
+ X_Check_SWHID=deposit.swh_id,
+ )
+
+ assert response.status_code == status.HTTP_204_NO_CONTENT
+
+ requests_meta = DepositRequest.objects.filter(deposit=deposit, type="metadata")
+
+ assert len(requests_meta) == 1
+ request_meta1 = requests_meta[0]
+ raw_metadata1 = request_meta1.raw_metadata
+ assert raw_metadata1 == atom_dataset["entry-data1"]
+ assert raw_metadata0 != raw_metadata1
+
+ # check we did not touch the other parts
+ requests_archive1 = DepositRequest.objects.filter(deposit=deposit, type="archive")
+ assert len(requests_archive1) == 1
+ assert set(requests_archive0) == set(requests_archive1)
+
+ # FIXME: Check the metadata storage information created is consistent
+ pass
+
+
+def test_put_update_metadata_on_done_deposit_failure(
+ tmp_path,
+ authenticated_client,
+ complete_deposit,
+ deposit_collection,
+ atom_dataset,
+ swh_storage,
+):
+ """Update all metadata on a deposit whose status is done, wrong swhid is impossible
+
+ """
+ deposit = complete_deposit
+
+ directory_id = hash_to_bytes(parse_swhid(deposit.swh_id).object_id)
+ # directory targeted by the complete_deposit does not exist in the storage
+ assert list(swh_storage.directory_missing([directory_id]))[0] is not None
+
+ requests_archive0 = DepositRequest.objects.filter(deposit=deposit, type="archive")
+ assert len(requests_archive0) == 1
+
+ update_uri = reverse(EDIT_SE_IRI, args=[deposit_collection.name, deposit.id])
+
+ response = authenticated_client.put(
+ update_uri,
+ content_type="application/atom+xml;type=entry",
+ data=atom_dataset["entry-data1"],
+ X_Check_SWHID=deposit.swh_id,
+ )
+
+ # so the deposit update is rejected
+ assert response.status_code == status.HTTP_400_BAD_REQUEST
+
+ response_str = response.content.decode()
+
+ assert f"Unknown directory SWHID {deposit.swh_id} reference" in response_str
+
+
def test_add_archive_to_deposit_is_possible(
tmp_path,
authenticated_client,
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -16,6 +16,7 @@
from rest_framework.test import APIClient
import yaml
+from swh.core.config import read
from swh.deposit.config import (
COL_IRI,
DEPOSIT_STATUS_DEPOSITED,
@@ -25,12 +26,14 @@
DEPOSIT_STATUS_REJECTED,
DEPOSIT_STATUS_VERIFIED,
EDIT_SE_IRI,
+ SWH_METADATA_AUTHORITY,
setup_django_for,
)
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import create_arborescence_archive
from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
from swh.scheduler import get_scheduler
+from swh.storage import get_storage
# mypy is asked to ignore the import statement above because setup_databases
# is not part of the d.t.utils.__all__ variable.
@@ -51,7 +54,11 @@
@pytest.fixture()
-def deposit_config(swh_scheduler_config):
+def deposit_config(swh_scheduler_config, swh_storage_backend_config):
+ # storage_cfg = deepcopy(swh_storage_backend_config)
+ # # actual side-track # otherwise: OSError: storage check config failed
+ # storage_cfg["check_config"] = {"check_write": False}
+
return {
"max_upload_size": 500,
"extraction_dir": "/tmp/swh-deposit/test/extraction-dir",
@@ -63,6 +70,7 @@
"metadata": {},
},
"scheduler": {"cls": "local", "args": swh_scheduler_config,},
+ "storage": swh_storage_backend_config,
}
@@ -78,14 +86,24 @@
@pytest.fixture(autouse=True)
def deposit_autoconfig(deposit_config_path, swh_scheduler_config):
"""Enforce config for deposit classes inherited from APIConfig."""
+ cfg = read(deposit_config_path)
+
+ if "scheduler" in cfg:
+ # scheduler setup: require the load-deposit tasks (already existing in
+ # production)
+ scheduler = get_scheduler(**cfg["scheduler"])
+ task_type = {
+ "type": "load-deposit",
+ "backend_name": "swh.loader.packages.deposit.tasks.LoadDeposit",
+ "description": "Load deposit task",
+ }
+ scheduler.create_task_type(task_type)
- scheduler = get_scheduler("local", swh_scheduler_config)
- task_type = {
- "type": "load-deposit",
- "backend_name": "swh.loader.packages.deposit.tasks.LoadDeposit",
- "description": "Load deposit task",
- }
- scheduler.create_task_type(task_type)
+ if "storage" in cfg:
+ # `deposit_update` tests requires the SWH_METADATA_AUTHORITY populated (matching
+ # production)
+ storage = get_storage(**cfg["storage"])
+ storage.metadata_authority_add([SWH_METADATA_AUTHORITY])
@pytest.fixture(scope="session")
diff --git a/swh/deposit/tests/test_init.py b/swh/deposit/tests/test_init.py
--- a/swh/deposit/tests/test_init.py
+++ b/swh/deposit/tests/test_init.py
@@ -5,6 +5,6 @@
def test_version():
- from swh.deposit.api import __version__
+ from swh.deposit import __version__
assert __version__ is not None
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 3:29 AM (2 d, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218171
Attached To
D4013: Allow deposit metadata update on deposit already complete
Event Timeline
Log In to Comment