Page MenuHomeSoftware Heritage

D5175.diff
No OneTemporary

D5175.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,2 @@
swh.core[http] >= 0.4
-swh.model >= 0.7.2
+swh.model >= 1.0.0
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,11 +27,12 @@
from swh.deposit.models import Deposit
from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
-from swh.model.identifiers import SWHID, ValidationError
+from swh.model.identifiers import ExtendedSWHID, QualifiedSWHID, ValidationError
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
+ Origin,
RawExtrinsicMetadata,
)
from swh.scheduler.utils import create_oneshot_task_dict
@@ -67,7 +68,7 @@
)
from ..models import DepositClient, DepositCollection, DepositRequest
from ..parsers import parse_xml
-from ..utils import parse_swh_reference
+from ..utils import extended_swhid_from_qualified, parse_swh_reference
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@@ -608,11 +609,11 @@
def _store_metadata_deposit(
self,
deposit: Deposit,
- swhid_reference: Union[str, SWHID],
+ swhid_reference: Union[str, QualifiedSWHID],
metadata: Dict,
raw_metadata: bytes,
deposit_origin: Optional[str] = None,
- ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]:
+ ) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]:
"""When all user inputs pass the checks, this associates the raw_metadata to the
swhid_reference in the raw extrinsic metadata storage. In case of any issues,
a bad request response is returned to the user with the details.
@@ -636,7 +637,7 @@
(e.g. functionally invalid metadata, ...)
Returns:
- Tuple of core swhid, swhid context, deposit and deposit request
+ Tuple of target swhid, deposit, and deposit request
"""
metadata_ok, error_details = check_metadata(metadata)
@@ -669,20 +670,20 @@
# actually add the metadata to the completed deposit
deposit_request = self._deposit_request_put(deposit, deposit_request_data)
- object_type, metadata_context = compute_metadata_context(swhid_reference)
- if deposit_origin: # metadata deposit update on completed deposit
- metadata_context["origin"] = deposit_origin
-
- swhid_core: Union[str, SWHID]
+ target_swhid: ExtendedSWHID # origin URL or CoreSWHID
if isinstance(swhid_reference, str):
- swhid_core = swhid_reference
+ target_swhid = Origin(swhid_reference).swhid()
+ metadata_context = {}
else:
- swhid_core = attr.evolve(swhid_reference, metadata={})
+ metadata_context = compute_metadata_context(swhid_reference)
+ if deposit_origin: # metadata deposit update on completed deposit
+ metadata_context["origin"] = deposit_origin
+
+ target_swhid = extended_swhid_from_qualified(swhid_reference)
# store that metadata to the metadata storage
metadata_object = RawExtrinsicMetadata(
- type=object_type,
- target=swhid_core, # core swhid or origin
+ target=target_swhid, # core swhid or origin
discovery_date=deposit_request.date,
authority=metadata_authority,
fetcher=metadata_fetcher,
@@ -696,7 +697,7 @@
self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
- return (swhid_core, swhid_reference, deposit, deposit_request)
+ return (target_swhid, deposit, deposit_request)
def _atom_entry(
self,
@@ -752,13 +753,13 @@
# Determine if we are in the metadata-only deposit case
try:
- swhid = parse_swh_reference(metadata)
+ swhid_ref = parse_swh_reference(metadata)
except ValidationError as e:
raise DepositError(
PARSING_ERROR, "Invalid SWHID reference", str(e),
)
- if swhid is not None and (
+ if swhid_ref is not None and (
deposit.origin_url or deposit.parent or deposit.external_id
):
raise DepositError(
@@ -768,15 +769,15 @@
"code deposits, only one may be used on a given deposit.",
)
- if swhid is not None:
+ if swhid_ref is not None:
deposit.save() # We need a deposit id
- swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit(
- deposit, swhid, metadata, raw_metadata
+ target_swhid, depo, depo_request = self._store_metadata_deposit(
+ deposit, swhid_ref, metadata, raw_metadata
)
deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
- if isinstance(swhid_ref, SWHID):
- deposit.swhid = str(swhid)
+ if isinstance(swhid_ref, QualifiedSWHID):
+ deposit.swhid = str(extended_swhid_from_qualified(swhid_ref))
deposit.swhid_context = str(swhid_ref)
deposit.complete_date = depo_request.date
deposit.reception_date = depo_request.date
diff --git a/swh/deposit/api/edit.py b/swh/deposit/api/edit.py
--- a/swh/deposit/api/edit.py
+++ b/swh/deposit/api/edit.py
@@ -6,7 +6,7 @@
from rest_framework.request import Request
from swh.deposit.models import Deposit
-from swh.model.identifiers import parse_swhid
+from swh.model.identifiers import QualifiedSWHID
from ..config import DEPOSIT_STATUS_LOAD_SUCCESS
from ..errors import BAD_REQUEST, DepositError, ParserError
@@ -125,8 +125,12 @@
"If the body is empty, there is no metadata.",
)
- _, _, deposit, deposit_request = self._store_metadata_deposit(
- deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url,
+ _, deposit, deposit_request = self._store_metadata_deposit(
+ deposit,
+ QualifiedSWHID.from_string(swhid),
+ metadata,
+ raw_metadata,
+ deposit.origin_url,
)
def process_delete(self, req, collection_name: str, deposit: Deposit) -> None:
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -14,6 +14,7 @@
from swh.core import tarball
from swh.deposit.utils import normalize_date
from swh.model import identifiers
+from swh.model.hashutil import hash_to_hex
from swh.model.model import MetadataAuthorityType
from . import APIPrivateView, DepositReadMixin
@@ -163,8 +164,8 @@
if deposit.parent:
parent_swhid = deposit.parent.swhid
assert parent_swhid is not None
- swhid = identifiers.parse_swhid(parent_swhid)
- parent_revision = swhid.object_id
+ swhid = identifiers.CoreSWHID.from_string(parent_swhid)
+ parent_revision = hash_to_hex(swhid.object_id)
parents = [parent_revision]
else:
parents = []
diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py
--- a/swh/deposit/api/private/deposit_update_status.py
+++ b/swh/deposit/api/private/deposit_update_status.py
@@ -5,7 +5,8 @@
from rest_framework.parsers import JSONParser
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from . import APIPrivateView
from ...errors import BAD_REQUEST, DepositError
@@ -85,21 +86,28 @@
origin_url = data["origin_url"]
directory_id = data["directory_id"]
revision_id = data["revision_id"]
- dir_id = swhid(DIRECTORY, directory_id)
- snp_id = swhid(SNAPSHOT, data["snapshot_id"])
- rev_id = swhid(REVISION, revision_id)
+ dir_id = CoreSWHID(
+ object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(directory_id)
+ )
+ snp_id = CoreSWHID(
+ object_type=ObjectType.SNAPSHOT,
+ object_id=hash_to_bytes(data["snapshot_id"]),
+ )
+ rev_id = CoreSWHID(
+ object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id)
+ )
- deposit.swhid = dir_id
+ deposit.swhid = str(dir_id)
# new id with contextual information
- deposit.swhid_context = swhid(
- DIRECTORY,
- directory_id,
- metadata={
- "origin": origin_url,
- "visit": snp_id,
- "anchor": rev_id,
- "path": "/",
- },
+ deposit.swhid_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=hash_to_bytes(directory_id),
+ origin=origin_url,
+ visit=snp_id,
+ anchor=rev_id,
+ path="/",
+ )
)
else: # rejected
deposit.status = status
diff --git a/swh/deposit/migrations/0018_migrate_swhids.py b/swh/deposit/migrations/0018_migrate_swhids.py
--- a/swh/deposit/migrations/0018_migrate_swhids.py
+++ b/swh/deposit/migrations/0018_migrate_swhids.py
@@ -15,7 +15,7 @@
from swh.core import config
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, parse_swhid, swhid
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from swh.storage import get_storage as get_storage_client
from swh.storage.algos.snapshot import snapshot_id_get_from_revision
@@ -68,7 +68,7 @@
return swh_storage
-def migrate_deposit_swhid_context_not_null(apps, schema_editor):
+def migrate_deposit_swhid_context_not_null(apps, schema_editor) -> None:
"""Migrate deposit SWHIDs to the new format.
Migrate deposit SWHIDs to the new format. Only deposit with status done and
@@ -84,13 +84,13 @@
for deposit in Deposit.objects.filter(
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False
):
- obj_dir = parse_swhid(deposit.swh_id_context)
- assert obj_dir.object_type == DIRECTORY
+ obj_dir = QualifiedSWHID.from_string(deposit.swh_id_context)
+ assert obj_dir.object_type == ObjectType.DIRECTORY
- obj_rev = parse_swhid(deposit.swh_anchor_id)
- assert obj_rev.object_type == REVISION
+ obj_rev = CoreSWHID.from_string(deposit.swh_anchor_id)
+ assert obj_rev.object_type == ObjectType.REVISION
- if set(obj_dir.metadata.keys()) != {"origin"}:
+ if set(obj_dir.qualifiers()) != {"origin"}:
# Assuming the migration is already done for that deposit
logger.warning(
"Deposit id %s: Migration already done, skipping", deposit.id
@@ -100,7 +100,9 @@
# Starting migration
dir_id = obj_dir.object_id
- origin = obj_dir.metadata["origin"]
+ origin = obj_dir.origin
+
+ assert origin
check_origin = storage.origin_get([origin])[0]
if not check_origin:
@@ -125,15 +127,15 @@
old_swh_anchor_id_context = deposit.swh_anchor_id_context
# Update
- deposit.swh_id_context = swhid(
- DIRECTORY,
- dir_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snp_id.hex()),
- "anchor": swhid(REVISION, rev_id),
- "path": "/",
- },
+ deposit.swh_id_context = QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=dir_id,
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id),
+ anchor=CoreSWHID(
+ object_type=ObjectType.REVISION, object_id=hash_to_bytes(rev_id)
+ ),
+ path=b"/",
)
# Ensure only deposit.swh_id_context changed
@@ -211,7 +213,7 @@
return map_origin.get(key, f"{provider_url.rstrip('/')}/{external_id}")
-def migrate_deposit_swhid_context_null(apps, schema_editor):
+def migrate_deposit_swhid_context_null(apps, schema_editor) -> None:
"""Migrate deposit SWHIDs to the new format.
Migrate deposit whose swh_id_context is not set (initial deposits not migrated at
@@ -229,8 +231,8 @@
for deposit in Deposit.objects.filter(
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=True
):
- obj_rev = parse_swhid(deposit.swh_id)
- if obj_rev.object_type == DIRECTORY:
+ obj_rev = CoreSWHID.from_string(deposit.swh_id)
+ if obj_rev.object_type == ObjectType.DIRECTORY:
# Assuming the migration is already done for that deposit
logger.warning(
"Deposit id %s: Migration already done, skipping", deposit.id
@@ -238,7 +240,7 @@
continue
# Ensuring Migration not done
- assert obj_rev.object_type == REVISION
+ assert obj_rev.object_type == ObjectType.REVISION
assert deposit.swh_id is not None
assert deposit.swh_id_context is None
@@ -280,21 +282,25 @@
continue
# New SWHIDs ids
- deposit.swh_id = swhid(DIRECTORY, dir_id)
- deposit.swh_id_context = swhid(
- DIRECTORY,
- dir_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snp_id.hex()),
- "anchor": swhid(REVISION, rev_id),
- "path": "/",
- },
+ deposit.swh_id = CoreSWHID(
+ object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(dir_id)
+ )
+ deposit.swh_id_context = QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=dir_id,
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id),
+ anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=rev_id_bytes),
+ path=b"/",
)
# Realign the remaining deposit SWHIDs fields
- deposit.swh_anchor_id = swhid(REVISION, rev_id)
- deposit.swh_anchor_id_context = swhid(
- REVISION, rev_id, metadata={"origin": origin,}
+ deposit.swh_anchor_id = str(
+ CoreSWHID(object_type=ObjectType.REVISION, object_id=rev_id_bytes)
+ )
+ deposit.swh_anchor_id_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.REVISION, object_id=rev_id_bytes, origin=origin
+ )
)
# Ensure only deposit.swh_id_context changed
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -22,13 +22,13 @@
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom
-from swh.deposit.utils import compute_metadata_context
-from swh.model.identifiers import SWHID, parse_swhid
+from swh.deposit.utils import compute_metadata_context, extended_swhid_from_qualified
+from swh.model.identifiers import QualifiedSWHID
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
+ Origin,
RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult
@@ -452,63 +452,28 @@
@pytest.mark.parametrize(
- "swhid,target_type",
+ "swhid",
[
- (
- "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.SNAPSHOT,
- ),
- (
- "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.SNAPSHOT,
- ),
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
],
)
def test_deposit_metadata_swhid(
- swhid,
- target_type,
- authenticated_client,
- deposit_collection,
- atom_dataset,
- swh_storage,
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
"""Posting a swhid reference is stored on raw extrinsic metadata storage
"""
- swhid_reference = parse_swhid(swhid)
- swhid_core = attr.evolve(swhid_reference, metadata={})
+ swhid_reference = QualifiedSWHID.from_string(swhid)
+ swhid_target = extended_swhid_from_qualified(swhid_reference)
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
deposit_client = authenticated_client.deposit_client
@@ -525,8 +490,7 @@
# Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
- assert isinstance(swhid_core, SWHID)
- assert deposit.swhid == str(swhid_core)
+ assert deposit.swhid == str(swhid_target)
assert deposit.swhid_context == str(swhid_reference)
assert deposit.complete_date == deposit.reception_date
assert deposit.complete_date is not None
@@ -557,19 +521,18 @@
assert actual_fetcher == metadata_fetcher
page_results = swh_storage.raw_extrinsic_metadata_get(
- target_type, swhid_core, metadata_authority
+ swhid_target, metadata_authority
)
discovery_date = page_results.results[0].discovery_date
assert len(page_results.results) == 1
assert page_results.next_page_token is None
- object_type, metadata_context = compute_metadata_context(swhid_reference)
+ metadata_context = compute_metadata_context(swhid_reference)
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=object_type,
- target=swhid_core,
+ target=swhid_target,
discovery_date=discovery_date,
authority=attr.evolve(metadata_authority, metadata=None),
fetcher=attr.evolve(metadata_fetcher, metadata=None),
@@ -593,6 +556,7 @@
"""
xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+ origin_swhid = Origin(url).swhid()
deposit_client = authenticated_client.deposit_client
response = post_atom(
authenticated_client,
@@ -637,7 +601,7 @@
assert actual_fetcher == metadata_fetcher
page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, url, metadata_authority
+ origin_swhid, metadata_authority
)
discovery_date = page_results.results[0].discovery_date
@@ -647,8 +611,7 @@
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=url,
+ target=origin_swhid,
discovery_date=discovery_date,
authority=attr.evolve(metadata_authority, metadata=None),
fetcher=attr.evolve(metadata_fetcher, metadata=None),
diff --git a/swh/deposit/tests/api/test_deposit_private_update_status.py b/swh/deposit/tests/api/test_deposit_private_update_status.py
--- a/swh/deposit/tests/api/test_deposit_private_update_status.py
+++ b/swh/deposit/tests/api/test_deposit_private_update_status.py
@@ -16,7 +16,6 @@
PRIVATE_PUT_DEPOSIT,
)
from swh.deposit.models import Deposit
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
PRIVATE_PUT_DEPOSIT_NC = PRIVATE_PUT_DEPOSIT + "-nc"
@@ -50,13 +49,13 @@
"origin_url": origin_url,
}
for url in private_check_url_endpoints(deposit_collection, deposit):
- dir_id = swhid(DIRECTORY, directory_id)
- rev_id = swhid(REVISION, revision_id)
- snp_id = swhid(SNAPSHOT, snapshot_id)
-
expected_swhid = "swh:1:dir:%s" % directory_id
expected_swhid_context = (
- f"{dir_id};origin={origin_url};" + f"visit={snp_id};anchor={rev_id};path=/"
+ f"{expected_swhid}"
+ f";origin={origin_url}"
+ f";visit=swh:1:snp:{snapshot_id}"
+ f";anchor=swh:1:rev:{revision_id}"
+ f";path=/"
)
response = authenticated_client.put(
diff --git a/swh/deposit/tests/api/test_deposit_update_atom.py b/swh/deposit/tests/api/test_deposit_update_atom.py
--- a/swh/deposit/tests/api/test_deposit_update_atom.py
+++ b/swh/deposit/tests/api/test_deposit_update_atom.py
@@ -23,12 +23,11 @@
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom, put_atom
from swh.model.hashutil import hash_to_bytes
-from swh.model.identifiers import parse_swhid, swhid
+from swh.model.identifiers import CoreSWHID, ExtendedSWHID, ObjectType
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult
@@ -319,8 +318,8 @@
Response: 204
"""
- deposit_swhid = parse_swhid(complete_deposit.swhid)
- assert deposit_swhid.object_type == "directory"
+ deposit_swhid = CoreSWHID.from_string(complete_deposit.swhid)
+ assert deposit_swhid.object_type == ObjectType.DIRECTORY
directory_id = hash_to_bytes(deposit_swhid.object_id)
# directory targeted by the complete_deposit does not exist in the storage
@@ -333,7 +332,7 @@
assert list(swh_storage.directory_missing([existing_directory.id])) == []
# and patch one complete deposit swhid so it targets said reference
- complete_deposit.swhid = swhid("directory", existing_directory.id)
+ complete_deposit.swhid = str(existing_directory.swhid())
complete_deposit.save()
actual_existing_requests_archive = DepositRequest.objects.filter(
@@ -394,14 +393,13 @@
)
assert actual_fetcher == metadata_fetcher
- directory_swhid = parse_swhid(complete_deposit.swhid)
+ directory_swhid = ExtendedSWHID.from_string(complete_deposit.swhid)
page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority
+ directory_swhid, metadata_authority
)
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=directory_swhid,
discovery_date=request_meta1.date,
authority=attr.evolve(metadata_authority, metadata=None),
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -38,7 +38,8 @@
post_archive,
post_atom,
)
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from swh.scheduler import get_scheduler
# mypy is asked to ignore the import statement above because setup_databases
@@ -457,18 +458,18 @@
)
origin = "https://hal.archives-ouvertes.fr/hal-01727745"
directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
- revision_id = "548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"
- snapshot_id = "e5e82d064a9c3df7464223042e0c55d72ccff7f0"
- deposit.swhid = swhid(DIRECTORY, directory_id)
- deposit.swhid_context = swhid(
- DIRECTORY,
- directory_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snapshot_id),
- "anchor": swhid(REVISION, revision_id),
- "path": "/",
- },
+ revision_id = hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10")
+ snapshot_id = hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0")
+ deposit.swhid = f"swh:1:dir:{directory_id}"
+ deposit.swhid_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=hash_to_bytes(directory_id),
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id),
+ anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id),
+ path=b"/",
+ )
)
deposit.save()
return deposit
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Union
from unittest.mock import patch
import pytest
@@ -11,8 +10,7 @@
from swh.deposit import utils
from swh.deposit.parsers import parse_xml
from swh.model.exceptions import ValidationError
-from swh.model.identifiers import SWHID, parse_swhid
-from swh.model.model import MetadataTargetType
+from swh.model.identifiers import CoreSWHID, QualifiedSWHID
@pytest.fixture
@@ -163,59 +161,43 @@
@pytest.mark.parametrize(
- "swhid_or_origin,expected_type,expected_metadata_context",
+ "swhid,expected_metadata_context",
[
- ("https://something", MetadataTargetType.ORIGIN, {"origin": None}),
- (
- "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.CONTENT,
- {"origin": None},
- ),
+ ("swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49", {"origin": None},),
(
"swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
- MetadataTargetType.SNAPSHOT,
{"origin": "http://blah", "path": None},
),
(
"swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
- MetadataTargetType.DIRECTORY,
{"origin": None, "path": b"/path"},
),
(
"swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
- MetadataTargetType.REVISION,
{
"origin": None,
"path": None,
- "snapshot": parse_swhid(
+ "snapshot": CoreSWHID.from_string(
"swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
),
},
),
(
"swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
- MetadataTargetType.RELEASE,
{
"origin": None,
"path": None,
- "directory": parse_swhid(
+ "directory": CoreSWHID.from_string(
"swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
),
},
),
],
)
-def test_compute_metadata_context(
- swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context
-):
- if expected_type != MetadataTargetType.ORIGIN:
- assert isinstance(swhid_or_origin, str)
- swhid_or_origin = parse_swhid(swhid_or_origin)
-
- object_type, metadata_context = utils.compute_metadata_context(swhid_or_origin)
-
- assert object_type == expected_type
- assert metadata_context == expected_metadata_context
+def test_compute_metadata_context(swhid: str, expected_metadata_context):
+ assert expected_metadata_context == utils.compute_metadata_context(
+ QualifiedSWHID.from_string(swhid)
+ )
def test_parse_swh_reference_origin(xml_with_origin_reference):
@@ -278,7 +260,7 @@
actual_swhid = utils.parse_swh_reference(metadata)
assert actual_swhid is not None
- expected_swhid = parse_swhid(swhid)
+ expected_swhid = QualifiedSWHID.from_string(swhid)
assert actual_swhid == expected_swhid
@@ -291,7 +273,7 @@
"swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:rev:0175049fc45055a3824a1675ac06e3711619a55a", # noqa
# anchor qualifier should be a core SWHID with type one of
"swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;anchor=swh:1:cnt:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
- "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04" # noqa
+ "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04", # noqa
],
)
def test_parse_swh_reference_invalid_swhid(invalid_swhid, xml_with_swhid):
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -5,22 +5,18 @@
import logging
from types import GeneratorType
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Union
import iso8601
import xmltodict
from swh.model.exceptions import ValidationError
from swh.model.identifiers import (
- DIRECTORY,
- RELEASE,
- REVISION,
- SNAPSHOT,
- SWHID,
+ ExtendedSWHID,
+ ObjectType,
+ QualifiedSWHID,
normalize_timestamp,
- parse_swhid,
)
-from swh.model.model import MetadataTargetType
logger = logging.getLogger(__name__)
@@ -120,44 +116,36 @@
return normalize_timestamp(date)
-def compute_metadata_context(
- swhid_reference: Union[SWHID, str]
-) -> Tuple[MetadataTargetType, Dict[str, Any]]:
+def compute_metadata_context(swhid_reference: QualifiedSWHID) -> Dict[str, Any]:
"""Given a SWHID object, determine the context as a dict.
- The parse_swhid calls within are not expected to raise (because they should have
- been caught early on).
-
"""
metadata_context: Dict[str, Any] = {"origin": None}
- if isinstance(swhid_reference, SWHID):
- object_type = MetadataTargetType(swhid_reference.object_type)
- assert object_type != MetadataTargetType.ORIGIN
-
- if swhid_reference.metadata:
- path = swhid_reference.metadata.get("path")
- metadata_context = {
- "origin": swhid_reference.metadata.get("origin"),
- "path": path.encode() if path else None,
- }
- snapshot = swhid_reference.metadata.get("visit")
- if snapshot:
- metadata_context["snapshot"] = parse_swhid(snapshot)
-
- anchor = swhid_reference.metadata.get("anchor")
- if anchor:
- anchor_swhid = parse_swhid(anchor)
- metadata_context[anchor_swhid.object_type] = anchor_swhid
- else:
- object_type = MetadataTargetType.ORIGIN
+ if swhid_reference.qualifiers():
+ metadata_context = {
+ "origin": swhid_reference.origin,
+ "path": swhid_reference.path,
+ }
+ snapshot = swhid_reference.visit
+ if snapshot:
+ metadata_context["snapshot"] = snapshot
+
+ anchor = swhid_reference.anchor
+ if anchor:
+ metadata_context[anchor.object_type.name.lower()] = anchor
- return object_type, metadata_context
+ return metadata_context
-ALLOWED_QUALIFIERS_NODE_TYPE = (SNAPSHOT, REVISION, RELEASE, DIRECTORY)
+ALLOWED_QUALIFIERS_NODE_TYPE = (
+ ObjectType.SNAPSHOT,
+ ObjectType.REVISION,
+ ObjectType.RELEASE,
+ ObjectType.DIRECTORY,
+)
-def parse_swh_reference(metadata: Dict) -> Optional[Union[str, SWHID]]:
+def parse_swh_reference(metadata: Dict,) -> Optional[Union[QualifiedSWHID, str]]:
"""Parse swh reference within the metadata dict (or origin) reference if found, None
otherwise.
@@ -182,9 +170,6 @@
Either swhid or origin reference if any. None otherwise.
""" # noqa
- visit_swhid = None
- anchor_swhid = None
-
swh_deposit = metadata.get("swh:deposit")
if not swh_deposit:
return None
@@ -206,32 +191,31 @@
swhid = swh_object.get("@swhid")
if not swhid:
return None
- swhid_reference = parse_swhid(swhid)
+ swhid_reference = QualifiedSWHID.from_string(swhid)
- if swhid_reference.metadata:
- anchor = swhid_reference.metadata.get("anchor")
+ if swhid_reference.qualifiers():
+ anchor = swhid_reference.anchor
if anchor:
- anchor_swhid = parse_swhid(anchor)
- if anchor_swhid.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
+ if anchor.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
error_msg = (
"anchor qualifier should be a core SWHID with type one of "
- f" {', '.join(ALLOWED_QUALIFIERS_NODE_TYPE)}"
+ f"{', '.join(t.name.lower() for t in ALLOWED_QUALIFIERS_NODE_TYPE)}"
)
raise ValidationError(error_msg)
- visit = swhid_reference.metadata.get("visit")
+ visit = swhid_reference.visit
if visit:
- visit_swhid = parse_swhid(visit)
- if visit_swhid.object_type != SNAPSHOT:
+ if visit.object_type != ObjectType.SNAPSHOT:
raise ValidationError(
- f"visit qualifier should be a core SWHID with type {SNAPSHOT}"
+ f"visit qualifier should be a core SWHID with type snp, "
+ f"not {visit.object_type.value}"
)
if (
- visit_swhid
- and anchor_swhid
- and visit_swhid.object_type == SNAPSHOT
- and anchor_swhid.object_type == SNAPSHOT
+ visit
+ and anchor
+ and visit.object_type == ObjectType.SNAPSHOT
+ and anchor.object_type == ObjectType.SNAPSHOT
):
logger.warn(
"SWHID use of both anchor and visit targeting "
@@ -242,3 +226,9 @@
)
return swhid_reference
+
+
+def extended_swhid_from_qualified(swhid: QualifiedSWHID) -> ExtendedSWHID:
+ """Used to get the target of a metadata object from a <swh:reference>,
+ as the latter uses a QualifiedSWHID."""
+ return ExtendedSWHID.from_string(str(swhid).split(";")[0])

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:04 PM (5 d, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223570

Event Timeline