Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345090
D5175.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
36 KB
Subscribers
None
D5175.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,2 @@
swh.core[http] >= 0.4
-swh.model >= 0.7.2
+swh.model >= 1.0.0
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,11 +27,12 @@
from swh.deposit.models import Deposit
from swh.deposit.utils import compute_metadata_context
from swh.model import hashutil
-from swh.model.identifiers import SWHID, ValidationError
+from swh.model.identifiers import ExtendedSWHID, QualifiedSWHID, ValidationError
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
+ Origin,
RawExtrinsicMetadata,
)
from swh.scheduler.utils import create_oneshot_task_dict
@@ -67,7 +68,7 @@
)
from ..models import DepositClient, DepositCollection, DepositRequest
from ..parsers import parse_xml
-from ..utils import parse_swh_reference
+from ..utils import extended_swhid_from_qualified, parse_swh_reference
ACCEPT_PACKAGINGS = ["http://purl.org/net/sword/package/SimpleZip"]
ACCEPT_ARCHIVE_CONTENT_TYPES = ["application/zip", "application/x-tar"]
@@ -608,11 +609,11 @@
def _store_metadata_deposit(
self,
deposit: Deposit,
- swhid_reference: Union[str, SWHID],
+ swhid_reference: Union[str, QualifiedSWHID],
metadata: Dict,
raw_metadata: bytes,
deposit_origin: Optional[str] = None,
- ) -> Tuple[Union[SWHID, str], Union[SWHID, str], Deposit, DepositRequest]:
+ ) -> Tuple[ExtendedSWHID, Deposit, DepositRequest]:
"""When all user inputs pass the checks, this associates the raw_metadata to the
swhid_reference in the raw extrinsic metadata storage. In case of any issues,
a bad request response is returned to the user with the details.
@@ -636,7 +637,7 @@
(e.g. functionally invalid metadata, ...)
Returns:
- Tuple of core swhid, swhid context, deposit and deposit request
+ Tuple of target swhid, deposit, and deposit request
"""
metadata_ok, error_details = check_metadata(metadata)
@@ -669,20 +670,20 @@
# actually add the metadata to the completed deposit
deposit_request = self._deposit_request_put(deposit, deposit_request_data)
- object_type, metadata_context = compute_metadata_context(swhid_reference)
- if deposit_origin: # metadata deposit update on completed deposit
- metadata_context["origin"] = deposit_origin
-
- swhid_core: Union[str, SWHID]
+ target_swhid: ExtendedSWHID # origin URL or CoreSWHID
if isinstance(swhid_reference, str):
- swhid_core = swhid_reference
+ target_swhid = Origin(swhid_reference).swhid()
+ metadata_context = {}
else:
- swhid_core = attr.evolve(swhid_reference, metadata={})
+ metadata_context = compute_metadata_context(swhid_reference)
+ if deposit_origin: # metadata deposit update on completed deposit
+ metadata_context["origin"] = deposit_origin
+
+ target_swhid = extended_swhid_from_qualified(swhid_reference)
# store that metadata to the metadata storage
metadata_object = RawExtrinsicMetadata(
- type=object_type,
- target=swhid_core, # core swhid or origin
+ target=target_swhid, # core swhid or origin
discovery_date=deposit_request.date,
authority=metadata_authority,
fetcher=metadata_fetcher,
@@ -696,7 +697,7 @@
self.storage_metadata.metadata_fetcher_add([metadata_fetcher])
self.storage_metadata.raw_extrinsic_metadata_add([metadata_object])
- return (swhid_core, swhid_reference, deposit, deposit_request)
+ return (target_swhid, deposit, deposit_request)
def _atom_entry(
self,
@@ -752,13 +753,13 @@
# Determine if we are in the metadata-only deposit case
try:
- swhid = parse_swh_reference(metadata)
+ swhid_ref = parse_swh_reference(metadata)
except ValidationError as e:
raise DepositError(
PARSING_ERROR, "Invalid SWHID reference", str(e),
)
- if swhid is not None and (
+ if swhid_ref is not None and (
deposit.origin_url or deposit.parent or deposit.external_id
):
raise DepositError(
@@ -768,15 +769,15 @@
"code deposits, only one may be used on a given deposit.",
)
- if swhid is not None:
+ if swhid_ref is not None:
deposit.save() # We need a deposit id
- swhid, swhid_ref, depo, depo_request = self._store_metadata_deposit(
- deposit, swhid, metadata, raw_metadata
+ target_swhid, depo, depo_request = self._store_metadata_deposit(
+ deposit, swhid_ref, metadata, raw_metadata
)
deposit.status = DEPOSIT_STATUS_LOAD_SUCCESS
- if isinstance(swhid_ref, SWHID):
- deposit.swhid = str(swhid)
+ if isinstance(swhid_ref, QualifiedSWHID):
+ deposit.swhid = str(extended_swhid_from_qualified(swhid_ref))
deposit.swhid_context = str(swhid_ref)
deposit.complete_date = depo_request.date
deposit.reception_date = depo_request.date
diff --git a/swh/deposit/api/edit.py b/swh/deposit/api/edit.py
--- a/swh/deposit/api/edit.py
+++ b/swh/deposit/api/edit.py
@@ -6,7 +6,7 @@
from rest_framework.request import Request
from swh.deposit.models import Deposit
-from swh.model.identifiers import parse_swhid
+from swh.model.identifiers import QualifiedSWHID
from ..config import DEPOSIT_STATUS_LOAD_SUCCESS
from ..errors import BAD_REQUEST, DepositError, ParserError
@@ -125,8 +125,12 @@
"If the body is empty, there is no metadata.",
)
- _, _, deposit, deposit_request = self._store_metadata_deposit(
- deposit, parse_swhid(swhid), metadata, raw_metadata, deposit.origin_url,
+ _, deposit, deposit_request = self._store_metadata_deposit(
+ deposit,
+ QualifiedSWHID.from_string(swhid),
+ metadata,
+ raw_metadata,
+ deposit.origin_url,
)
def process_delete(self, req, collection_name: str, deposit: Deposit) -> None:
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -14,6 +14,7 @@
from swh.core import tarball
from swh.deposit.utils import normalize_date
from swh.model import identifiers
+from swh.model.hashutil import hash_to_hex
from swh.model.model import MetadataAuthorityType
from . import APIPrivateView, DepositReadMixin
@@ -163,8 +164,8 @@
if deposit.parent:
parent_swhid = deposit.parent.swhid
assert parent_swhid is not None
- swhid = identifiers.parse_swhid(parent_swhid)
- parent_revision = swhid.object_id
+ swhid = identifiers.CoreSWHID.from_string(parent_swhid)
+ parent_revision = hash_to_hex(swhid.object_id)
parents = [parent_revision]
else:
parents = []
diff --git a/swh/deposit/api/private/deposit_update_status.py b/swh/deposit/api/private/deposit_update_status.py
--- a/swh/deposit/api/private/deposit_update_status.py
+++ b/swh/deposit/api/private/deposit_update_status.py
@@ -5,7 +5,8 @@
from rest_framework.parsers import JSONParser
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from . import APIPrivateView
from ...errors import BAD_REQUEST, DepositError
@@ -85,21 +86,28 @@
origin_url = data["origin_url"]
directory_id = data["directory_id"]
revision_id = data["revision_id"]
- dir_id = swhid(DIRECTORY, directory_id)
- snp_id = swhid(SNAPSHOT, data["snapshot_id"])
- rev_id = swhid(REVISION, revision_id)
+ dir_id = CoreSWHID(
+ object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(directory_id)
+ )
+ snp_id = CoreSWHID(
+ object_type=ObjectType.SNAPSHOT,
+ object_id=hash_to_bytes(data["snapshot_id"]),
+ )
+ rev_id = CoreSWHID(
+ object_type=ObjectType.REVISION, object_id=hash_to_bytes(revision_id)
+ )
- deposit.swhid = dir_id
+ deposit.swhid = str(dir_id)
# new id with contextual information
- deposit.swhid_context = swhid(
- DIRECTORY,
- directory_id,
- metadata={
- "origin": origin_url,
- "visit": snp_id,
- "anchor": rev_id,
- "path": "/",
- },
+ deposit.swhid_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=hash_to_bytes(directory_id),
+ origin=origin_url,
+ visit=snp_id,
+ anchor=rev_id,
+ path="/",
+ )
)
else: # rejected
deposit.status = status
diff --git a/swh/deposit/migrations/0018_migrate_swhids.py b/swh/deposit/migrations/0018_migrate_swhids.py
--- a/swh/deposit/migrations/0018_migrate_swhids.py
+++ b/swh/deposit/migrations/0018_migrate_swhids.py
@@ -15,7 +15,7 @@
from swh.core import config
from swh.deposit.config import DEPOSIT_STATUS_LOAD_SUCCESS
from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, parse_swhid, swhid
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from swh.storage import get_storage as get_storage_client
from swh.storage.algos.snapshot import snapshot_id_get_from_revision
@@ -68,7 +68,7 @@
return swh_storage
-def migrate_deposit_swhid_context_not_null(apps, schema_editor):
+def migrate_deposit_swhid_context_not_null(apps, schema_editor) -> None:
"""Migrate deposit SWHIDs to the new format.
Migrate deposit SWHIDs to the new format. Only deposit with status done and
@@ -84,13 +84,13 @@
for deposit in Deposit.objects.filter(
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=False
):
- obj_dir = parse_swhid(deposit.swh_id_context)
- assert obj_dir.object_type == DIRECTORY
+ obj_dir = QualifiedSWHID.from_string(deposit.swh_id_context)
+ assert obj_dir.object_type == ObjectType.DIRECTORY
- obj_rev = parse_swhid(deposit.swh_anchor_id)
- assert obj_rev.object_type == REVISION
+ obj_rev = CoreSWHID.from_string(deposit.swh_anchor_id)
+ assert obj_rev.object_type == ObjectType.REVISION
- if set(obj_dir.metadata.keys()) != {"origin"}:
+ if set(obj_dir.qualifiers()) != {"origin"}:
# Assuming the migration is already done for that deposit
logger.warning(
"Deposit id %s: Migration already done, skipping", deposit.id
@@ -100,7 +100,9 @@
# Starting migration
dir_id = obj_dir.object_id
- origin = obj_dir.metadata["origin"]
+ origin = obj_dir.origin
+
+ assert origin
check_origin = storage.origin_get([origin])[0]
if not check_origin:
@@ -125,15 +127,15 @@
old_swh_anchor_id_context = deposit.swh_anchor_id_context
# Update
- deposit.swh_id_context = swhid(
- DIRECTORY,
- dir_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snp_id.hex()),
- "anchor": swhid(REVISION, rev_id),
- "path": "/",
- },
+ deposit.swh_id_context = QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=dir_id,
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id),
+ anchor=CoreSWHID(
+ object_type=ObjectType.REVISION, object_id=hash_to_bytes(rev_id)
+ ),
+ path=b"/",
)
# Ensure only deposit.swh_id_context changed
@@ -211,7 +213,7 @@
return map_origin.get(key, f"{provider_url.rstrip('/')}/{external_id}")
-def migrate_deposit_swhid_context_null(apps, schema_editor):
+def migrate_deposit_swhid_context_null(apps, schema_editor) -> None:
"""Migrate deposit SWHIDs to the new format.
Migrate deposit whose swh_id_context is not set (initial deposits not migrated at
@@ -229,8 +231,8 @@
for deposit in Deposit.objects.filter(
status=DEPOSIT_STATUS_LOAD_SUCCESS, swh_id_context__isnull=True
):
- obj_rev = parse_swhid(deposit.swh_id)
- if obj_rev.object_type == DIRECTORY:
+ obj_rev = CoreSWHID.from_string(deposit.swh_id)
+ if obj_rev.object_type == ObjectType.DIRECTORY:
# Assuming the migration is already done for that deposit
logger.warning(
"Deposit id %s: Migration already done, skipping", deposit.id
@@ -238,7 +240,7 @@
continue
# Ensuring Migration not done
- assert obj_rev.object_type == REVISION
+ assert obj_rev.object_type == ObjectType.REVISION
assert deposit.swh_id is not None
assert deposit.swh_id_context is None
@@ -280,21 +282,25 @@
continue
# New SWHIDs ids
- deposit.swh_id = swhid(DIRECTORY, dir_id)
- deposit.swh_id_context = swhid(
- DIRECTORY,
- dir_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snp_id.hex()),
- "anchor": swhid(REVISION, rev_id),
- "path": "/",
- },
+ deposit.swh_id = CoreSWHID(
+ object_type=ObjectType.DIRECTORY, object_id=hash_to_bytes(dir_id)
+ )
+ deposit.swh_id_context = QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=dir_id,
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snp_id),
+ anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=rev_id_bytes),
+ path=b"/",
)
# Realign the remaining deposit SWHIDs fields
- deposit.swh_anchor_id = swhid(REVISION, rev_id)
- deposit.swh_anchor_id_context = swhid(
- REVISION, rev_id, metadata={"origin": origin,}
+ deposit.swh_anchor_id = str(
+ CoreSWHID(object_type=ObjectType.REVISION, object_id=rev_id_bytes)
+ )
+ deposit.swh_anchor_id_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.REVISION, object_id=rev_id_bytes, origin=origin
+ )
)
# Ensure only deposit.swh_id_context changed
diff --git a/swh/deposit/tests/api/test_collection_post_atom.py b/swh/deposit/tests/api/test_collection_post_atom.py
--- a/swh/deposit/tests/api/test_collection_post_atom.py
+++ b/swh/deposit/tests/api/test_collection_post_atom.py
@@ -22,13 +22,13 @@
from swh.deposit.models import Deposit, DepositCollection, DepositRequest
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom
-from swh.deposit.utils import compute_metadata_context
-from swh.model.identifiers import SWHID, parse_swhid
+from swh.deposit.utils import compute_metadata_context, extended_swhid_from_qualified
+from swh.model.identifiers import QualifiedSWHID
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
+ Origin,
RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult
@@ -452,63 +452,28 @@
@pytest.mark.parametrize(
- "swhid,target_type",
+ "swhid",
[
- (
- "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.SNAPSHOT,
- ),
- (
- "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.CONTENT,
- ),
- (
- "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
- MetadataTargetType.DIRECTORY,
- ),
- (
- "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.REVISION,
- ),
- (
- "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.RELEASE,
- ),
- (
- "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
- MetadataTargetType.SNAPSHOT,
- ),
+ "swh:1:cnt:01b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:dir:11b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rev:21b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:rel:31b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49",
+ "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;origin=https://inria.halpreprod.archives-ouvertes.fr/hal-01243573;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:rev:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
+ "swh:1:rev:71b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:rel:81b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
+ "swh:1:snp:91b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=h://g.c/o/repo",
],
)
def test_deposit_metadata_swhid(
- swhid,
- target_type,
- authenticated_client,
- deposit_collection,
- atom_dataset,
- swh_storage,
+ swhid, authenticated_client, deposit_collection, atom_dataset, swh_storage,
):
"""Posting a swhid reference is stored on raw extrinsic metadata storage
"""
- swhid_reference = parse_swhid(swhid)
- swhid_core = attr.evolve(swhid_reference, metadata={})
+ swhid_reference = QualifiedSWHID.from_string(swhid)
+ swhid_target = extended_swhid_from_qualified(swhid_reference)
xml_data = atom_dataset["entry-data-with-swhid"].format(swhid=swhid)
deposit_client = authenticated_client.deposit_client
@@ -525,8 +490,7 @@
# Ensure the deposit is finalized
deposit_id = int(response_content["swh:deposit_id"])
deposit = Deposit.objects.get(pk=deposit_id)
- assert isinstance(swhid_core, SWHID)
- assert deposit.swhid == str(swhid_core)
+ assert deposit.swhid == str(swhid_target)
assert deposit.swhid_context == str(swhid_reference)
assert deposit.complete_date == deposit.reception_date
assert deposit.complete_date is not None
@@ -557,19 +521,18 @@
assert actual_fetcher == metadata_fetcher
page_results = swh_storage.raw_extrinsic_metadata_get(
- target_type, swhid_core, metadata_authority
+ swhid_target, metadata_authority
)
discovery_date = page_results.results[0].discovery_date
assert len(page_results.results) == 1
assert page_results.next_page_token is None
- object_type, metadata_context = compute_metadata_context(swhid_reference)
+ metadata_context = compute_metadata_context(swhid_reference)
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=object_type,
- target=swhid_core,
+ target=swhid_target,
discovery_date=discovery_date,
authority=attr.evolve(metadata_authority, metadata=None),
fetcher=attr.evolve(metadata_fetcher, metadata=None),
@@ -593,6 +556,7 @@
"""
xml_data = atom_dataset["entry-data-with-origin-reference"].format(url=url)
+ origin_swhid = Origin(url).swhid()
deposit_client = authenticated_client.deposit_client
response = post_atom(
authenticated_client,
@@ -637,7 +601,7 @@
assert actual_fetcher == metadata_fetcher
page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.ORIGIN, url, metadata_authority
+ origin_swhid, metadata_authority
)
discovery_date = page_results.results[0].discovery_date
@@ -647,8 +611,7 @@
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.ORIGIN,
- target=url,
+ target=origin_swhid,
discovery_date=discovery_date,
authority=attr.evolve(metadata_authority, metadata=None),
fetcher=attr.evolve(metadata_fetcher, metadata=None),
diff --git a/swh/deposit/tests/api/test_deposit_private_update_status.py b/swh/deposit/tests/api/test_deposit_private_update_status.py
--- a/swh/deposit/tests/api/test_deposit_private_update_status.py
+++ b/swh/deposit/tests/api/test_deposit_private_update_status.py
@@ -16,7 +16,6 @@
PRIVATE_PUT_DEPOSIT,
)
from swh.deposit.models import Deposit
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
PRIVATE_PUT_DEPOSIT_NC = PRIVATE_PUT_DEPOSIT + "-nc"
@@ -50,13 +49,13 @@
"origin_url": origin_url,
}
for url in private_check_url_endpoints(deposit_collection, deposit):
- dir_id = swhid(DIRECTORY, directory_id)
- rev_id = swhid(REVISION, revision_id)
- snp_id = swhid(SNAPSHOT, snapshot_id)
-
expected_swhid = "swh:1:dir:%s" % directory_id
expected_swhid_context = (
- f"{dir_id};origin={origin_url};" + f"visit={snp_id};anchor={rev_id};path=/"
+ f"{expected_swhid}"
+ f";origin={origin_url}"
+ f";visit=swh:1:snp:{snapshot_id}"
+ f";anchor=swh:1:rev:{revision_id}"
+ f";path=/"
)
response = authenticated_client.put(
diff --git a/swh/deposit/tests/api/test_deposit_update_atom.py b/swh/deposit/tests/api/test_deposit_update_atom.py
--- a/swh/deposit/tests/api/test_deposit_update_atom.py
+++ b/swh/deposit/tests/api/test_deposit_update_atom.py
@@ -23,12 +23,11 @@
from swh.deposit.parsers import parse_xml
from swh.deposit.tests.common import post_atom, put_atom
from swh.model.hashutil import hash_to_bytes
-from swh.model.identifiers import parse_swhid, swhid
+from swh.model.identifiers import CoreSWHID, ExtendedSWHID, ObjectType
from swh.model.model import (
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
- MetadataTargetType,
RawExtrinsicMetadata,
)
from swh.storage.interface import PagedResult
@@ -319,8 +318,8 @@
Response: 204
"""
- deposit_swhid = parse_swhid(complete_deposit.swhid)
- assert deposit_swhid.object_type == "directory"
+ deposit_swhid = CoreSWHID.from_string(complete_deposit.swhid)
+ assert deposit_swhid.object_type == ObjectType.DIRECTORY
directory_id = hash_to_bytes(deposit_swhid.object_id)
# directory targeted by the complete_deposit does not exist in the storage
@@ -333,7 +332,7 @@
assert list(swh_storage.directory_missing([existing_directory.id])) == []
# and patch one complete deposit swhid so it targets said reference
- complete_deposit.swhid = swhid("directory", existing_directory.id)
+ complete_deposit.swhid = str(existing_directory.swhid())
complete_deposit.save()
actual_existing_requests_archive = DepositRequest.objects.filter(
@@ -394,14 +393,13 @@
)
assert actual_fetcher == metadata_fetcher
- directory_swhid = parse_swhid(complete_deposit.swhid)
+ directory_swhid = ExtendedSWHID.from_string(complete_deposit.swhid)
page_results = swh_storage.raw_extrinsic_metadata_get(
- MetadataTargetType.DIRECTORY, directory_swhid, metadata_authority
+ directory_swhid, metadata_authority
)
assert page_results == PagedResult(
results=[
RawExtrinsicMetadata(
- type=MetadataTargetType.DIRECTORY,
target=directory_swhid,
discovery_date=request_meta1.date,
authority=attr.evolve(metadata_authority, metadata=None),
diff --git a/swh/deposit/tests/conftest.py b/swh/deposit/tests/conftest.py
--- a/swh/deposit/tests/conftest.py
+++ b/swh/deposit/tests/conftest.py
@@ -38,7 +38,8 @@
post_archive,
post_atom,
)
-from swh.model.identifiers import DIRECTORY, REVISION, SNAPSHOT, swhid
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import CoreSWHID, ObjectType, QualifiedSWHID
from swh.scheduler import get_scheduler
# mypy is asked to ignore the import statement above because setup_databases
@@ -457,18 +458,18 @@
)
origin = "https://hal.archives-ouvertes.fr/hal-01727745"
directory_id = "42a13fc721c8716ff695d0d62fc851d641f3a12b"
- revision_id = "548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10"
- snapshot_id = "e5e82d064a9c3df7464223042e0c55d72ccff7f0"
- deposit.swhid = swhid(DIRECTORY, directory_id)
- deposit.swhid_context = swhid(
- DIRECTORY,
- directory_id,
- metadata={
- "origin": origin,
- "visit": swhid(SNAPSHOT, snapshot_id),
- "anchor": swhid(REVISION, revision_id),
- "path": "/",
- },
+ revision_id = hash_to_bytes("548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10")
+ snapshot_id = hash_to_bytes("e5e82d064a9c3df7464223042e0c55d72ccff7f0")
+ deposit.swhid = f"swh:1:dir:{directory_id}"
+ deposit.swhid_context = str(
+ QualifiedSWHID(
+ object_type=ObjectType.DIRECTORY,
+ object_id=hash_to_bytes(directory_id),
+ origin=origin,
+ visit=CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=snapshot_id),
+ anchor=CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id),
+ path=b"/",
+ )
)
deposit.save()
return deposit
diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py
--- a/swh/deposit/tests/test_utils.py
+++ b/swh/deposit/tests/test_utils.py
@@ -3,7 +3,6 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Union
from unittest.mock import patch
import pytest
@@ -11,8 +10,7 @@
from swh.deposit import utils
from swh.deposit.parsers import parse_xml
from swh.model.exceptions import ValidationError
-from swh.model.identifiers import SWHID, parse_swhid
-from swh.model.model import MetadataTargetType
+from swh.model.identifiers import CoreSWHID, QualifiedSWHID
@pytest.fixture
@@ -163,59 +161,43 @@
@pytest.mark.parametrize(
- "swhid_or_origin,expected_type,expected_metadata_context",
+ "swhid,expected_metadata_context",
[
- ("https://something", MetadataTargetType.ORIGIN, {"origin": None}),
- (
- "swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49",
- MetadataTargetType.CONTENT,
- {"origin": None},
- ),
+ ("swh:1:cnt:51b5c8cc985d190b5a7ef4878128ebfdc2358f49", {"origin": None},),
(
"swh:1:snp:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;origin=http://blah",
- MetadataTargetType.SNAPSHOT,
{"origin": "http://blah", "path": None},
),
(
"swh:1:dir:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;path=/path",
- MetadataTargetType.DIRECTORY,
{"origin": None, "path": b"/path"},
),
(
"swh:1:rev:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;visit=swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
- MetadataTargetType.REVISION,
{
"origin": None,
"path": None,
- "snapshot": parse_swhid(
+ "snapshot": CoreSWHID.from_string(
"swh:1:snp:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
),
},
),
(
"swh:1:rel:51b5c8cc985d190b5a7ef4878128ebfdc2358f49;anchor=swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49", # noqa
- MetadataTargetType.RELEASE,
{
"origin": None,
"path": None,
- "directory": parse_swhid(
+ "directory": CoreSWHID.from_string(
"swh:1:dir:41b5c8cc985d190b5a7ef4878128ebfdc2358f49"
),
},
),
],
)
-def test_compute_metadata_context(
- swhid_or_origin: Union[str, SWHID], expected_type, expected_metadata_context
-):
- if expected_type != MetadataTargetType.ORIGIN:
- assert isinstance(swhid_or_origin, str)
- swhid_or_origin = parse_swhid(swhid_or_origin)
-
- object_type, metadata_context = utils.compute_metadata_context(swhid_or_origin)
-
- assert object_type == expected_type
- assert metadata_context == expected_metadata_context
+def test_compute_metadata_context(swhid: str, expected_metadata_context):
+ assert expected_metadata_context == utils.compute_metadata_context(
+ QualifiedSWHID.from_string(swhid)
+ )
def test_parse_swh_reference_origin(xml_with_origin_reference):
@@ -278,7 +260,7 @@
actual_swhid = utils.parse_swh_reference(metadata)
assert actual_swhid is not None
- expected_swhid = parse_swhid(swhid)
+ expected_swhid = QualifiedSWHID.from_string(swhid)
assert actual_swhid == expected_swhid
@@ -291,7 +273,7 @@
"swh:1:dir:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:rev:0175049fc45055a3824a1675ac06e3711619a55a", # noqa
# anchor qualifier should be a core SWHID with type one of
"swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;anchor=swh:1:cnt:b5f505b005435fa5c4fa4c279792bd7b17167c04;path=/", # noqa
- "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04" # noqa
+ "swh:1:rev:c4993c872593e960dc84e4430dbbfbc34fd706d0;visit=swh:1:snp:0175049fc45055a3824a1675ac06e3711619a55a;anchor=swh:1:snp:b5f505b005435fa5c4fa4c279792bd7b17167c04", # noqa
],
)
def test_parse_swh_reference_invalid_swhid(invalid_swhid, xml_with_swhid):
diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py
--- a/swh/deposit/utils.py
+++ b/swh/deposit/utils.py
@@ -5,22 +5,18 @@
import logging
from types import GeneratorType
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import Any, Dict, Optional, Union
import iso8601
import xmltodict
from swh.model.exceptions import ValidationError
from swh.model.identifiers import (
- DIRECTORY,
- RELEASE,
- REVISION,
- SNAPSHOT,
- SWHID,
+ ExtendedSWHID,
+ ObjectType,
+ QualifiedSWHID,
normalize_timestamp,
- parse_swhid,
)
-from swh.model.model import MetadataTargetType
logger = logging.getLogger(__name__)
@@ -120,44 +116,36 @@
return normalize_timestamp(date)
-def compute_metadata_context(
- swhid_reference: Union[SWHID, str]
-) -> Tuple[MetadataTargetType, Dict[str, Any]]:
+def compute_metadata_context(swhid_reference: QualifiedSWHID) -> Dict[str, Any]:
"""Given a SWHID object, determine the context as a dict.
- The parse_swhid calls within are not expected to raise (because they should have
- been caught early on).
-
"""
metadata_context: Dict[str, Any] = {"origin": None}
- if isinstance(swhid_reference, SWHID):
- object_type = MetadataTargetType(swhid_reference.object_type)
- assert object_type != MetadataTargetType.ORIGIN
-
- if swhid_reference.metadata:
- path = swhid_reference.metadata.get("path")
- metadata_context = {
- "origin": swhid_reference.metadata.get("origin"),
- "path": path.encode() if path else None,
- }
- snapshot = swhid_reference.metadata.get("visit")
- if snapshot:
- metadata_context["snapshot"] = parse_swhid(snapshot)
-
- anchor = swhid_reference.metadata.get("anchor")
- if anchor:
- anchor_swhid = parse_swhid(anchor)
- metadata_context[anchor_swhid.object_type] = anchor_swhid
- else:
- object_type = MetadataTargetType.ORIGIN
+ if swhid_reference.qualifiers():
+ metadata_context = {
+ "origin": swhid_reference.origin,
+ "path": swhid_reference.path,
+ }
+ snapshot = swhid_reference.visit
+ if snapshot:
+ metadata_context["snapshot"] = snapshot
+
+ anchor = swhid_reference.anchor
+ if anchor:
+ metadata_context[anchor.object_type.name.lower()] = anchor
- return object_type, metadata_context
+ return metadata_context
-ALLOWED_QUALIFIERS_NODE_TYPE = (SNAPSHOT, REVISION, RELEASE, DIRECTORY)
+ALLOWED_QUALIFIERS_NODE_TYPE = (
+ ObjectType.SNAPSHOT,
+ ObjectType.REVISION,
+ ObjectType.RELEASE,
+ ObjectType.DIRECTORY,
+)
-def parse_swh_reference(metadata: Dict) -> Optional[Union[str, SWHID]]:
+def parse_swh_reference(metadata: Dict,) -> Optional[Union[QualifiedSWHID, str]]:
"""Parse swh reference within the metadata dict (or origin) reference if found, None
otherwise.
@@ -182,9 +170,6 @@
Either swhid or origin reference if any. None otherwise.
""" # noqa
- visit_swhid = None
- anchor_swhid = None
-
swh_deposit = metadata.get("swh:deposit")
if not swh_deposit:
return None
@@ -206,32 +191,31 @@
swhid = swh_object.get("@swhid")
if not swhid:
return None
- swhid_reference = parse_swhid(swhid)
+ swhid_reference = QualifiedSWHID.from_string(swhid)
- if swhid_reference.metadata:
- anchor = swhid_reference.metadata.get("anchor")
+ if swhid_reference.qualifiers():
+ anchor = swhid_reference.anchor
if anchor:
- anchor_swhid = parse_swhid(anchor)
- if anchor_swhid.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
+ if anchor.object_type not in ALLOWED_QUALIFIERS_NODE_TYPE:
error_msg = (
"anchor qualifier should be a core SWHID with type one of "
- f" {', '.join(ALLOWED_QUALIFIERS_NODE_TYPE)}"
+ f"{', '.join(t.name.lower() for t in ALLOWED_QUALIFIERS_NODE_TYPE)}"
)
raise ValidationError(error_msg)
- visit = swhid_reference.metadata.get("visit")
+ visit = swhid_reference.visit
if visit:
- visit_swhid = parse_swhid(visit)
- if visit_swhid.object_type != SNAPSHOT:
+ if visit.object_type != ObjectType.SNAPSHOT:
raise ValidationError(
- f"visit qualifier should be a core SWHID with type {SNAPSHOT}"
+ f"visit qualifier should be a core SWHID with type snp, "
+ f"not {visit.object_type.value}"
)
if (
- visit_swhid
- and anchor_swhid
- and visit_swhid.object_type == SNAPSHOT
- and anchor_swhid.object_type == SNAPSHOT
+ visit
+ and anchor
+ and visit.object_type == ObjectType.SNAPSHOT
+ and anchor.object_type == ObjectType.SNAPSHOT
):
logger.warn(
"SWHID use of both anchor and visit targeting "
@@ -242,3 +226,9 @@
)
return swhid_reference
+
+
+def extended_swhid_from_qualified(swhid: QualifiedSWHID) -> ExtendedSWHID:
+ """Used to get the target of a metadata object from a <swh:reference>,
+ as the latter uses a QualifiedSWHID."""
+ return ExtendedSWHID.from_string(str(swhid).split(";")[0])
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:04 PM (5 d, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223570
Attached To
D5175: deposit: Use CoreSWHID/QualifiedSWHID instead of the deprecated SWHID class + update to new RawExtrinsicMetadata
Event Timeline
Log In to Comment