diff --git a/swh/storage/api/serializers.py b/swh/storage/api/serializers.py --- a/swh/storage/api/serializers.py +++ b/swh/storage/api/serializers.py @@ -7,8 +7,8 @@ from typing import Callable, Dict, List, Tuple -from swh.model.identifiers import SWHID, parse_swhid import swh.model.model as model +from swh.model.swhid import SWHID, parse_swhid from swh.storage import interface @@ -36,7 +36,6 @@ ENCODERS: List[Tuple[type, str, Callable]] = [ (model.BaseModel, "model", _encode_model_object), (SWHID, "swhid", str), - (model.MetadataTargetType, "model_enum", _encode_enum), (model.MetadataAuthorityType, "model_enum", _encode_enum), (interface.ListOrder, "storage_enum", _encode_enum), ] @@ -46,6 +45,5 @@ "swhid": parse_swhid, "model": lambda d: getattr(model, d.pop("__type__")).from_dict(d), "model_enum": _decode_model_enum, - "model_enum": _decode_model_enum, "storage_enum": _decode_storage_enum, } diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -35,6 +35,7 @@ db_to_release, db_to_revision, ) +from swh.storage.postgresql.db import register_swhid_type from swh.storage.replay import object_converter_fn from swh.storage.writer import JournalWriter @@ -79,7 +80,6 @@ "metadata_authority": ["type", "url", "metadata",], "metadata_fetcher": ["name", "version", "metadata",], "raw_extrinsic_metadata": [ - "raw_extrinsic_metadata.type", "raw_extrinsic_metadata.target", "metadata_authority.type", "metadata_authority.url", @@ -533,6 +533,8 @@ ) db = BaseDb.connect(self.config["storage"]["db"]) + register_swhid_type(db.conn) + writer = JournalWriter({"cls": "kafka", **self.config["journal_writer"]}) assert writer.journal is not None diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py --- a/swh/storage/cassandra/model.py +++ b/swh/storage/cassandra/model.py @@ -250,7 +250,6 @@ "fetcher_version", ) - type: str target: str authority_type: str diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py --- a/swh/storage/cassandra/schema.py +++ b/swh/storage/cassandra/schema.py @@ -188,7 +188,6 @@ );""", """ CREATE TABLE IF NOT EXISTS raw_extrinsic_metadata ( - type text, target text, -- metadata source diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -26,7 +26,6 @@ from swh.core.api.serializers import msgpack_dumps, msgpack_loads from swh.model.hashutil import DEFAULT_ALGORITHMS -from swh.model.identifiers import SWHID, parse_swhid from swh.model.model import ( Content, Directory, @@ -34,7 +33,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -47,6 +45,7 @@ SnapshotBranch, TargetType, ) +from swh.model.swhid import SWHID, parse_swhid from swh.storage.interface import ( VISIT_STATUSES, ListOrder, @@ -1157,7 +1156,6 @@ try: row = RawExtrinsicMetadataRow( - type=metadata_entry.type.value, target=str(metadata_entry.target), authority_type=metadata_entry.authority.type.value, authority_url=metadata_entry.authority.url, @@ -1166,7 +1164,7 @@ fetcher_version=metadata_entry.fetcher.version, format=metadata_entry.format, metadata=metadata_entry.metadata, - origin=metadata_entry.origin, + origin=map_optional(str, metadata_entry.origin), visit=metadata_entry.visit, snapshot=map_optional(str, metadata_entry.snapshot), release=map_optional(str, metadata_entry.release), @@ -1180,26 +1178,12 @@ def 
raw_extrinsic_metadata_get(
         self,
-        type: MetadataTargetType,
-        target: Union[str, SWHID],
+        target: SWHID,
         authority: MetadataAuthority,
         after: Optional[datetime.datetime] = None,
         page_token: Optional[bytes] = None,
         limit: int = 1000,
     ) -> PagedResult[RawExtrinsicMetadata]:
-        if type == MetadataTargetType.ORIGIN:
-            if isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type='origin', "
-                    f"but provided target is a SWHID: {target!r}"
-                )
-        else:
-            if not isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type!='origin', "
-                    f"but provided target is not a SWHID: {target!r}"
-                )
-
         if page_token is not None:
             (after_date, after_fetcher_name, after_fetcher_url) = msgpack_loads(
                 base64.b64decode(page_token)
             )
@@ -1235,7 +1219,6 @@
             assert str(target) == entry.target
             result = RawExtrinsicMetadata(
-                type=MetadataTargetType(entry.type),
                 target=target,
                 authority=MetadataAuthority(
                     type=MetadataAuthorityType(entry.authority_type),
@@ -1247,7 +1230,7 @@
                 discovery_date=discovery_date,
                 format=entry.format,
                 metadata=entry.metadata,
-                origin=entry.origin,
+                origin=map_optional(parse_swhid, entry.origin),
                 visit=entry.visit,
                 snapshot=map_optional(parse_swhid, entry.snapshot),
                 release=map_optional(parse_swhid, entry.release),
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -5,20 +5,18 @@
 import datetime
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar, Union
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, TypeVar
 
 from typing_extensions import Protocol, TypedDict, runtime_checkable
 
 from swh.core.api import remote_api_endpoint
 from swh.core.api.classes import PagedResult as CorePagedResult
-from swh.model.identifiers import SWHID
 from swh.model.model import (
     Content,
     Directory,
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     Origin,
     OriginVisit,
     OriginVisitStatus,
@@ -31,6 +29,7 @@
     Snapshot,
     SnapshotBranch,
 )
+from swh.model.swhid import SWHID
 
 
 class ListOrder(Enum):
@@ -1096,8 +1095,7 @@
     @remote_api_endpoint("raw_extrinsic_metadata/get")
     def raw_extrinsic_metadata_get(
         self,
-        type: MetadataTargetType,
-        target: Union[str, SWHID],
+        target: SWHID,
         authority: MetadataAuthority,
         after: Optional[datetime.datetime] = None,
         page_token: Optional[bytes] = None,
@@ -1106,8 +1104,7 @@
-        """Retrieve list of all raw_extrinsic_metadata entries for the id
+        """Retrieve list of all raw_extrinsic_metadata entries for the target
 
         Args:
-            type: one of the values of swh.model.model.MetadataTargetType
-            target: an URL if type is 'origin', else a core SWHID
-            authority: a dict containing keys `type` and `url`.
+            target: the SWHID of the object to retrieve metadata for
+            authority: the MetadataAuthority to retrieve metadata from
             after: minimum discovery_date for a result to be returned
             page_token: opaque token, used to get the next page of results
diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py
old mode 100644
new mode 100755
--- a/swh/storage/migrate_extrinsic_metadata.py
+++ b/swh/storage/migrate_extrinsic_metadata.py
@@ -38,15 +38,14 @@
 from swh.core.db import BaseDb
 from swh.model.hashutil import hash_to_hex
-from swh.model.identifiers import SWHID, parse_swhid
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     RawExtrinsicMetadata,
     Sha1Git,
 )
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
 from swh.storage import get_storage
 from swh.storage.algos.origin import iter_origin_visit_statuses, iter_origin_visits
 from swh.storage.algos.snapshot import snapshot_get_all_branches
@@ -412,19 +411,27 @@
     dry_run: bool,
 ):
     """Does the actual loading to swh-storage."""
+    # origin SWHIDs embed the raw origin URL bytes as their object_id
+    origin_swhid: Optional[SWHID]
+    if origin is not None:
+        origin_swhid = SWHID(
+            object_type=SWHIDObjectType.ORIGIN, object_id=origin.encode()
+        )
+    else:
+        origin_swhid = None
     directory_swhid = SWHID(
-        object_type="directory", object_id=hash_to_hex(directory_id)
+        object_type=SWHIDObjectType.DIRECTORY, object_id=hash_to_hex(directory_id)
+    )
+    revision_swhid = SWHID(
+        object_type=SWHIDObjectType.REVISION, object_id=hash_to_hex(revision_id)
     )
-    revision_swhid = SWHID(object_type="revision", object_id=hash_to_hex(revision_id))
     obj = RawExtrinsicMetadata(
-        type=MetadataTargetType.DIRECTORY,
         target=directory_swhid,
         discovery_date=discovery_date,
         authority=authority,
         fetcher=FETCHER,
         format=format,
         metadata=json.dumps(metadata).encode(),
-        origin=origin,
+        origin=origin_swhid,
         revision=revision_swhid,
     )
     if not dry_run:
@@ -516,7 +523,6 @@
     if discovery_date is None:
         discovery_date = max(dates)
 
-    # Sanity checks to make sure deposit requests are consistent with each other
     assert len(metadata_entries) >= 1, deposit_id
     assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}"
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -7,12 +7,10 @@
 from typing import Any, Dict, Optional
 
 from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     ObjectType,
     Person,
     RawExtrinsicMetadata,
@@ -23,8 +21,6 @@
     TimestampWithTimezone,
 )
 
-from ..utils import map_optional
-
 DEFAULT_AUTHOR = {
     "fullname": None,
     "name": None,
@@ -295,13 +291,8 @@
 def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata:
-    type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"])
-    target = row["raw_extrinsic_metadata.target"]
-    if type_ != MetadataTargetType.ORIGIN:
-        target = parse_swhid(target)
     return RawExtrinsicMetadata(
-        type=type_,
-        target=target,
+        target=row["raw_extrinsic_metadata.target"],
         authority=MetadataAuthority(
             type=MetadataAuthorityType(row["metadata_authority.type"]),
             url=row["metadata_authority.url"],
@@ -314,9 +305,9 @@
         metadata=row["raw_extrinsic_metadata.metadata"],
         origin=row["origin"],
         visit=row["visit"],
-        snapshot=map_optional(parse_swhid, row["snapshot"]),
-        release=map_optional(parse_swhid, row["release"]),
-        revision=map_optional(parse_swhid, row["revision"]),
+        snapshot=row["snapshot"],
+        release=row["release"],
+        revision=row["revision"],
         path=row["path"],
-        directory=map_optional(parse_swhid, row["directory"]),
+        directory=row["directory"],
     )
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -6,14 +6,20 @@
 import datetime
 import logging
 import random
+import re
 import select
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 
+import psycopg2
+import psycopg2.pool
+
 from swh.core.db import BaseDb
 from swh.core.db.db_utils import execute_values_generator
 from swh.core.db.db_utils import jsonize as _jsonize
 from swh.core.db.db_utils import stored_procedure
+from swh.model.hashutil import hash_to_bytes
 from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_typemap
 from swh.storage.interface import ListOrder
 
 logger = logging.getLogger(__name__)
@@ -23,6 +29,55 @@
     return _jsonize(dict(d) if d is not None else None)
 
 
+def typecast_swhid(value, cur):
+    """Parse the text representation of a `swhid` composite value,
+    e.g. (1,dir,"\\x..."), into a SWHID object."""
+    if value is None:
+        return None
+    m = re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+    if m:
+        return SWHID(
+            scheme_version=int(m.group(1)),
+            object_type=swhid_typemap[m.group(2)],
+            # skip the leading \\x escape of the bytea field before decoding
+            # its hex payload
+            object_id=hash_to_bytes(m.group(3)[3:]),
+        )
+    else:
+        raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+    """Render a SWHID as a ROW(...) literal castable to the `swhid`
+    composite type (object_id is bytes, hence the hex encoding)."""
+    return psycopg2.extensions.AsIs(
+        "ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+        % (swhid.scheme_version, swhid.object_type.value, swhid.object_id.hex())
+    )
+
+
+def register_swhid_type(conn):
+    """Register the load/dump conversions between the `swhid` composite
+    type and SWHID objects on the given connection."""
+    with conn.cursor() as cur:
+        cur.execute(
+            """
+            SELECT pg_type.oid
+            FROM pg_type
+            JOIN pg_namespace
+            ON typnamespace = pg_namespace.oid
+            WHERE typname = %(typename)s
+            AND nspname = %(namespace)s""",
+            {"typename": "swhid", "namespace": "public"},
+        )
+
+        oid = cur.fetchone()[0]
+
+        t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+        psycopg2.extensions.register_type(t_SWHID, conn)
+        psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
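+
+# Round-trip sketch (illustrative only; assumes `conn` is a connection to a
+# database where the `swhid` composite type exists):
+#
+#   register_swhid_type(conn)
+#   with conn.cursor() as cur:
+#       cur.execute("SELECT %s::swhid", (some_swhid,))  # adapt_swhid emits ROW(...)
+#       assert cur.fetchone()[0] == some_swhid          # typecast_swhid parses it back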
+
+
 class Db(BaseDb):
     """Proxy to the SWH DB, with wrappers around stored procedures
 
@@ -30,6 +85,14 @@
     current_version = 166
 
+    def __init__(
+        self,
+        conn: psycopg2.extensions.connection,
+        pool: Optional[psycopg2.pool.AbstractConnectionPool] = None,
+    ):
+        super().__init__(conn, pool)
+        register_swhid_type(conn)
+
     def mktemp_dir_entry(self, entry_type, cur=None):
         self._cursor(cur).execute(
             "SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1199,6 @@
     """The list of context columns for all artifact types."""
 
     _raw_extrinsic_metadata_insert_cols = [
-        "type",
         "target",
         "authority_id",
         "fetcher_id",
@@ -1158,7 +1220,6 @@
     raw_extrinsic_metadata_get_cols = [
         "raw_extrinsic_metadata.target",
-        "raw_extrinsic_metadata.type",
         "discovery_date",
         "metadata_authority.type",
         "metadata_authority.url",
@@ -1179,30 +1240,29 @@
         INNER JOIN metadata_authority
             ON (metadata_authority.id=authority_id)
         INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
-        WHERE raw_extrinsic_metadata.target=%s AND authority_id=%s
+        WHERE (raw_extrinsic_metadata.target)=%s
+        AND authority_id=%s
         """

    def raw_extrinsic_metadata_add(
        self,
-        type: str,
-        target: str,
+        target: SWHID,
        discovery_date: datetime.datetime,
        authority_id: int,
        fetcher_id: int,
        format: str,
        metadata: bytes,
-        origin: Optional[str],
+        origin: Optional[SWHID],
        visit: Optional[int],
-        snapshot: Optional[str],
-        release: Optional[str],
-        revision: Optional[str],
+        snapshot: Optional[SWHID],
+        release: Optional[SWHID],
+        revision: Optional[SWHID],
        path: Optional[bytes],
-        directory: Optional[str],
+        directory: Optional[SWHID],
        cur,
    ):
        query = self._raw_extrinsic_metadata_insert_query
        args: Dict[str, Any] = dict(
-            type=type,
            target=target,
            authority_id=authority_id,
            fetcher_id=fetcher_id,
@@ -1224,8 +1284,7 @@
    def raw_extrinsic_metadata_get(
        self,
-        type: str,
-        target: str,
+        target: SWHID,
        authority_id: int,
        after_time: Optional[datetime.datetime],
        after_fetcher: Optional[int],
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -9,7 +9,7 @@
 from contextlib import contextmanager
 import datetime
 import itertools
-from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import Any, Counter, Dict, Iterable, List, Optional, Sequence, Tuple
 
 import attr
 import psycopg2
@@ -19,7 +19,6 @@
 from swh.core.api.serializers import msgpack_dumps, msgpack_loads
 from swh.core.db.common import db_transaction, db_transaction_generator
 from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
-from swh.model.identifiers import SWHID
 from swh.model.model import (
     SHA1_SIZE,
     Content,
@@ -27,7 +26,6 @@
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     Origin,
     OriginVisit,
     OriginVisitStatus,
@@ -41,6 +39,7 @@
     SnapshotBranch,
     TargetType,
 )
+from swh.model.swhid import SWHID, SWHIDObjectType
 from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError
 from swh.storage.interface import (
     VISIT_STATUSES,
@@ -50,12 +49,7 @@
 )
 from swh.storage.metrics import process_metrics, send_metric, timed
 from swh.storage.objstorage import ObjStorage
-from swh.storage.utils import (
-    extract_collision_hash,
-    get_partition_bounds_bytes,
-    map_optional,
-    now,
-)
+from swh.storage.utils import extract_collision_hash, get_partition_bounds_bytes, now
 from swh.storage.writer import JournalWriter
 
 from .
import converters
@@ -1250,26 +1244,22 @@
     ) -> None:
         metadata = list(metadata)
         self.journal_writer.raw_extrinsic_metadata_add(metadata)
-        counter = Counter[MetadataTargetType]()
+        counter = Counter[SWHIDObjectType]()
         for metadata_entry in metadata:
-            authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
-            fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
-
             db.raw_extrinsic_metadata_add(
-                type=metadata_entry.type.value,
-                target=str(metadata_entry.target),
+                target=metadata_entry.target,
+                authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
                 discovery_date=metadata_entry.discovery_date,
-                authority_id=authority_id,
-                fetcher_id=fetcher_id,
+                fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
                 format=metadata_entry.format,
                 metadata=metadata_entry.metadata,
                 origin=metadata_entry.origin,
                 visit=metadata_entry.visit,
-                snapshot=map_optional(str, metadata_entry.snapshot),
-                release=map_optional(str, metadata_entry.release),
-                revision=map_optional(str, metadata_entry.revision),
+                snapshot=metadata_entry.snapshot,
+                release=metadata_entry.release,
+                revision=metadata_entry.revision,
                 path=metadata_entry.path,
-                directory=map_optional(str, metadata_entry.directory),
+                directory=metadata_entry.directory,
                 cur=cur,
             )
             counter[metadata_entry.type] += 1
@@ -1284,8 +1274,7 @@
     @db_transaction()
     def raw_extrinsic_metadata_get(
         self,
-        type: MetadataTargetType,
-        target: Union[str, SWHID],
+        target: SWHID,
         authority: MetadataAuthority,
         after: Optional[datetime.datetime] = None,
         page_token: Optional[bytes] = None,
@@ -1293,19 +1282,6 @@
         db=None,
         cur=None,
     ) -> PagedResult[RawExtrinsicMetadata]:
-        if type == MetadataTargetType.ORIGIN:
-            if isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type='origin', "
-                    f"but provided target is a SWHID: {target!r}"
-                )
-        else:
-            if not isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type!='origin', "
-                    f"but provided target is not a SWHID: {target!r}"
-                )
-
         if page_token:
             (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
             if after and after_time < after:
@@ -1321,12 +1297,13 @@
             return PagedResult(next_page_token=None, results=[],)
 
         rows = db.raw_extrinsic_metadata_get(
-            type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur,
+            target, authority_id, after_time, after_fetcher, limit + 1, cur,
         )
+        rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows]
         results = []
         for row in rows:
-            assert str(target) == row["raw_extrinsic_metadata.target"]
+            assert target == row["raw_extrinsic_metadata.target"]
             results.append(converters.db_to_raw_extrinsic_metadata(row))
 
         if len(results) > limit:
diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql
--- a/swh/storage/sql/20-enums.sql
+++ b/swh/storage/sql/20-enums.sql
@@ -23,3 +23,5 @@
   'failed'
 );
 comment on type origin_visit_state IS 'Possible origin visit status values';
+
+create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori');
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -37,8 +37,8 @@
 -- a set of UNIX-like access permissions, as manipulated by, e.g., chmod
 create domain file_perms as int;
 
--- an SWHID
-create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+-- a SWHID
+create type swhid as (version int, object_type swhid_type, object_id bytea);
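+-- Values are built as e.g. ROW(1, 'dir', '\x...'::bytea)::swhid; the psycopg2
+-- layer (swh.storage.postgresql.db) registers an adapter pair mapping this
+-- composite to and from swh.model.swhid.SWHID objects.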
 -- Checksums about actual file content. Note that the content itself is not
@@ -430,8 +430,7 @@
 -- Extrinsic metadata on DAG objects and origins.
 create table raw_extrinsic_metadata
 (
-  type text not null,
-  target text not null,
+  target swhid not null,
 
   -- metadata source
   authority_id bigint not null,
@@ -443,7 +442,7 @@
   metadata bytea not null,
 
   -- context
-  origin text,
+  origin swhid,
   visit bigint,
   snapshot swhid,
   release swhid,
@@ -453,7 +452,6 @@
 );
 
 comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object';
-comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on';
-comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found';
+comment on column raw_extrinsic_metadata.target is 'the SWHID of the object the metadata is about';
 comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval';
 comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.';
diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
--- a/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
+++ b/swh/storage/tests/migrate_extrinsic_metadata/test_cran.py
@@ -11,15 +11,14 @@
 import json
 from unittest.mock import Mock, call
 
-from swh.model.identifiers import parse_swhid
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     Origin,
     RawExtrinsicMetadata,
 )
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
 from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row
 
 FETCHER = MetadataFetcher(
@@ -102,7 +101,9 @@
     }
 
     origin_url = "https://cran.r-project.org/package=ExtremeRisks"
-
+    origin_swhid = SWHID(
+        object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+    )
     storage = Mock()
 
     def origin_get(urls):
@@ -118,7 +119,6 @@
         call.raw_extrinsic_metadata_add(
             [
                 RawExtrinsicMetadata(
-                    type=MetadataTargetType.DIRECTORY,
                     target=DIRECTORY_SWHID,
                     discovery_date=datetime.datetime(
                         2020, 5, 7, 15, 27, 38, 652281, tzinfo=datetime.timezone.utc,
@@ -127,7 +127,7 @@
                     fetcher=FETCHER,
                     format="original-artifacts-json",
                     metadata=json.dumps(dest_original_artifacts).encode(),
-                    origin=origin_url,
+                    origin=origin_swhid,
                     revision=parse_swhid(
                         "swh:1:rev:000361aa33842cbdea5fa6e77db696b937ebd269"
                     ),
@@ -191,6 +191,9 @@
     }
 
     origin_url = "https://cran.r-project.org/package=gofgamma"
+    origin_swhid = SWHID(
+        object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+    )
 
     storage = Mock()
 
@@ -207,7 +210,6 @@
         call.raw_extrinsic_metadata_add(
             [
                 RawExtrinsicMetadata(
-                    type=MetadataTargetType.DIRECTORY,
                     target=DIRECTORY_SWHID,
                     discovery_date=datetime.datetime(
                         2020, 4, 30, 11, 1, 57, 832481, tzinfo=datetime.timezone.utc,
@@ -216,7 +218,7 @@
                     fetcher=FETCHER,
                     format="original-artifacts-json",
                     metadata=json.dumps(dest_original_artifacts).encode(),
-                    origin=origin_url,
+                    origin=origin_swhid,
                     revision=parse_swhid(
                         "swh:1:rev:0000d4ef5e166122aee6862ad38a18ce5386cc3e"
                     ),
@@ -269,6 +271,9 @@
     }
 
     origin_url = "https://cran.r-project.org/package=r2mlm"
+    origin_swhid = SWHID(
+        object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode()
+    )
 
     storage = Mock()
 
@@ -285,7 +290,6 @@
         call.raw_extrinsic_metadata_add(
             [
                 RawExtrinsicMetadata(
-                    type=MetadataTargetType.DIRECTORY,
                     target=DIRECTORY_SWHID,
                     discovery_date=datetime.datetime(
                         2020, 9, 25, 14, 4, 20, 926667, tzinfo=datetime.timezone.utc,
@@ -294,7 +298,7 @@
                     fetcher=FETCHER,
format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:2e223782ee4ba152e4c886f797976241c39a9aab" ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_debian.py @@ -15,12 +15,10 @@ import attr import pytest -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -34,6 +32,7 @@ Timestamp, TimestampWithTimezone, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage import get_storage from swh.storage.interface import ListOrder, PagedResult from swh.storage.migrate_extrinsic_metadata import debian_origins_from_row, handle_row @@ -425,6 +424,9 @@ } origin_url = "deb://Debian/packages/kalgebra" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -437,7 +439,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 26, 22, 3, 24, tzinfo=datetime.timezone.utc, @@ -446,7 +447,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0000036c311ef33a281b05688f6eadcfc0943aee" ), @@ -545,6 +546,9 @@ storage = Mock() origin_url = "http://snapshot.debian.org/package/pymongo" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) deposit_cur = None with patch("debian_origins_from_row", return_value=[origin_url]): @@ -554,7 +558,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2011, 3, 31, 20, 17, 41, tzinfo=datetime.timezone.utc @@ -563,7 +566,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:000001c28c8fca01b904de92a2640a866ce03cb7" ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_deposit.py @@ -11,15 +11,14 @@ import json from unittest.mock import MagicMock, Mock, call -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage.migrate_extrinsic_metadata import ( DEPOSIT_COLS, cran_package_from_url, @@ -130,6 +129,9 @@ origin_url = ( "https://www.softwareheritage.org/check-deposit-2020-03-11T11:07:18.424476" ) + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) swhid = ( f"swh:1:dir:ef04a768181417fbc5eef4243e2507915f24deea" @@ -186,7 +188,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, 
discovery_date=datetime.datetime( 2020, 3, 11, 11, 7, 18, 688410, tzinfo=datetime.timezone.utc @@ -195,7 +196,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" ), @@ -205,7 +206,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 3, 11, 11, 11, 36, 336283, tzinfo=datetime.timezone.utc @@ -214,7 +214,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:022310df16fd9e4d4f81fe36a142e82db977c01d" ), @@ -333,6 +333,9 @@ ] origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -352,7 +355,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 54, 0, 413748, tzinfo=datetime.timezone.utc @@ -361,7 +363,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" ), @@ -371,7 +373,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 5, 15, 14, 27, 21, 462270, tzinfo=datetime.timezone.utc @@ -380,7 +381,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0116cab71964d59c8570b4c5729b28bdd63c9b46" ), @@ -498,6 +499,9 @@ origin_url = ( "https://www.softwareheritage.org/check-deposit-2020-06-26T13:50:07.564420" ) + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -517,7 +521,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 8, 216113, tzinfo=datetime.timezone.utc @@ -526,7 +529,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" ), @@ -536,7 +539,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 26, 13, 50, 22, 640625, tzinfo=datetime.timezone.utc @@ -545,7 +547,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0122966e509317aece6a41d0f088da733cc09d0f" ), @@ -662,6 +664,9 @@ ] origin_url = "https://hal.archives-ouvertes.fr/hal-02960679" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -681,7 +686,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, 
discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 7, 394544, tzinfo=datetime.timezone.utc @@ -690,7 +694,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" ), @@ -700,7 +704,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 10, 9, 13, 38, 25, 888646, tzinfo=datetime.timezone.utc @@ -709,7 +712,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:4a9d637ba507a2b93365250428e6e3f021f194d0" ), @@ -837,6 +840,9 @@ ] origin_url = "https://software.intel.com/f80482de-90a8-4c32-bce4-6f6918d492ff" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -856,7 +862,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc @@ -865,7 +870,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), @@ -875,7 +880,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 28, 33, 210100, tzinfo=datetime.timezone.utc @@ -884,7 +888,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), @@ -894,7 +898,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 5, 14, 7, 49, 36, 775072, tzinfo=datetime.timezone.utc @@ -903,7 +906,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:09356053c49ad0f9e62e51c29d3e617c791140df" ), @@ -1056,6 +1059,9 @@ ] origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -1075,7 +1081,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 17, 12, 49, 30, 645576, tzinfo=datetime.timezone.utc @@ -1084,7 +1089,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:03987f056eaf4596cd20d7b2ee01c9b84ceddfa8" ), @@ -1164,6 +1169,9 @@ ] origin_url = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -1183,7 +1191,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, 
target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2018, 1, 10, 13, 14, 50, 555143, tzinfo=datetime.timezone.utc @@ -1192,7 +1199,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:2d7bce631fc791080311eb835c47428e586a6ea4" ), @@ -1203,7 +1210,7 @@ ] -def test_deposit_missing_metadata_in_revision(): +def test_deposit_ignore_origin_in_metadata(): extrinsic_metadata = { "id": "hal-01243573", "@xmlns": "http://www.w3.org/2005/Atom", @@ -1297,6 +1304,9 @@ origin_url = "https://hal.archives-ouvertes.fr/hal-01243573" # /!\ not https://hal-test.archives-ouvertes.fr/hal-01243573 # do not trust the metadata! + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -1311,12 +1321,11 @@ deposit_cur.execute.assert_called_once() deposit_cur.__iter__.assert_called_once() - assert storage.method_calls == [ + expected = [ call.origin_get([origin_url]), call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 49, 12, 302745, tzinfo=datetime.timezone.utc @@ -1325,7 +1334,7 @@ fetcher=FETCHER, format="sword-v2-atom-codemeta-v2-in-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" ), @@ -1335,7 +1344,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 2, 25, 15, 54, 30, 102072, tzinfo=datetime.timezone.utc @@ -1344,7 +1352,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(dest_original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:034076f3f41ee1204eb9f64082cbe6e950d7bb8a" ), @@ -1352,3 +1360,4 @@ ] ), ] + assert storage.method_calls == expected diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_gnu.py @@ -11,15 +11,14 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row FETCHER = MetadataFetcher( @@ -76,6 +75,9 @@ } origin_url = "https://ftp.gnu.org/gnu/gperf/" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -92,7 +94,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 11, 27, 11, 17, 38, 318997, tzinfo=datetime.timezone.utc @@ -101,7 +102,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:001c71458e405b25baccc80b99f6634dff9d2b18" ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py 
b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_nixguix.py @@ -11,15 +11,14 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage.migrate_extrinsic_metadata import cran_package_from_url, handle_row FETCHER = MetadataFetcher( @@ -74,6 +73,9 @@ } origin_url = "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -90,7 +92,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc @@ -99,7 +100,7 @@ fetcher=FETCHER, format="nixguix-sources-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" ), @@ -109,7 +110,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 6, 3, 11, 25, 5, 259341, tzinfo=datetime.timezone.utc @@ -118,7 +118,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:0001ba4dd05394850211d7b3854d9913d23ae379" ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_npm.py @@ -11,15 +11,14 @@ import json from unittest.mock import Mock, call -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, RawExtrinsicMetadata, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage.migrate_extrinsic_metadata import ( handle_row, npm_package_from_source_url, @@ -126,6 +125,9 @@ } origin_url = "https://www.npmjs.com/package/@l3ilkojr/jdinsults" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -142,7 +144,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, @@ -151,7 +152,7 @@ fetcher=FETCHER, format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" ), @@ -161,7 +162,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 2, 27, 1, 35, 47, 965375, tzinfo=datetime.timezone.utc, @@ -170,7 +170,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, 
revision=parse_swhid( "swh:1:rev:000002a49bba17ca8cf37f5f3d16aaacf95360fc" ), @@ -231,6 +231,9 @@ } origin_url = "https://www.npmjs.com/package/simplemaps" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -247,7 +250,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, @@ -256,7 +258,7 @@ fetcher=FETCHER, format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" ), @@ -266,7 +268,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2016, 12, 23, 7, 21, 29, tzinfo=datetime.timezone.utc, @@ -275,7 +276,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:000004aeed09ee089c781264c04d2564fd58feb5" ), @@ -332,6 +333,9 @@ } origin_url = "https://www.npmjs.com/package/@piximi/components" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = Mock() @@ -348,7 +352,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, @@ -357,7 +360,7 @@ fetcher=FETCHER, format="replicate-npm-package-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" ), @@ -367,7 +370,6 @@ call.raw_extrinsic_metadata_add( [ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 6, 7, 19, 56, 4, tzinfo=datetime.timezone.utc, @@ -376,7 +378,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=parse_swhid( "swh:1:rev:00002019c5775874bced007a529bd3b78bf60457" ), diff --git a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py --- a/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py +++ b/swh/storage/tests/migrate_extrinsic_metadata/test_pypi.py @@ -13,12 +13,10 @@ import attr -from swh.model.identifiers import parse_swhid from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -27,6 +25,7 @@ SnapshotBranch, TargetType, ) +from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid from swh.storage import get_storage from swh.storage.interface import PagedResult from swh.storage.migrate_extrinsic_metadata import ( @@ -266,6 +265,9 @@ } origin_url = "https://pypi.org/project/m3-ui/" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = get_storage("memory") storage.origin_add([Origin(url=origin_url)]) @@ -282,11 +284,10 @@ revision_swhid = parse_swhid("swh:1:rev:000007617b53e7b1458f695dd07de4ce55af1517") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + 
DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, @@ -295,18 +296,17 @@ fetcher=FETCHER, format="pypi-project-json", metadata=json.dumps(extrinsic_metadata).encode(), - origin=origin_url, + origin=origin_swhid, revision=revision_swhid, ), ], next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2020, 1, 23, 18, 43, 9, 109407, tzinfo=datetime.timezone.utc, @@ -315,7 +315,7 @@ fetcher=FETCHER, format="original-artifacts-json", metadata=json.dumps(original_artifacts).encode(), - origin=origin_url, + origin=origin_swhid, revision=revision_swhid, ), ], @@ -405,11 +405,10 @@ revision_swhid = parse_swhid("swh:1:rev:000004d6382c4ad4c0519266626c36551f0e51ca") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, @@ -425,11 +424,10 @@ next_page_token=None, ) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2019, 1, 23, 22, 10, 55, tzinfo=datetime.timezone.utc, @@ -512,14 +510,13 @@ revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, @@ -580,6 +577,9 @@ } origin_url = "https://pypi.org/project/PyPDFLite/" + origin_swhid = SWHID( + object_type=SWHIDObjectType.ORIGIN, object_id=origin_url.encode() + ) storage = get_storage("memory") @@ -624,14 +624,13 @@ revision_swhid = parse_swhid("swh:1:rev:4ea9917cdf53cd13534a042e4eb3787b86c834d2") assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=PYPI_AUTHORITY, + DIRECTORY_SWHID, authority=PYPI_AUTHORITY, ) == PagedResult(results=[], next_page_token=None,) assert storage.raw_extrinsic_metadata_get( - MetadataTargetType.DIRECTORY, DIRECTORY_SWHID, authority=SWH_AUTHORITY, + DIRECTORY_SWHID, authority=SWH_AUTHORITY, ) == PagedResult( results=[ RawExtrinsicMetadata( - type=MetadataTargetType.DIRECTORY, target=DIRECTORY_SWHID, discovery_date=datetime.datetime( 2014, 5, 7, 22, 3, tzinfo=datetime.timezone.utc, @@ -640,7 +639,7 @@ fetcher=FETCHER, format="original-artifacts-json", 
                    metadata=json.dumps(dest_original_artifacts).encode(),
-                    origin=origin_url,
+                    origin=origin_swhid,
                     revision=revision_swhid,
                 ),
             ],
diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py
--- a/swh/storage/tests/storage_data.py
+++ b/swh/storage/tests/storage_data.py
@@ -10,7 +10,6 @@
 from swh.model import from_disk
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
-from swh.model.identifiers import parse_swhid
 from swh.model.model import (
     Content,
     Directory,
@@ -18,7 +17,6 @@
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     ObjectType,
     Origin,
     OriginVisit,
@@ -34,6 +32,23 @@
     Timestamp,
     TimestampWithTimezone,
 )
+from swh.model.swhid import SWHID
+from swh.model.swhid import _swhid_type_map as swhid_type_map
+from swh.model.swhid import parse_swhid
+
+
+class SWHIDProvider:
+    """Attribute-based access to SWHIDs: `provider.<name>` is the SWHID
+    of `data.<name>`."""
+
+    def __init__(self, data):
+        self._data = data
+
+    def __getattr__(self, name):
+        return mkswhid(getattr(self._data, name))
+
+
+def mkswhid(obj):
+    """Return the SWHID matching a model object, or None if the object's
+    type has no SWHID counterpart."""
+    object_type = swhid_type_map.get(obj.object_type)
+    if object_type:
+        return SWHID(object_type=object_type, object_id=obj.id)
+    return None
 
 
 class StorageData:
@@ -41,6 +56,8 @@
     """
 
+    swhid: SWHIDProvider
+
     content = Content(
         data=b"42\n",
         length=3,
@@ -463,9 +480,8 @@
     snapshots: Tuple[Snapshot, ...] = (snapshot, empty_snapshot, complete_snapshot)
 
     content_metadata1 = RawExtrinsicMetadata(
-        type=MetadataTargetType.CONTENT,
         target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
-        origin=origin.url,
+        origin=mkswhid(origin),
         discovery_date=datetime.datetime(
             2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
         ),
@@ -475,9 +491,8 @@
         metadata=b'{"foo": "bar"}',
     )
     content_metadata2 = RawExtrinsicMetadata(
-        type=MetadataTargetType.CONTENT,
         target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
-        origin=origin2.url,
+        origin=mkswhid(origin2),
         discovery_date=datetime.datetime(
             2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
@@ -487,7 +502,6 @@
         metadata=b"foo: bar",
     )
     content_metadata3 = RawExtrinsicMetadata(
-        type=MetadataTargetType.CONTENT,
         target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"),
         discovery_date=datetime.datetime(
             2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
@@ -496,7 +510,7 @@
         fetcher=attr.evolve(metadata_fetcher2, metadata=None),
         format="yaml",
         metadata=b"foo: bar",
-        origin=origin.url,
+        origin=mkswhid(origin),
         visit=42,
         snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"),
         release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"),
@@ -512,8 +526,7 @@
     )
 
     origin_metadata1 = RawExtrinsicMetadata(
-        type=MetadataTargetType.ORIGIN,
-        target=origin.url,
+        target=mkswhid(origin),
         discovery_date=datetime.datetime(
             2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc
         ),
@@ -523,8 +536,7 @@
         metadata=b'{"foo": "bar"}',
     )
     origin_metadata2 = RawExtrinsicMetadata(
-        type=MetadataTargetType.ORIGIN,
-        target=origin.url,
+        target=mkswhid(origin),
         discovery_date=datetime.datetime(
             2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
@@ -534,8 +546,7 @@
         metadata=b"foo: bar",
     )
     origin_metadata3 = RawExtrinsicMetadata(
-        type=MetadataTargetType.ORIGIN,
-        target=origin.url,
+        target=mkswhid(origin),
         discovery_date=datetime.datetime(
             2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc
         ),
@@ -550,3 +561,6 @@
         origin_metadata2,
         origin_metadata3,
     )
+
+
+StorageData.swhid = SWHIDProvider(StorageData)
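+
+# Usage sketch: `StorageData.swhid.content` is the SWHID corresponding to
+# `StorageData.content`, as computed by mkswhid() above.
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -19,11 +19,9 @@
 from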
 from swh.model import from_disk
 from swh.model.hashutil import hash_to_bytes
 from swh.model.hypothesis_strategies import objects
-from swh.model.identifiers import SWHID
 from swh.model.model import (
     Content,
     Directory,
-    MetadataTargetType,
     Origin,
     OriginVisit,
     OriginVisitStatus,
@@ -3323,23 +3321,17 @@
         swh_storage.metadata_authority_add([])

     def test_content_metadata_add(self, swh_storage, sample_data):
-        content = sample_data.content
+        content_swhid = sample_data.swhid.content
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         content_metadata = sample_data.content_metadata[:2]
-        content_swhid = SWHID(
-            object_type="content", object_id=hash_to_bytes(content.sha1_git)
-        )
-
         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])

         swh_storage.raw_extrinsic_metadata_add(content_metadata)

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content_swhid, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)

         assert result.next_page_token is None
         assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list(
             content_metadata
@@ -3356,13 +3348,10 @@

     def test_content_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
-        content = sample_data.content
+        content_swhid = sample_data.swhid.content
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-        content_swhid = SWHID(
-            object_type="content", object_id=hash_to_bytes(content.sha1_git)
-        )

         new_content_metadata2 = attr.evolve(
             content_metadata2, format="new-format", metadata=b"new-metadata",
@@ -3374,9 +3363,7 @@
         swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
         swh_storage.raw_extrinsic_metadata_add([new_content_metadata2])

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content_swhid, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)

         assert result.next_page_token is None
         expected_results1 = (content_metadata, new_content_metadata2)
@@ -3397,8 +3384,8 @@
             content1_metadata3,
         ) = sample_data.content_metadata[:3]

-        content1_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-        content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git)
+        content1_swhid = sample_data.swhid.content
+        content2_swhid = sample_data.swhid.content2
         content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid)

         swh_storage.metadata_authority_add([authority, authority2])
@@ -3413,43 +3400,34 @@
             ]
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content1_swhid, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority)

         assert result.next_page_token is None
         assert [content1_metadata1, content1_metadata2] == list(
             sorted(result.results, key=lambda x: x.discovery_date,)
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content1_swhid, authority2
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2)

         assert result.next_page_token is None
         assert [content1_metadata3] == list(
             sorted(result.results, key=lambda x: x.discovery_date,)
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content2_swhid, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority)
         assert result.next_page_token is None
         assert [content2_metadata] == list(result.results,)

     def test_content_metadata_get_after(self, swh_storage, sample_data):
-        content = sample_data.content
+        content_swhid = sample_data.swhid.content
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-        content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
         swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT,
             content_swhid,
             authority,
             after=content_metadata.discovery_date - timedelta(seconds=1),
@@ -3460,62 +3438,46 @@
         )

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT,
-            content_swhid,
-            authority,
-            after=content_metadata.discovery_date,
+            content_swhid, authority, after=content_metadata.discovery_date,
         )
         assert result.next_page_token is None
         assert result.results == [content_metadata2]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT,
-            content_swhid,
-            authority,
-            after=content_metadata2.discovery_date,
+            content_swhid, authority, after=content_metadata2.discovery_date,
         )
         assert result.next_page_token is None
         assert result.results == []

     def test_content_metadata_get_paginate(self, swh_storage, sample_data):
-        content = sample_data.content
+        content_swhid = sample_data.swhid.content
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         content_metadata, content_metadata2 = sample_data.content_metadata[:2]

-        content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
         swh_storage.metadata_fetcher_add([fetcher])
         swh_storage.metadata_authority_add([authority])
         swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])

-        swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content_swhid, authority
-        )
+        swh_storage.raw_extrinsic_metadata_get(content_swhid, authority)

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+            content_swhid, authority, limit=1
         )
         assert result.next_page_token is not None
         assert result.results == [content_metadata]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT,
-            content_swhid,
-            authority,
-            limit=1,
-            page_token=result.next_page_token,
+            content_swhid, authority, limit=1, page_token=result.next_page_token,
         )
         assert result.next_page_token is None
         assert result.results == [content_metadata2]

     def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data):
-        content = sample_data.content
+        content_swhid = sample_data.swhid.content
         fetcher1, fetcher2 = sample_data.fetchers[:2]
         authority = sample_data.metadata_authority
         content_metadata, content_metadata2 = sample_data.content_metadata[:2]

-        content_swhid = SWHID(object_type="content", object_id=content.sha1_git)
-
         swh_storage.metadata_fetcher_add([fetcher1, fetcher2])
         swh_storage.metadata_authority_add([authority])
@@ -3530,38 +3492,20 @@
         )

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT, content_swhid, authority, limit=1
+            content_swhid, authority, limit=1
         )
         assert result.next_page_token is not None
         assert result.results == [content_metadata]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.CONTENT,
-            content_swhid,
-            authority,
-            limit=1,
-            page_token=result.next_page_token,
+            content_swhid, authority, limit=1, page_token=result.next_page_token,
        )
        assert result.next_page_token is None
        assert result.results == [new_content_metadata2]

-    def test_content_metadata_get__invalid_id(self, swh_storage, sample_data):
-        origin = sample_data.origin
-        fetcher = sample_data.metadata_fetcher
-        authority = sample_data.metadata_authority
-        content_metadata, content_metadata2 = sample_data.content_metadata[:2]
-
-        swh_storage.metadata_fetcher_add([fetcher])
-        swh_storage.metadata_authority_add([authority])
-        swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2])
-
-        with pytest.raises(StorageArgumentException, match="SWHID"):
-            swh_storage.raw_extrinsic_metadata_get(
-                MetadataTargetType.CONTENT, origin.url, authority
-            )
-
     def test_origin_metadata_add(self, swh_storage, sample_data):
         origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3573,9 +3517,7 @@
         swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)

         assert result.next_page_token is None
         assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [
             origin_metadata,
@@ -3596,6 +3538,7 @@
     def test_origin_metadata_add_duplicate(self, swh_storage, sample_data):
         """Duplicates should be silently updated."""
         origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3611,9 +3554,7 @@
         swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
         swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2])

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)

         assert result.next_page_token is None
         # which of the two behavior happens is backend-specific.
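(Reviewer note: every call site above now passes the target SWHID as the sole first argument, and paging still works through `limit`/`page_token`. As a sketch of how a consumer drains the paginated API after this change — `storage`, `target_swhid`, and `authority` are placeholders, while `results` and `next_page_token` are the PagedResult attributes exercised by these tests:)

```python
# Sketch: collect all raw extrinsic metadata for one target, page by page.
# Assumes `storage`, `target_swhid`, and `authority` are set up as in the
# tests above; `limit` is arbitrary here.
page_token = None
entries = []
while True:
    result = storage.raw_extrinsic_metadata_get(
        target_swhid, authority, limit=100, page_token=page_token
    )
    entries.extend(result.results)
    page_token = result.next_page_token
    if page_token is None:  # last page reached
        break
```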
@@ -3626,7 +3567,10 @@
         )

     def test_origin_metadata_get(self, swh_storage, sample_data):
-        origin, origin2 = sample_data.origins[:2]
+        origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
+        origin2 = sample_data.origin2
+        origin2_swhid = sample_data.swhid.origin2
         fetcher, fetcher2 = sample_data.fetchers[:2]
         authority, authority2 = sample_data.authorities[:2]
         (
@@ -3637,7 +3581,7 @@

         assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2}

-        origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url)
+        origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid)

         swh_storage.metadata_authority_add([authority, authority2])
         swh_storage.metadata_fetcher_add([fetcher, fetcher2])
@@ -3646,30 +3590,25 @@
             [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata]
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)
         assert result.next_page_token is None
         assert [origin1_metadata1, origin1_metadata2] == list(
             sorted(result.results, key=lambda x: x.discovery_date,)
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority2
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2)
        assert result.next_page_token is None
        assert [origin1_metadata3] == list(
             sorted(result.results, key=lambda x: x.discovery_date,)
         )

-        result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin2.url, authority
-        )
+        result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority)
         assert result.next_page_token is None
         assert [origin2_metadata] == list(result.results,)

     def test_origin_metadata_get_after(self, swh_storage, sample_data):
         origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3681,8 +3620,7 @@
         swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN,
-            origin.url,
+            origin_swhid,
             authority,
             after=origin_metadata.discovery_date - timedelta(seconds=1),
         )
@@ -3693,25 +3631,20 @@
         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN,
-            origin.url,
-            authority,
-            after=origin_metadata.discovery_date,
+            origin_swhid, authority, after=origin_metadata.discovery_date,
         )

         assert result.next_page_token is None
         assert result.results == [origin_metadata2]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN,
-            origin.url,
-            authority,
-            after=origin_metadata2.discovery_date,
+            origin_swhid, authority, after=origin_metadata2.discovery_date,
         )

         assert result.next_page_token is None
         assert result.results == []

     def test_origin_metadata_get_paginate(self, swh_storage, sample_data):
         origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
         fetcher = sample_data.metadata_fetcher
         authority = sample_data.metadata_authority
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3722,28 +3655,23 @@

         swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])

-        swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority
-        )
+        swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority)

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+            origin_swhid, authority, limit=1
         )
         assert result.next_page_token is not None
         assert result.results == [origin_metadata]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN,
-            origin.url,
-            authority,
-            limit=1,
-            page_token=result.next_page_token,
+            origin_swhid, authority, limit=1, page_token=result.next_page_token,
         )
         assert result.next_page_token is None
         assert result.results == [origin_metadata2]

     def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data):
         origin = sample_data.origin
+        origin_swhid = sample_data.swhid.origin
         fetcher1, fetcher2 = sample_data.fetchers[:2]
         authority = sample_data.metadata_authority
         origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
@@ -3761,17 +3689,13 @@
         swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2])

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN, origin.url, authority, limit=1
+            origin_swhid, authority, limit=1
         )
         assert result.next_page_token is not None
         assert result.results == [origin_metadata]

         result = swh_storage.raw_extrinsic_metadata_get(
-            MetadataTargetType.ORIGIN,
-            origin.url,
-            authority,
-            limit=1,
-            page_token=result.next_page_token,
+            origin_swhid, authority, limit=1, page_token=result.next_page_token,
         )
         assert result.next_page_token is None
         assert result.results == [new_origin_metadata2]
@@ -3798,24 +3722,6 @@
         with pytest.raises(StorageArgumentException, match="fetcher"):
             swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])

-    def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data):
-        origin = sample_data.origin
-        authority = sample_data.metadata_authority
-        fetcher = sample_data.metadata_fetcher
-        origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2]
-        content_metadata = sample_data.content_metadata[0]
-        assert swh_storage.origin_add([origin]) == {"origin:add": 1}
-
-        swh_storage.metadata_fetcher_add([fetcher])
-        swh_storage.metadata_authority_add([authority])
-
-        swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2])
-
-        with pytest.raises(StorageArgumentException, match="SWHID"):
-            swh_storage.raw_extrinsic_metadata_get(
-                MetadataTargetType.ORIGIN, content_metadata.target, authority,
-            )
-

 class TestStorageGeneratedData:
     def test_generate_content_get_data(self, swh_storage, swh_contents):
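(Reviewer note: the two `__invalid_id` tests are deleted rather than updated because the new signature makes the mismatch unrepresentable — `target` is always a SWHID, so there is no longer a `type`/`target` combination to validate at runtime. The updated tests rely on a `sample_data.swhid` helper; below is a minimal, hypothetical sketch of what such a helper presumably wraps, mirroring the inline `SWHID(...)` constructions deleted above. The class name and attribute layout are assumptions; the real fixture lives in swh.storage.tests:)

```python
# Hypothetical sketch of the `sample_data.swhid` convenience object.
# The constructions mirror the one-line SWHID(...) calls removed from the
# tests above; the import path is the one introduced by this diff.
from swh.model.swhid import SWHID


class SampleSWHIDs:
    """Precomputed core SWHIDs for the sample objects, so tests no longer
    build them inline at each call site."""

    def __init__(self, sample_data):
        self.content = SWHID(
            object_type="content", object_id=sample_data.content.sha1_git
        )
        self.content2 = SWHID(
            object_type="content", object_id=sample_data.content2.sha1_git
        )
```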