diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -11,14 +11,12 @@ from swh.core.api import remote_api_endpoint from swh.core.api.classes import PagedResult as CorePagedResult -from swh.model.identifiers import SWHID from swh.model.model import ( Content, Directory, MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -31,6 +29,7 @@ Snapshot, SnapshotBranch, ) +from swh.model.swhid import SWHID, SWHIDObjectType class ListOrder(Enum): @@ -1099,8 +1098,7 @@ @remote_api_endpoint("raw_extrinsic_metadata/get") def raw_extrinsic_metadata_get( self, - type: MetadataTargetType, - target: Union[str, SWHID], + target: SWHID, authority: MetadataAuthority, after: Optional[datetime.datetime] = None, page_token: Optional[bytes] = None, @@ -1109,8 +1107,7 @@ """Retrieve list of all raw_extrinsic_metadata entries for the id Args: - type: one of the values of swh.model.model.MetadataTargetType - target: an URL if type is 'origin', else a core SWHID + target: a core SWHID authority: a dict containing keys `type` and `url`. 
            after: minimum discovery_date for a result to be returned
            page_token: opaque token, used to get the next page of results
diff --git a/swh/storage/postgresql/converters.py b/swh/storage/postgresql/converters.py
--- a/swh/storage/postgresql/converters.py
+++ b/swh/storage/postgresql/converters.py
@@ -4,15 +4,13 @@
 # See top-level LICENSE file for more information
 
 import datetime
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Tuple
 
 from swh.core.utils import encode_with_unescape
-from swh.model.identifiers import parse_swhid
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
     MetadataFetcher,
-    MetadataTargetType,
     ObjectType,
     Person,
     RawExtrinsicMetadata,
@@ -22,6 +23,7 @@
     Timestamp,
     TimestampWithTimezone,
 )
+from swh.model.swhid import SWHID, SWHIDObjectType, parse_swhid
 
 from ..utils import map_optional
 
@@ -142,6 +144,24 @@
     }
 
 
+def swhid_to_db(swhid: SWHID) -> Optional[Tuple[int, str, bytes]]:
+    if swhid is not None:
+        assert not swhid.metadata
+        assert swhid.namespace == "swh"
+        return (swhid.scheme_version, swhid.object_type.value, swhid.object_id)
+
+
+def db_to_swhid(db_swhid: Optional[Tuple[int, str, bytes]]) -> Optional[SWHID]:
+    if db_swhid:
+        return SWHID(
+            namespace="swh",
+            scheme_version=db_swhid[0],
+            object_type=db_swhid[1],
+            object_id=db_swhid[2],
+            metadata={},
+        )
+
+
 def revision_to_db(revision: Revision) -> Dict[str, Any]:
     """Convert a swh-model revision to its database representation.
""" @@ -295,13 +315,8 @@ def db_to_raw_extrinsic_metadata(row) -> RawExtrinsicMetadata: - type_ = MetadataTargetType(row["raw_extrinsic_metadata.type"]) - target = row["raw_extrinsic_metadata.target"] - if type_ != MetadataTargetType.ORIGIN: - target = parse_swhid(target) return RawExtrinsicMetadata( - type=type_, - target=target, + target=row["raw_extrinsic_metadata.target"], authority=MetadataAuthority( type=MetadataAuthorityType(row["metadata_authority.type"]), url=row["metadata_authority.url"], @@ -314,9 +329,9 @@ metadata=row["raw_extrinsic_metadata.metadata"], origin=row["origin"], visit=row["visit"], - snapshot=map_optional(parse_swhid, row["snapshot"]), - release=map_optional(parse_swhid, row["release"]), - revision=map_optional(parse_swhid, row["revision"]), + snapshot=row["snapshot"], + release=row["release"], + revision=row["revision"], path=row["path"], - directory=map_optional(parse_swhid, row["directory"]), + directory=row["directory"], ) diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py --- a/swh/storage/postgresql/db.py +++ b/swh/storage/postgresql/db.py @@ -6,15 +6,22 @@ import datetime import logging import random +import re import select from typing import Any, Dict, Iterable, List, Optional, Tuple +import psycopg2 + from swh.core.db import BaseDb from swh.core.db.db_utils import execute_values_generator from swh.core.db.db_utils import jsonize as _jsonize from swh.core.db.db_utils import stored_procedure +from swh.model.hashutil import hash_to_bytes from swh.model.model import SHA1_SIZE, OriginVisit, OriginVisitStatus +from swh.model.swhid import SWHID +from swh.model.swhid import _swhid_type_map as swhid_typemap from swh.storage.interface import ListOrder +from swh.storage.postgresql.converters import swhid_to_db logger = logging.getLogger(__name__) @@ -23,6 +30,35 @@ return _jsonize(dict(d) if d is not None else None) +def typecast_swhid(value, cur): + if value is None: + return None + m = 
re.match(r'\(([^)]+),([^)]+),"([^)]+)"\)', value)
+    if m:
+        return SWHID(
+            scheme_version=int(m.group(1)),
+            object_type=swhid_typemap[m.group(2)],
+            object_id=hash_to_bytes(m.group(3)[3:]),
+        )
+    else:
+        raise psycopg2.InterfaceError("bad SWHID representation: %r" % value)
+
+
+def adapt_swhid(swhid: SWHID):
+    value = psycopg2.extensions.AsIs(
+        (
+            b"ROW(%d, '%s'::swhid_type, '\\x%s'::bytea)"
+            % (
+                swhid.scheme_version,
+                swhid.object_type.value.encode(),
+                swhid.object_id.hex().encode(),
+            )
+        ).decode()
+    )
+    logger.debug("adapted SWHID value: %s", value)
+    return value
+
+
 class Db(BaseDb):
     """Proxy to the SWH DB, with wrappers around stored procedures
 
@@ -30,6 +66,30 @@
 
     current_version = 166
 
+    def __init__(
+        self,
+        conn: psycopg2.extensions.connection,
+        pool: Optional["psycopg2.pool.AbstractConnectionPool"] = None,
+    ):
+        super().__init__(conn, pool)
+        with conn.cursor() as cur:
+            cur.execute(
+                """
+                SELECT pg_type.oid
+                FROM pg_type
+                JOIN pg_namespace
+                ON typnamespace = pg_namespace.oid
+                WHERE typname = %(typename)s
+                AND nspname = %(namespace)s""",
+                {"typename": "swhid", "namespace": "public"},
+            )
+
+            oid = cur.fetchone()[0]
+
+        t_SWHID = psycopg2.extensions.new_type((oid,), "SWHID", typecast_swhid)
+        psycopg2.extensions.register_type(t_SWHID, conn)
+        psycopg2.extensions.register_adapter(SWHID, adapt_swhid)
+
     def mktemp_dir_entry(self, entry_type, cur=None):
         self._cursor(cur).execute(
             "SELECT swh_mktemp_dir_entry(%s)", (("directory_entry_%s" % entry_type),)
@@ -1136,7 +1196,6 @@
     """The list of context columns for all artifact types."""
 
     _raw_extrinsic_metadata_insert_cols = [
-        "type",
         "target",
         "authority_id",
         "fetcher_id",
@@ -1158,7 +1217,6 @@
 
     raw_extrinsic_metadata_get_cols = [
         "raw_extrinsic_metadata.target",
-        "raw_extrinsic_metadata.type",
        "discovery_date",
         "metadata_authority.type",
         "metadata_authority.url",
@@ -1179,30 +1237,29 @@
         INNER JOIN metadata_authority ON (metadata_authority.id=authority_id)
         INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
-        WHERE 
raw_extrinsic_metadata.target=%s AND authority_id=%s + WHERE (raw_extrinsic_metadata.target)=%s + AND authority_id=%s """ def raw_extrinsic_metadata_add( self, - type: str, - target: str, + target: SWHID, discovery_date: datetime.datetime, authority_id: int, fetcher_id: int, format: str, metadata: bytes, - origin: Optional[str], + origin: Optional[SWHID], visit: Optional[int], - snapshot: Optional[str], - release: Optional[str], - revision: Optional[str], + snapshot: Optional[SWHID], + release: Optional[SWHID], + revision: Optional[SWHID], path: Optional[bytes], - directory: Optional[str], + directory: Optional[SWHID], cur, ): query = self._raw_extrinsic_metadata_insert_query args: Dict[str, Any] = dict( - type=type, target=target, authority_id=authority_id, fetcher_id=fetcher_id, @@ -1224,8 +1281,7 @@ def raw_extrinsic_metadata_get( self, - type: str, - target: str, + target: SWHID, authority_id: int, after_time: Optional[datetime.datetime], after_fetcher: Optional[int], diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -19,7 +19,6 @@ from swh.core.api.serializers import msgpack_dumps, msgpack_loads from swh.core.db.common import db_transaction, db_transaction_generator from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex -from swh.model.identifiers import SWHID from swh.model.model import ( SHA1_SIZE, Content, @@ -27,7 +26,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -41,6 +39,7 @@ SnapshotBranch, TargetType, ) +from swh.model.swhid import SWHID, SWHIDObjectType from swh.storage.exc import HashCollision, StorageArgumentException, StorageDBError from swh.storage.interface import ( VISIT_STATUSES, @@ -1256,26 +1255,18 @@ ) -> None: metadata = list(metadata) self.journal_writer.raw_extrinsic_metadata_add(metadata) - counter = 
Counter[MetadataTargetType]()
+        counter = Counter[SWHIDObjectType]()
         for metadata_entry in metadata:
-            authority_id = self._get_authority_id(metadata_entry.authority, db, cur)
-            fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
+            d = attr.asdict(
+                metadata_entry, recurse=False
+            )  # we want SWHID not converted
+            d.pop("authority")
+            d.pop("fetcher")
             db.raw_extrinsic_metadata_add(
-                type=metadata_entry.type.value,
-                target=str(metadata_entry.target),
-                discovery_date=metadata_entry.discovery_date,
-                authority_id=authority_id,
-                fetcher_id=fetcher_id,
-                format=metadata_entry.format,
-                metadata=metadata_entry.metadata,
-                origin=metadata_entry.origin,
-                visit=metadata_entry.visit,
-                snapshot=map_optional(str, metadata_entry.snapshot),
-                release=map_optional(str, metadata_entry.release),
-                revision=map_optional(str, metadata_entry.revision),
-                path=metadata_entry.path,
-                directory=map_optional(str, metadata_entry.directory),
+                **d,
+                authority_id=self._get_authority_id(metadata_entry.authority, db, cur),
+                fetcher_id=self._get_fetcher_id(metadata_entry.fetcher, db, cur),
                 cur=cur,
             )
-            counter[metadata_entry.type] += 1
+            counter[metadata_entry.target.object_type] += 1
@@ -1290,8 +1281,7 @@
     @db_transaction()
     def raw_extrinsic_metadata_get(
         self,
-        type: MetadataTargetType,
-        target: Union[str, SWHID],
+        target: SWHID,
         authority: MetadataAuthority,
         after: Optional[datetime.datetime] = None,
         page_token: Optional[bytes] = None,
@@ -1299,19 +1289,6 @@
         db=None,
         cur=None,
     ) -> PagedResult[RawExtrinsicMetadata]:
-        if type == MetadataTargetType.ORIGIN:
-            if isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type='origin', "
-                    f"but provided target is a SWHID: {target!r}"
-                )
-        else:
-            if not isinstance(target, SWHID):
-                raise StorageArgumentException(
-                    f"raw_extrinsic_metadata_get called with type!='origin', "
-                    f"but provided target is not a SWHID: {target!r}"
-                )
-
         if page_token:
            (after_time, after_fetcher) = msgpack_loads(base64.b64decode(page_token))
             if 
after and after_time < after: @@ -1327,12 +1304,13 @@ return PagedResult(next_page_token=None, results=[],) rows = db.raw_extrinsic_metadata_get( - type, str(target), authority_id, after_time, after_fetcher, limit + 1, cur, + target, authority_id, after_time, after_fetcher, limit + 1, cur, ) + rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows] results = [] for row in rows: - assert str(target) == row["raw_extrinsic_metadata.target"] + assert target == row["raw_extrinsic_metadata.target"] results.append(converters.db_to_raw_extrinsic_metadata(row)) if len(results) > limit: diff --git a/swh/storage/sql/20-enums.sql b/swh/storage/sql/20-enums.sql --- a/swh/storage/sql/20-enums.sql +++ b/swh/storage/sql/20-enums.sql @@ -23,3 +23,5 @@ 'failed' ); comment on type origin_visit_state IS 'Possible origin visit status values'; + +create type swhid_type as enum ('cnt', 'dir', 'rev', 'rel', 'snp', 'ori'); diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -38,7 +38,9 @@ create domain file_perms as int; -- an SWHID -create domain swhid as text check (value ~ '^swh:[0-9]+:.*'); +-- create domain swhid as text check (value ~ '^swh:[0-9]+:.*'); + +create type swhid as (version int, object_type swhid_type, object_id bytea); -- Checksums about actual file content. Note that the content itself is not @@ -430,8 +432,7 @@ -- Extrinsic metadata on a DAG objects and origins. 
create table raw_extrinsic_metadata ( - type text not null, - target text not null, + target swhid not null, -- metadata source authority_id bigint not null, @@ -443,7 +444,7 @@ metadata bytea not null, -- context - origin text, + origin swhid, visit bigint, snapshot swhid, release swhid, @@ -453,7 +454,6 @@ ); comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object'; -comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on'; comment on column raw_extrinsic_metadata.target is 'the SWHID or origin URL for which the metadata was found'; comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval'; comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -10,7 +10,6 @@ from swh.model import from_disk from swh.model.hashutil import hash_to_bytes, hash_to_hex -from swh.model.identifiers import parse_swhid from swh.model.model import ( Content, Directory, @@ -18,7 +17,6 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, - MetadataTargetType, ObjectType, Origin, OriginVisit, @@ -34,6 +32,23 @@ Timestamp, TimestampWithTimezone, ) +from swh.model.swhid import SWHID, SWHIDObjectType +from swh.model.swhid import _swhid_type_map as swhid_type_map +from swh.model.swhid import parse_swhid + + +class SWHIDProvider: + def __init__(self, data): + self._data = data + + def __getattr__(self, name): + return mkswhid(getattr(self._data, name)) + + +def mkswhid(obj): + object_type = swhid_type_map.get(obj.object_type) + if object_type: + return SWHID(object_type=object_type, object_id=obj.id) class StorageData: @@ -463,9 +478,8 @@ snapshots: Tuple[Snapshot, ...] 
= (snapshot, empty_snapshot, complete_snapshot) content_metadata1 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), - origin=origin.url, + origin=mkswhid(origin), discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), @@ -475,9 +489,8 @@ metadata=b'{"foo": "bar"}', ) content_metadata2 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), - origin=origin2.url, + origin=mkswhid(origin2), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -487,7 +500,6 @@ metadata=b"foo: bar", ) content_metadata3 = RawExtrinsicMetadata( - type=MetadataTargetType.CONTENT, target=parse_swhid(f"swh:1:cnt:{hash_to_hex(content.sha1_git)}"), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc @@ -496,7 +508,7 @@ fetcher=attr.evolve(metadata_fetcher2, metadata=None), format="yaml", metadata=b"foo: bar", - origin=origin.url, + origin=mkswhid(origin), visit=42, snapshot=parse_swhid(f"swh:1:snp:{hash_to_hex(snapshot.id)}"), release=parse_swhid(f"swh:1:rel:{hash_to_hex(release.id)}"), @@ -512,8 +524,7 @@ ) origin_metadata1 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=mkswhid(origin), discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), @@ -523,8 +534,7 @@ metadata=b'{"foo": "bar"}', ) origin_metadata2 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=mkswhid(origin), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -534,8 +544,7 @@ metadata=b"foo: bar", ) origin_metadata3 = RawExtrinsicMetadata( - type=MetadataTargetType.ORIGIN, - target=origin.url, + target=mkswhid(origin), discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), @@ -550,3 +559,6 @@ 
origin_metadata2, origin_metadata3, ) + + +StorageData.swhid = SWHIDProvider(StorageData) diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py --- a/swh/storage/tests/storage_tests.py +++ b/swh/storage/tests/storage_tests.py @@ -19,11 +19,9 @@ from swh.model import from_disk from swh.model.hashutil import hash_to_bytes from swh.model.hypothesis_strategies import objects -from swh.model.identifiers import SWHID from swh.model.model import ( Content, Directory, - MetadataTargetType, Origin, OriginVisit, OriginVisitStatus, @@ -33,6 +31,7 @@ Snapshot, TargetType, ) +from swh.model.swhid import SWHID, SWHIDObjectType from swh.storage import get_storage from swh.storage.common import origin_url_to_sha1 as sha1 from swh.storage.exc import HashCollision, StorageArgumentException @@ -3324,22 +3323,17 @@ def test_content_metadata_add(self, swh_storage, sample_data): content = sample_data.content + content_swhid = sample_data.swhid.content fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority content_metadata = sample_data.content_metadata[:2] - content_swhid = SWHID( - object_type="content", object_id=hash_to_bytes(content.sha1_git) - ) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) swh_storage.raw_extrinsic_metadata_add(content_metadata) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) assert result.next_page_token is None assert list(sorted(result.results, key=lambda x: x.discovery_date,)) == list( content_metadata @@ -3357,12 +3351,10 @@ def test_content_metadata_add_duplicate(self, swh_storage, sample_data): """Duplicates should be silently updated.""" content = sample_data.content + content_swhid = sample_data.swhid.content fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority content_metadata, 
content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID( - object_type="content", object_id=hash_to_bytes(content.sha1_git) - ) new_content_metadata2 = attr.evolve( content_metadata2, format="new-format", metadata=b"new-metadata", @@ -3374,9 +3366,7 @@ swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) assert result.next_page_token is None expected_results1 = (content_metadata, new_content_metadata2) @@ -3397,8 +3387,8 @@ content1_metadata3, ) = sample_data.content_metadata[:3] - content1_swhid = SWHID(object_type="content", object_id=content.sha1_git) - content2_swhid = SWHID(object_type="content", object_id=content2.sha1_git) + content1_swhid = sample_data.swhid.content + content2_swhid = sample_data.swhid.content2 content2_metadata = attr.evolve(content1_metadata2, target=content2_swhid) swh_storage.metadata_authority_add([authority, authority2]) @@ -3413,43 +3403,35 @@ ] ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content1_swhid, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority) assert result.next_page_token is None assert [content1_metadata1, content1_metadata2] == list( sorted(result.results, key=lambda x: x.discovery_date,) ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content1_swhid, authority2 - ) + result = swh_storage.raw_extrinsic_metadata_get(content1_swhid, authority2) assert result.next_page_token is None assert [content1_metadata3] == list( sorted(result.results, key=lambda x: x.discovery_date,) ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content2_swhid, authority - ) + result = 
swh_storage.raw_extrinsic_metadata_get(content2_swhid, authority) assert result.next_page_token is None assert [content2_metadata] == list(result.results,) def test_content_metadata_get_after(self, swh_storage, sample_data): content = sample_data.content + content_swhid = sample_data.swhid.content fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority, after=content_metadata.discovery_date - timedelta(seconds=1), @@ -3460,62 +3442,48 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, - authority, - after=content_metadata.discovery_date, + content_swhid, authority, after=content_metadata.discovery_date, ) assert result.next_page_token is None assert result.results == [content_metadata2] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, - authority, - after=content_metadata2.discovery_date, + content_swhid, authority, after=content_metadata2.discovery_date, ) assert result.next_page_token is None assert result.results == [] def test_content_metadata_get_paginate(self, swh_storage, sample_data): content = sample_data.content + content_swhid = sample_data.swhid.content fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) 
swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority - ) + swh_storage.raw_extrinsic_metadata_get(content_swhid, authority) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + content_swhid, authority, limit=1 ) assert result.next_page_token is not None assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, - authority, - limit=1, - page_token=result.next_page_token, + content_swhid, authority, limit=1, page_token=result.next_page_token, ) assert result.next_page_token is None assert result.results == [content_metadata2] def test_content_metadata_get_paginate_same_date(self, swh_storage, sample_data): content = sample_data.content + content_swhid = sample_data.swhid.content fetcher1, fetcher2 = sample_data.fetchers[:2] authority = sample_data.metadata_authority content_metadata, content_metadata2 = sample_data.content_metadata[:2] - content_swhid = SWHID(object_type="content", object_id=content.sha1_git) - swh_storage.metadata_fetcher_add([fetcher1, fetcher2]) swh_storage.metadata_authority_add([authority]) @@ -3530,38 +3498,20 @@ ) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, content_swhid, authority, limit=1 + content_swhid, authority, limit=1 ) assert result.next_page_token is not None assert result.results == [content_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, - content_swhid, - authority, - limit=1, - page_token=result.next_page_token, + content_swhid, authority, limit=1, page_token=result.next_page_token, ) assert result.next_page_token is None assert result.results == [new_content_metadata2] - def test_content_metadata_get__invalid_id(self, swh_storage, sample_data): - origin = sample_data.origin - fetcher = 
sample_data.metadata_fetcher - authority = sample_data.metadata_authority - content_metadata, content_metadata2 = sample_data.content_metadata[:2] - - swh_storage.metadata_fetcher_add([fetcher]) - swh_storage.metadata_authority_add([authority]) - swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) - - with pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.CONTENT, origin.url, authority - ) - def test_origin_metadata_add(self, swh_storage, sample_data): origin = sample_data.origin + origin_swhid = sample_data.swhid.origin fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] @@ -3573,9 +3523,7 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) assert result.next_page_token is None assert list(sorted(result.results, key=lambda x: x.discovery_date)) == [ origin_metadata, @@ -3596,6 +3544,7 @@ def test_origin_metadata_add_duplicate(self, swh_storage, sample_data): """Duplicates should be silently updated.""" origin = sample_data.origin + origin_swhid = sample_data.swhid.origin fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] @@ -3611,9 +3560,7 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) assert result.next_page_token is None # which of the two behavior happens is backend-specific. 
@@ -3626,7 +3573,10 @@ ) def test_origin_metadata_get(self, swh_storage, sample_data): - origin, origin2 = sample_data.origins[:2] + origin = sample_data.origin + origin_swhid = sample_data.swhid.origin + origin2 = sample_data.origin2 + origin2_swhid = sample_data.swhid.origin2 fetcher, fetcher2 = sample_data.fetchers[:2] authority, authority2 = sample_data.authorities[:2] ( @@ -3637,7 +3587,7 @@ assert swh_storage.origin_add([origin, origin2]) == {"origin:add": 2} - origin2_metadata = attr.evolve(origin1_metadata2, target=origin2.url) + origin2_metadata = attr.evolve(origin1_metadata2, target=origin2_swhid) swh_storage.metadata_authority_add([authority, authority2]) swh_storage.metadata_fetcher_add([fetcher, fetcher2]) @@ -3646,30 +3596,25 @@ [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) assert result.next_page_token is None assert [origin1_metadata1, origin1_metadata2] == list( sorted(result.results, key=lambda x: x.discovery_date,) ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority2 - ) + result = swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority2) assert result.next_page_token is None assert [origin1_metadata3] == list( sorted(result.results, key=lambda x: x.discovery_date,) ) - result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin2.url, authority - ) + result = swh_storage.raw_extrinsic_metadata_get(origin2_swhid, authority) assert result.next_page_token is None assert [origin2_metadata] == list(result.results,) def test_origin_metadata_get_after(self, swh_storage, sample_data): origin = sample_data.origin + origin_swhid = sample_data.swhid.origin fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority origin_metadata, 
origin_metadata2 = sample_data.origin_metadata[:2] @@ -3681,8 +3626,7 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, + origin_swhid, authority, after=origin_metadata.discovery_date - timedelta(seconds=1), ) @@ -3693,25 +3637,20 @@ ] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, - authority, - after=origin_metadata.discovery_date, + origin_swhid, authority, after=origin_metadata.discovery_date, ) assert result.next_page_token is None assert result.results == [origin_metadata2] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, - authority, - after=origin_metadata2.discovery_date, + origin_swhid, authority, after=origin_metadata2.discovery_date, ) assert result.next_page_token is None assert result.results == [] def test_origin_metadata_get_paginate(self, swh_storage, sample_data): origin = sample_data.origin + origin_swhid = sample_data.swhid.origin fetcher = sample_data.metadata_fetcher authority = sample_data.metadata_authority origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] @@ -3722,28 +3661,23 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority - ) + swh_storage.raw_extrinsic_metadata_get(origin_swhid, authority) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority, limit=1 + origin_swhid, authority, limit=1 ) assert result.next_page_token is not None assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, - authority, - limit=1, - page_token=result.next_page_token, + origin_swhid, authority, limit=1, page_token=result.next_page_token, ) assert result.next_page_token is None assert 
result.results == [origin_metadata2] def test_origin_metadata_get_paginate_same_date(self, swh_storage, sample_data): origin = sample_data.origin + origin_swhid = sample_data.swhid.origin fetcher1, fetcher2 = sample_data.fetchers[:2] authority = sample_data.metadata_authority origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] @@ -3761,17 +3695,13 @@ swh_storage.raw_extrinsic_metadata_add([origin_metadata, new_origin_metadata2]) result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, origin.url, authority, limit=1 + origin_swhid, authority, limit=1 ) assert result.next_page_token is not None assert result.results == [origin_metadata] result = swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, - origin.url, - authority, - limit=1, - page_token=result.next_page_token, + origin_swhid, authority, limit=1, page_token=result.next_page_token, ) assert result.next_page_token is None assert result.results == [new_origin_metadata2] @@ -3798,24 +3728,6 @@ with pytest.raises(StorageArgumentException, match="fetcher"): swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data): - origin = sample_data.origin - authority = sample_data.metadata_authority - fetcher = sample_data.metadata_fetcher - origin_metadata, origin_metadata2 = sample_data.origin_metadata[:2] - content_metadata = sample_data.content_metadata[0] - assert swh_storage.origin_add([origin]) == {"origin:add": 1} - - swh_storage.metadata_fetcher_add([fetcher]) - swh_storage.metadata_authority_add([authority]) - - swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - - with pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.raw_extrinsic_metadata_get( - MetadataTargetType.ORIGIN, content_metadata.target, authority, - ) - class TestStorageGeneratedData: def test_generate_content_get_data(self, swh_storage, swh_contents):