diff --git a/docs/extrinsic-metadata-specification.rst b/docs/extrinsic-metadata-specification.rst --- a/docs/extrinsic-metadata-specification.rst +++ b/docs/extrinsic-metadata-specification.rst @@ -122,9 +122,11 @@ * Adding metadata:: - origin_metadata_add(origin_url, discovery_date, - authority, fetcher, - format, metadata) + raw_extrinsic_metadata_add( + "origin", origin_url, discovery_date, + authority, fetcher, + format, metadata + ) which adds a new `metadata` byte string obtained from a given authority and associated to the origin. @@ -138,7 +140,9 @@ * Getting latest metadata:: - origin_metadata_get_latest(origin_url, authority) + raw_extrinsic_metadata_get_latest( + "origin", origin_url, authority + ) where `authority` must be a dict containing keys `type` and `url`, which returns a dictionary corresponding to the latest metadata entry @@ -156,9 +160,11 @@ * Getting all metadata:: - origin_metadata_get(origin_url, - authority, - page_token, limit) + raw_extrinsic_metadata_get( + "origin", origin_url, + authority, + page_token, limit + ) where `authority` must be a dict containing keys `type` and `url` which returns a dictionary with keys: @@ -208,29 +214,33 @@ This is why it is important to qualify the metadata with the complete context for which it is intended, if any. 
-for each artifact type ``<X>``, there are two endpoints -to manipulate metadata associated with artifacts of that type: +The same two endpoints as for origin can be used, but with a different +value for the first argument: * Adding metadata:: - <X>_metadata_add(id, context, discovery_date, - authority, fetcher, - format, metadata) + raw_extrinsic_metadata_add( + type, id, context, discovery_date, + authority, fetcher, + format, metadata + ) * Getting all metadata:: - <X>_metadata_get(id, - authority, - after, - page_token, limit) + raw_extrinsic_metadata_get( + type, id, + authority, + after, + page_token, limit + ) defined similarly to ``origin_metadata_add`` and ``origin_metadata_get``, but where ``id`` is a core SWHID (with type matching ``<X>``), and with an extra ``context`` (argument when adding metadata, and dictionary key when getting them) that is a dictionary with keys -depending on the artifact type ``<X>``: +depending on the artifact ``type``: * for ``snapshot``: ``origin`` (a URL) and ``visit`` (an integer) * for ``release``: those above, plus ``snapshot`` diff --git a/sql/upgrades/159.sql b/sql/upgrades/159.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/159.sql @@ -0,0 +1,22 @@ +-- SWH DB schema upgrade +-- from_version: 158 +-- to_version: 159 +-- description: Rename "object_metadata" to "raw_extrinsic_metadata" + +-- latest schema version +insert into dbversion(version, release, description) + values(159, now(), 'Work Still In Progress'); + +alter table object_metadata + rename to raw_extrinsic_metadata; + +alter index object_metadata_content_authority_date_fetcher + rename to raw_extrinsic_metadata_content_authority_date_fetcher; + +alter table raw_extrinsic_metadata + rename constraint object_metadata_authority_fkey + to raw_extrinsic_metadata_authority_fkey; + +alter table raw_extrinsic_metadata + rename constraint object_metadata_fetcher_fkey + to raw_extrinsic_metadata_fetcher_fkey; diff --git a/swh/storage/cassandra/cql.py 
b/swh/storage/cassandra/cql.py --- a/swh/storage/cassandra/cql.py +++ b/swh/storage/cassandra/cql.py @@ -870,10 +870,10 @@ return next(iter(self._execute_with_retries(statement, [name, version])), None) ######################### - # 'object_metadata' table + # 'raw_extrinsic_metadata' table ######################### - _object_metadata_keys = [ + _raw_extrinsic_metadata_keys = [ "type", "id", "authority_type", @@ -893,23 +893,24 @@ ] @_prepared_statement( - f"INSERT INTO object_metadata ({', '.join(_object_metadata_keys)}) " - f"VALUES ({', '.join('?' for _ in _object_metadata_keys)})" + f"INSERT INTO raw_extrinsic_metadata " + f" ({', '.join(_raw_extrinsic_metadata_keys)}) " + f"VALUES ({', '.join('?' for _ in _raw_extrinsic_metadata_keys)})" ) - def object_metadata_add( + def raw_extrinsic_metadata_add( self, statement, **kwargs, ): assert set(kwargs) == set( - self._object_metadata_keys + self._raw_extrinsic_metadata_keys ), f"Bad kwargs: {set(kwargs)}" - params = [kwargs[key] for key in self._object_metadata_keys] + params = [kwargs[key] for key in self._raw_extrinsic_metadata_keys] return self._execute_with_retries(statement, params,) @_prepared_statement( - "SELECT * from object_metadata " + "SELECT * from raw_extrinsic_metadata " "WHERE id=? AND authority_url=? AND discovery_date>? AND authority_type=?" ) - def object_metadata_get_after_date( + def raw_extrinsic_metadata_get_after_date( self, id: str, authority_type: str, @@ -923,11 +924,11 @@ ) @_prepared_statement( - "SELECT * from object_metadata " + "SELECT * from raw_extrinsic_metadata " "WHERE id=? AND authority_type=? AND authority_url=? " "AND (discovery_date, fetcher_name, fetcher_version) > (?, ?, ?)" ) - def object_metadata_get_after_date_and_fetcher( + def raw_extrinsic_metadata_get_after_date_and_fetcher( self, id: str, authority_type: str, @@ -951,10 +952,10 @@ ) @_prepared_statement( - "SELECT * from object_metadata " + "SELECT * from raw_extrinsic_metadata " "WHERE id=? AND authority_url=? 
AND authority_type=?" ) - def object_metadata_get( + def raw_extrinsic_metadata_get( self, id: str, authority_type: str, authority_url: str, *, statement ) -> Iterable[Row]: return self._execute_with_retries( diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py --- a/swh/storage/cassandra/schema.py +++ b/swh/storage/cassandra/schema.py @@ -205,7 +205,7 @@ ); -CREATE TABLE IF NOT EXISTS object_metadata ( +CREATE TABLE IF NOT EXISTS raw_extrinsic_metadata ( type text, id text, @@ -262,7 +262,7 @@ TABLES = ( "skipped_content content revision revision_parent release " "directory directory_entry snapshot snapshot_branch " - "origin_visit origin object_metadata object_count " + "origin_visit origin raw_extrinsic_metadata object_count " "origin_visit_status metadata_authority " "metadata_fetcher" ).split() diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -965,7 +965,9 @@ def refresh_stat_counters(self): pass - def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata]) -> None: + def raw_extrinsic_metadata_add( + self, metadata: Iterable[RawExtrinsicMetadata] + ) -> None: for metadata_entry in metadata: if not self._cql_runner.metadata_authority_get( metadata_entry.authority.type.value, metadata_entry.authority.url @@ -981,7 +983,7 @@ ) try: - self._cql_runner.object_metadata_add( + self._cql_runner.raw_extrinsic_metadata_add( type=metadata_entry.type.value, id=str(metadata_entry.id), authority_type=metadata_entry.authority.type.value, @@ -1002,7 +1004,7 @@ except TypeError as e: raise StorageArgumentException(*e.args) - def object_metadata_get( + def raw_extrinsic_metadata_get( self, object_type: MetadataTargetType, id: Union[str, SWHID], @@ -1014,14 +1016,14 @@ if object_type == MetadataTargetType.ORIGIN: if isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type='origin', 
but " - f"provided id is an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type='origin', " + f"but provided id is an SWHID: {id!r}" ) else: if not isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type!='origin', but " - f"provided id is not an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type!='origin', " + f"but provided id is not an SWHID: {id!r}" ) if page_token is not None: @@ -1032,7 +1034,7 @@ raise StorageArgumentException( "page_token is inconsistent with the value of 'after'." ) - entries = self._cql_runner.object_metadata_get_after_date_and_fetcher( + entries = self._cql_runner.raw_extrinsic_metadata_get_after_date_and_fetcher( # noqa str(id), authority.type.value, authority.url, @@ -1041,11 +1043,11 @@ after_fetcher_url, ) elif after is not None: - entries = self._cql_runner.object_metadata_get_after_date( + entries = self._cql_runner.raw_extrinsic_metadata_get_after_date( str(id), authority.type.value, authority.url, after ) else: - entries = self._cql_runner.object_metadata_get( + entries = self._cql_runner.raw_extrinsic_metadata_get( str(id), authority.type.value, authority.url ) diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -1086,7 +1086,7 @@ def release_get_random(self, cur=None): return self._get_random_row_from_table("release", ["id"], "id", cur) - _object_metadata_context_cols = [ + _raw_extrinsic_metadata_context_cols = [ "origin", "visit", "snapshot", @@ -1097,7 +1097,7 @@ ] """The list of context columns for all artifact types.""" - _object_metadata_insert_cols = [ + _raw_extrinsic_metadata_insert_cols = [ "type", "id", "authority_id", @@ -1105,46 +1105,46 @@ "discovery_date", "format", "metadata", - *_object_metadata_context_cols, + *_raw_extrinsic_metadata_context_cols, ] - """List of columns of the object_metadata table, used when writing + """List of columns of the raw_extrinsic_metadata table, 
used when writing metadata.""" - _object_metadata_insert_query = f""" - INSERT INTO object_metadata - ({', '.join(_object_metadata_insert_cols)}) - VALUES ({', '.join('%s' for _ in _object_metadata_insert_cols)}) + _raw_extrinsic_metadata_insert_query = f""" + INSERT INTO raw_extrinsic_metadata + ({', '.join(_raw_extrinsic_metadata_insert_cols)}) + VALUES ({', '.join('%s' for _ in _raw_extrinsic_metadata_insert_cols)}) ON CONFLICT (id, authority_id, discovery_date, fetcher_id) DO NOTHING """ - object_metadata_get_cols = [ - "object_metadata.id", - "object_metadata.type", + raw_extrinsic_metadata_get_cols = [ + "raw_extrinsic_metadata.id", + "raw_extrinsic_metadata.type", "discovery_date", "metadata_authority.type", "metadata_authority.url", "metadata_fetcher.id", "metadata_fetcher.name", "metadata_fetcher.version", - *_object_metadata_context_cols, + *_raw_extrinsic_metadata_context_cols, "format", - "object_metadata.metadata", + "raw_extrinsic_metadata.metadata", ] - """List of columns of the object_metadata, metadata_authority, + """List of columns of the raw_extrinsic_metadata, metadata_authority, and metadata_fetcher tables, used when reading object metadata.""" - _object_metadata_select_query = f""" + _raw_extrinsic_metadata_select_query = f""" SELECT - {', '.join(object_metadata_get_cols)} - FROM object_metadata + {', '.join(raw_extrinsic_metadata_get_cols)} + FROM raw_extrinsic_metadata INNER JOIN metadata_authority ON (metadata_authority.id=authority_id) INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id) - WHERE object_metadata.id=%s AND authority_id=%s + WHERE raw_extrinsic_metadata.id=%s AND authority_id=%s """ - def object_metadata_add( + def raw_extrinsic_metadata_add( self, object_type: str, id: str, @@ -1162,7 +1162,7 @@ directory: Optional[str], cur, ): - query = self._object_metadata_insert_query + query = self._raw_extrinsic_metadata_insert_query args: Dict[str, Any] = dict( type=object_type, id=id, @@ -1180,11 +1180,11 @@ 
directory=directory, ) - params = [args[col] for col in self._object_metadata_insert_cols] + params = [args[col] for col in self._raw_extrinsic_metadata_insert_cols] cur.execute(query, params) - def object_metadata_get( + def raw_extrinsic_metadata_get( self, object_type: str, id: str, @@ -1194,7 +1194,7 @@ limit: int, cur, ): - query_parts = [self._object_metadata_select_query] + query_parts = [self._raw_extrinsic_metadata_select_query] args = [id, authority_id] if after_fetcher is not None: diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -142,7 +142,7 @@ self._persons = {} # {object_type: {id: {authority: [metadata]}}} - self._object_metadata: Dict[ + self._raw_extrinsic_metadata: Dict[ MetadataTargetType, Dict[ Union[str, SWHID], @@ -1017,7 +1017,9 @@ def refresh_stat_counters(self): pass - def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: + def raw_extrinsic_metadata_add( + self, metadata: Iterable[RawExtrinsicMetadata], + ) -> None: for metadata_entry in metadata: authority_key = self._metadata_authority_key(metadata_entry.authority) if authority_key not in self._metadata_authorities: @@ -1030,23 +1032,23 @@ f"Unknown fetcher {metadata_entry.fetcher}" ) - object_metadata_list = self._object_metadata[metadata_entry.type][ - metadata_entry.id - ][authority_key] + raw_extrinsic_metadata_list = self._raw_extrinsic_metadata[ + metadata_entry.type + ][metadata_entry.id][authority_key] - for existing_object_metadata in object_metadata_list: + for existing_raw_extrinsic_metadata in raw_extrinsic_metadata_list: if ( - self._metadata_fetcher_key(existing_object_metadata.fetcher) + self._metadata_fetcher_key(existing_raw_extrinsic_metadata.fetcher) == fetcher_key - and existing_object_metadata.discovery_date + and existing_raw_extrinsic_metadata.discovery_date == metadata_entry.discovery_date ): # Duplicate of an existing one; ignore it. 
break else: - object_metadata_list.add(metadata_entry) + raw_extrinsic_metadata_list.add(metadata_entry) - def object_metadata_get( + def raw_extrinsic_metadata_get( self, object_type: MetadataTargetType, id: Union[str, SWHID], @@ -1060,14 +1062,14 @@ if object_type == MetadataTargetType.ORIGIN: if isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type='origin', but " - f"provided id is an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type='origin', " + f"but provided id is an SWHID: {id!r}" ) else: if not isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type!='origin', but " - f"provided id is not an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type!='origin', " + f"but provided id is not an SWHID: {id!r}" ) if page_token is not None: @@ -1077,16 +1079,16 @@ raise StorageArgumentException( "page_token is inconsistent with the value of 'after'." ) - entries = self._object_metadata[object_type][id][authority_key].iter_after( - (after_time, after_fetcher) - ) + entries = self._raw_extrinsic_metadata[object_type][id][ + authority_key + ].iter_after((after_time, after_fetcher)) elif after is not None: - entries = self._object_metadata[object_type][id][authority_key].iter_from( - (after,) - ) + entries = self._raw_extrinsic_metadata[object_type][id][ + authority_key + ].iter_from((after,)) entries = (entry for entry in entries if entry.discovery_date > after) else: - entries = iter(self._object_metadata[object_type][id][authority_key]) + entries = iter(self._raw_extrinsic_metadata[object_type][id][authority_key]) if limit: entries = itertools.islice(entries, 0, limit + 1) diff --git a/swh/storage/interface.py b/swh/storage/interface.py --- a/swh/storage/interface.py +++ b/swh/storage/interface.py @@ -1083,8 +1083,10 @@ """Recomputes the statistics for `stat_counters`.""" ... 
- @remote_api_endpoint("object_metadata/add") - def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: + @remote_api_endpoint("raw_extrinsic_metadata/add") + def raw_extrinsic_metadata_add( + self, metadata: Iterable[RawExtrinsicMetadata], + ) -> None: """Add extrinsic metadata on objects (contents, directories, ...). The authority and fetcher must be known to the storage before @@ -1100,8 +1102,8 @@ """ ... - @remote_api_endpoint("object_metadata/get") - def object_metadata_get( + @remote_api_endpoint("raw_extrinsic_metadata/get") + def raw_extrinsic_metadata_get( self, object_type: MetadataTargetType, id: Union[str, SWHID], @@ -1110,7 +1112,7 @@ page_token: Optional[bytes] = None, limit: int = 1000, ) -> Dict[str, Union[Optional[bytes], List[RawExtrinsicMetadata]]]: - """Retrieve list of all object_metadata entries for the id + """Retrieve list of all raw_extrinsic_metadata entries for the id Args: object_type: one of the values of swh.model.model.MetadataTargetType diff --git a/swh/storage/retry.py b/swh/storage/retry.py --- a/swh/storage/retry.py +++ b/swh/storage/retry.py @@ -117,8 +117,10 @@ return self.storage.metadata_authority_add(authorities) @swh_retry - def object_metadata_add(self, metadata: Iterable[RawExtrinsicMetadata],) -> None: - return self.storage.object_metadata_add(metadata) + def raw_extrinsic_metadata_add( + self, metadata: Iterable[RawExtrinsicMetadata], + ) -> None: + return self.storage.raw_extrinsic_metadata_add(metadata) @swh_retry def directory_add(self, directories: Iterable[Directory]) -> Dict: diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql --- a/swh/storage/sql/30-swh-schema.sql +++ b/swh/storage/sql/30-swh-schema.sql @@ -17,7 +17,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(158, now(), 'Work In Progress'); + values(159, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ 
-426,7 +426,7 @@ -- Extrinsic metadata on a DAG objects and origins. -create table object_metadata +create table raw_extrinsic_metadata ( type text not null, id text not null, @@ -450,14 +450,14 @@ directory swhid ); -comment on table object_metadata is 'keeps all metadata found concerning an object'; -comment on column object_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on'; -comment on column object_metadata.id is 'the SWHID or origin URL for which the metadata was found'; -comment on column object_metadata.discovery_date is 'the date of retrieval'; -comment on column object_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; -comment on column object_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; -comment on column object_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; -comment on column object_metadata.metadata is 'original metadata in opaque format'; +comment on table raw_extrinsic_metadata is 'keeps all metadata found concerning an object'; +comment on column raw_extrinsic_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on'; +comment on column raw_extrinsic_metadata.id is 'the SWHID or origin URL for which the metadata was found'; +comment on column raw_extrinsic_metadata.discovery_date is 'the date of retrieval'; +comment on column raw_extrinsic_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; +comment on column raw_extrinsic_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; +comment on column raw_extrinsic_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; +comment on column raw_extrinsic_metadata.metadata is 'original metadata in opaque format'; -- Keep a cache of object counts diff --git a/swh/storage/sql/60-swh-indexes.sql 
b/swh/storage/sql/60-swh-indexes.sql --- a/swh/storage/sql/60-swh-indexes.sql +++ b/swh/storage/sql/60-swh-indexes.sql @@ -168,14 +168,14 @@ create unique index metadata_authority_type_url on metadata_authority(type, url); --- object_metadata -create unique index concurrently object_metadata_content_authority_date_fetcher on object_metadata(id, authority_id, discovery_date, fetcher_id); +-- raw_extrinsic_metadata +create unique index concurrently raw_extrinsic_metadata_content_authority_date_fetcher on raw_extrinsic_metadata(id, authority_id, discovery_date, fetcher_id); -alter table object_metadata add constraint object_metadata_authority_fkey foreign key (authority_id) references metadata_authority(id) not valid; -alter table object_metadata validate constraint object_metadata_authority_fkey; +alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_authority_fkey foreign key (authority_id) references metadata_authority(id) not valid; +alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_authority_fkey; -alter table object_metadata add constraint object_metadata_fetcher_fkey foreign key (fetcher_id) references metadata_fetcher(id) not valid; -alter table object_metadata validate constraint object_metadata_fetcher_fkey; +alter table raw_extrinsic_metadata add constraint raw_extrinsic_metadata_fetcher_fkey foreign key (fetcher_id) references metadata_fetcher(id) not valid; +alter table raw_extrinsic_metadata validate constraint raw_extrinsic_metadata_fetcher_fkey; -- object_counts create unique index concurrently object_counts_pkey on object_counts(object_type); diff --git a/swh/storage/storage.py b/swh/storage/storage.py --- a/swh/storage/storage.py +++ b/swh/storage/storage.py @@ -1093,7 +1093,7 @@ cur.execute("select * from swh_update_counter(%s)", (key,)) @db_transaction() - def object_metadata_add( + def raw_extrinsic_metadata_add( self, metadata: Iterable[RawExtrinsicMetadata], db, cur, ) -> None: counter = 
Counter[MetadataTargetType]() @@ -1101,7 +1101,7 @@ authority_id = self._get_authority_id(metadata_entry.authority, db, cur) fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur) - db.object_metadata_add( + db.raw_extrinsic_metadata_add( object_type=metadata_entry.type.value, id=str(metadata_entry.id), discovery_date=metadata_entry.discovery_date, @@ -1128,7 +1128,7 @@ ) @db_transaction() - def object_metadata_get( + def raw_extrinsic_metadata_get( self, object_type: MetadataTargetType, id: Union[str, SWHID], @@ -1142,14 +1142,14 @@ if object_type == MetadataTargetType.ORIGIN: if isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type='origin', but " - f"provided id is an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type='origin', " + f"but provided id is an SWHID: {id!r}" ) else: if not isinstance(id, SWHID): raise StorageArgumentException( - f"object_metadata_get called with object_type!='origin', but " - f"provided id is not an SWHID: {id!r}" + f"raw_extrinsic_metadata_get called with object_type!='origin', " + f"but provided id is not an SWHID: {id!r}" ) if page_token: @@ -1169,7 +1169,7 @@ "results": [], } - rows = db.object_metadata_get( + rows = db.raw_extrinsic_metadata_get( object_type, str(id), authority_id, @@ -1178,16 +1178,16 @@ limit + 1, cur, ) - rows = [dict(zip(db.object_metadata_get_cols, row)) for row in rows] + rows = [dict(zip(db.raw_extrinsic_metadata_get_cols, row)) for row in rows] results = [] for row in rows: row = row.copy() row.pop("metadata_fetcher.id") - assert str(id) == row["object_metadata.id"] + assert str(id) == row["raw_extrinsic_metadata.id"] result = RawExtrinsicMetadata( - type=MetadataTargetType(row["object_metadata.type"]), + type=MetadataTargetType(row["raw_extrinsic_metadata.type"]), id=id, authority=MetadataAuthority( type=MetadataAuthorityType(row["metadata_authority.type"]), @@ -1199,7 +1199,7 @@ ), discovery_date=row["discovery_date"], 
format=row["format"], - metadata=row["object_metadata.metadata"], + metadata=row["raw_extrinsic_metadata.metadata"], origin=row["origin"], visit=row["visit"], snapshot=map_optional(parse_swhid, row["snapshot"]), diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py --- a/swh/storage/tests/test_retry.py +++ b/swh/storage/tests/test_retry.py @@ -472,8 +472,8 @@ assert mock_memory.call_count == 1 -def test_retrying_proxy_storage_object_metadata_add(swh_storage, sample_data): - """Standard object_metadata_add works as before +def test_retrying_proxy_storage_raw_extrinsic_metadata_add(swh_storage, sample_data): + """Standard raw_extrinsic_metadata_add works as before """ origin = sample_data.origin @@ -483,21 +483,21 @@ swh_storage.metadata_authority_add([sample_data.metadata_authority]) swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher]) - origin_metadata = swh_storage.object_metadata_get( + origin_metadata = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority ) assert origin_metadata["next_page_token"] is None assert not origin_metadata["results"] - swh_storage.object_metadata_add([ori_meta]) + swh_storage.raw_extrinsic_metadata_add([ori_meta]) - origin_metadata = swh_storage.object_metadata_get( + origin_metadata = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, ori_meta.id, ori_meta.authority ) assert origin_metadata -def test_retrying_proxy_storage_object_metadata_add_with_retry( +def test_retrying_proxy_storage_raw_extrinsic_metadata_add_with_retry( monkeypatch_sleep, swh_storage, sample_data, mocker, fake_hash_collision, ): """Multiple retries for hash collision and psycopg2 error but finally ok @@ -510,7 +510,7 @@ swh_storage.metadata_authority_add([sample_data.metadata_authority]) swh_storage.metadata_fetcher_add([sample_data.metadata_fetcher]) mock_memory = mocker.patch( - "swh.storage.in_memory.InMemoryStorage.object_metadata_add" + 
"swh.storage.in_memory.InMemoryStorage.raw_extrinsic_metadata_add" ) mock_memory.side_effect = [ @@ -523,7 +523,7 @@ ] # No exception raised as insertion finally came through - swh_storage.object_metadata_add([ori_meta]) + swh_storage.raw_extrinsic_metadata_add([ori_meta]) mock_memory.assert_has_calls( [ # 3 calls, as long as error raised @@ -534,14 +534,14 @@ ) -def test_retrying_proxy_swh_storage_object_metadata_add_failure( +def test_retrying_proxy_swh_storage_raw_extrinsic_metadata_add_failure( swh_storage, sample_data, mocker ): """Unfiltered errors are raising without retry """ mock_memory = mocker.patch( - "swh.storage.in_memory.InMemoryStorage.object_metadata_add" + "swh.storage.in_memory.InMemoryStorage.raw_extrinsic_metadata_add" ) mock_memory.side_effect = StorageArgumentException("Refuse to add always!") @@ -551,7 +551,7 @@ swh_storage.origin_add([origin]) with pytest.raises(StorageArgumentException, match="Refuse to add"): - swh_storage.object_metadata_add([ori_meta]) + swh_storage.raw_extrinsic_metadata_add([ori_meta]) assert mock_memory.call_count == 1 diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -3196,9 +3196,9 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add(content_metadata) + swh_storage.raw_extrinsic_metadata_add(content_metadata) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority ) assert result["next_page_token"] is None @@ -3223,10 +3223,10 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([content_metadata, content_metadata2]) - swh_storage.object_metadata_add([new_content_metadata2]) + swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) + 
swh_storage.raw_extrinsic_metadata_add([new_content_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority ) assert result["next_page_token"] is None @@ -3256,7 +3256,7 @@ swh_storage.metadata_authority_add([authority, authority2]) swh_storage.metadata_fetcher_add([fetcher, fetcher2]) - swh_storage.object_metadata_add( + swh_storage.raw_extrinsic_metadata_add( [ content1_metadata1, content1_metadata2, @@ -3265,7 +3265,7 @@ ] ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content1_swhid, authority ) assert result["next_page_token"] is None @@ -3273,7 +3273,7 @@ sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content1_swhid, authority2 ) assert result["next_page_token"] is None @@ -3281,7 +3281,7 @@ sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content2_swhid, authority ) assert result["next_page_token"] is None @@ -3298,9 +3298,9 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([content_metadata, content_metadata2]) + swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, @@ -3311,7 +3311,7 @@ sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, @@ -3320,7 +3320,7 @@ assert result["next_page_token"] is None assert 
result["results"] == [content_metadata2] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, @@ -3339,18 +3339,18 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([content_metadata, content_metadata2]) - swh_storage.object_metadata_get( + swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) + swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1 ) assert result["next_page_token"] is not None assert result["results"] == [content_metadata] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, @@ -3377,15 +3377,17 @@ fetcher=attr.evolve(fetcher2, metadata=None), ) - swh_storage.object_metadata_add([content_metadata, new_content_metadata2]) + swh_storage.raw_extrinsic_metadata_add( + [content_metadata, new_content_metadata2] + ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, limit=1 ) assert result["next_page_token"] is not None assert result["results"] == [content_metadata] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, content_swhid, authority, @@ -3403,10 +3405,10 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([content_metadata, content_metadata2]) + swh_storage.raw_extrinsic_metadata_add([content_metadata, content_metadata2]) with pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.object_metadata_get( + 
swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.CONTENT, origin.url, authority ) @@ -3421,9 +3423,9 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) assert result["next_page_token"] is None @@ -3447,10 +3449,10 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) - swh_storage.object_metadata_add([new_origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([new_origin_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) assert result["next_page_token"] is None @@ -3481,11 +3483,11 @@ swh_storage.metadata_authority_add([authority, authority2]) swh_storage.metadata_fetcher_add([fetcher, fetcher2]) - swh_storage.object_metadata_add( + swh_storage.raw_extrinsic_metadata_add( [origin1_metadata1, origin1_metadata2, origin1_metadata3, origin2_metadata] ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) assert result["next_page_token"] is None @@ -3493,7 +3495,7 @@ sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority2 ) assert result["next_page_token"] is None @@ -3501,7 +3503,7 @@ sorted(result["results"], key=lambda x: x.discovery_date,) ) - result = swh_storage.object_metadata_get( + 
result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin2.url, authority ) assert result["next_page_token"] is None @@ -3517,9 +3519,9 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, @@ -3531,7 +3533,7 @@ origin_metadata2, ] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, @@ -3540,7 +3542,7 @@ assert result["next_page_token"] is None assert result["results"] == [origin_metadata2] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, @@ -3559,19 +3561,19 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) - swh_storage.object_metadata_get( + swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority ) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1 ) assert result["next_page_token"] is not None assert result["results"] == [origin_metadata] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, @@ -3597,15 +3599,15 @@ fetcher=attr.evolve(fetcher2, metadata=None), ) - swh_storage.object_metadata_add([origin_metadata, new_origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, 
new_origin_metadata2]) - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, limit=1 ) assert result["next_page_token"] is not None assert result["results"] == [origin_metadata] - result = swh_storage.object_metadata_get( + result = swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, origin.url, authority, @@ -3624,7 +3626,7 @@ swh_storage.metadata_fetcher_add([fetcher]) with pytest.raises(StorageArgumentException, match="authority"): - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) def test_origin_metadata_add_missing_fetcher(self, swh_storage, sample_data): origin = sample_data.origin @@ -3635,7 +3637,7 @@ swh_storage.metadata_authority_add([authority]) with pytest.raises(StorageArgumentException, match="fetcher"): - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) def test_origin_metadata_get__invalid_id_type(self, swh_storage, sample_data): origin = sample_data.origin @@ -3648,10 +3650,10 @@ swh_storage.metadata_fetcher_add([fetcher]) swh_storage.metadata_authority_add([authority]) - swh_storage.object_metadata_add([origin_metadata, origin_metadata2]) + swh_storage.raw_extrinsic_metadata_add([origin_metadata, origin_metadata2]) with pytest.raises(StorageArgumentException, match="SWHID"): - swh_storage.object_metadata_get( + swh_storage.raw_extrinsic_metadata_get( MetadataTargetType.ORIGIN, content_metadata.id, authority, )