diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1126,6 +1126,21 @@
             ),
         )
 
+    @_prepared_statement(
+        "SELECT authority_type, authority_url FROM raw_extrinsic_metadata "
+        "WHERE target = ?"
+    )
+    def raw_extrinsic_metadata_get_authorities(
+        self, target: str, *, statement
+    ) -> Iterable[Tuple[str, str]]:
+        """Yields one (authority_type, authority_url) pair for every
+        raw_extrinsic_metadata row whose target is ``target``; no
+        deduplication is done here."""
+        return (
+            (entry["authority_type"], entry["authority_url"])
+            for entry in self._execute_with_retries(statement, [target])
+        )
+
     ##########################
     # 'extid' table
     ##########################
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -1393,6 +1393,23 @@
 
         return list(results)
 
+    def raw_extrinsic_metadata_get_authorities(
+        self, target: ExtendedSWHID
+    ) -> List[MetadataAuthority]:
+        """Returns the distinct authorities found on the metadata rows of
+        ``target``, sorted by (type, url) so the order is reproducible."""
+        # The CQL runner yields one pair per row; set() drops the duplicates
+        # and sorted() removes the dependence on set iteration order.
+        authority_keys = sorted(
+            set(self._cql_runner.raw_extrinsic_metadata_get_authorities(str(target)))
+        )
+        return [
+            MetadataAuthority(
+                type=MetadataAuthorityType(authority_type), url=authority_url
+            )
+            for (authority_type, authority_url) in authority_keys
+        ]
+
     def metadata_fetcher_add(self, fetchers: List[MetadataFetcher]) -> Dict[str, int]:
         self.journal_writer.metadata_fetcher_add(fetchers)
         for fetcher in fetchers:
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -660,6 +660,14 @@
             if m.authority_type == authority_type and m.authority_url == authority_url
         )
 
+    def raw_extrinsic_metadata_get_authorities(
+        self, target: str
+    ) -> Iterable[Tuple[str, str]]:
+        """Yields the (authority_type, authority_url) pair of every stored
+        metadata row in the partition of ``target``; no deduplication here."""
+        metadata = self._raw_extrinsic_metadata.get_from_partition_key((target,))
+        return ((m.authority_type, m.authority_url) for m in metadata)
+
     #########################
     # 'extid' table
     #########################
diff --git a/swh/storage/interface.py b/swh/storage/interface.py
--- a/swh/storage/interface.py
+++ b/swh/storage/interface.py
@@ -1220,6 +1220,13 @@
         """
         ...
 
+    @remote_api_endpoint("raw_extrinsic_metadata/get_authorities")
+    def raw_extrinsic_metadata_get_authorities(
+        self, target: ExtendedSWHID
+    ) -> List[MetadataAuthority]:
+        """Returns all authorities that provided metadata on the given object."""
+        ...
+
     @remote_api_endpoint("metadata_fetcher/add")
     def metadata_fetcher_add(self, fetchers: List[MetadataFetcher],) -> Dict[str, int]:
         """Add new metadata fetchers to the storage.
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -1350,6 +1350,25 @@
             [(id_,) for id_ in ids],
         )
 
+    def raw_extrinsic_metadata_get_authorities(self, id: str, cur=None):
+        """Yields the distinct (type, url) rows of the authorities referenced by
+        the raw_extrinsic_metadata rows whose target is ``id``, sorted by
+        (type, url) so that the order is reproducible."""
+        cur = self._cursor(cur)
+        cur.execute(
+            """
+            SELECT
+                DISTINCT metadata_authority.type, metadata_authority.url
+            FROM raw_extrinsic_metadata
+            INNER JOIN metadata_authority
+                ON (metadata_authority.id=authority_id)
+            WHERE raw_extrinsic_metadata.target = %s
+            ORDER BY metadata_authority.type, metadata_authority.url
+            """,
+            (id,),
+        )
+        yield from cur
+
     metadata_fetcher_cols = ["name", "version"]
 
     def metadata_fetcher_add(self, name: str, version: str, cur=None) -> None:
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -1441,6 +1441,22 @@
             for row in db.raw_extrinsic_metadata_get_by_ids(ids)
         ]
 
+    @db_transaction()
+    def raw_extrinsic_metadata_get_authorities(
+        self, target: ExtendedSWHID, db=None, cur=None,
+    ) -> List[MetadataAuthority]:
+        """Returns one MetadataAuthority per (type, url) row that the database
+        layer reports for ``target``, in the order the rows are produced."""
+        return [
+            MetadataAuthority(
+                type=MetadataAuthorityType(authority_type), url=authority_url
+            )
+            for (
+                authority_type,
+                authority_url,
+            ) in db.raw_extrinsic_metadata_get_authorities(str(target), cur)
+        ]
+
     @timed
     @process_metrics
     @db_transaction()
diff --git a/swh/storage/tests/storage_tests.py b/swh/storage/tests/storage_tests.py
--- a/swh/storage/tests/storage_tests.py
+++ b/swh/storage/tests/storage_tests.py
@@ -4064,6 +4064,55 @@
             )
         ) == {content1_metadata1, content2_metadata}
 
+    def test_content_metadata_get_authorities(self, swh_storage, sample_data):
+        """Checks the authorities listed for three contents after adding metadata."""
+        content1, content2, content3 = sample_data.contents[:3]
+        fetcher, fetcher2 = sample_data.fetchers[:2]
+        authority, authority2 = sample_data.authorities[:2]
+        (
+            content1_metadata1,
+            content1_metadata2,
+            content1_metadata3,
+        ) = sample_data.content_metadata[:3]
+
+        # Copy of content1_metadata2 that targets content2 instead.
+        content2_fields = {
+            **remove_keys(content1_metadata2.to_dict(), ("id",)),  # recompute id
+            "target": str(content2.swhid()),
+        }
+        content2_metadata = RawExtrinsicMetadata.from_dict(content2_fields)
+
+        # Copy of content1_metadata2 that is attributed to authority2 instead.
+        reattributed_fields = {
+            **remove_keys(content1_metadata2.to_dict(), ("id",)),  # recompute id
+            "authority": authority2.to_dict(),
+        }
+        content1_metadata2 = RawExtrinsicMetadata.from_dict(reattributed_fields)
+
+        swh_storage.metadata_authority_add([authority, authority2])
+        swh_storage.metadata_fetcher_add([fetcher, fetcher2])
+
+        all_metadata = [
+            content1_metadata1,
+            content1_metadata2,
+            content1_metadata3,
+            content2_metadata,
+        ]
+        swh_storage.raw_extrinsic_metadata_add(all_metadata)
+
+        get_authorities = swh_storage.raw_extrinsic_metadata_get_authorities
+
+        # content1: both authorities are expected, in either order.
+        content1_authorities = get_authorities(content1.swhid())
+        assert content1_authorities in (
+            [authority, authority2],
+            [authority2, authority],
+        )
+
+        # content2: exactly one authority; content3: no metadata was added.
+        assert get_authorities(content2.swhid()) == [authority]
+        assert get_authorities(content3.swhid()) == []
+
     def test_origin_metadata_add(self, swh_storage, sample_data):
         origin = sample_data.origin
         fetcher = sample_data.metadata_fetcher