diff --git a/sql/upgrades/174.sql b/sql/upgrades/174.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/174.sql @@ -0,0 +1,12 @@ +-- SWH DB schema upgrade +-- from_version: 173 +-- to_version: 174 +-- description: remove authority and fetcher metadata + +insert into dbversion(version, release, description) + values(174, now(), 'Work In Progress'); + +alter table metadata_authority + drop column metadata; +alter table metadata_fetcher + drop column metadata; diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -81,8 +81,8 @@ ], "directory": ["id", "dir_entries", "file_entries", "rev_entries"], "extid": ["extid_type", "extid", "target_type", "target"], - "metadata_authority": ["type", "url", "metadata",], - "metadata_fetcher": ["name", "version", "metadata",], + "metadata_authority": ["type", "url"], + "metadata_fetcher": ["name", "version"], "origin": ["url"], "origin_visit": ["visit", "type", ("origin.url", "origin"), "date",], "origin_visit_status": [ diff --git a/swh/storage/cassandra/model.py b/swh/storage/cassandra/model.py --- a/swh/storage/cassandra/model.py +++ b/swh/storage/cassandra/model.py @@ -242,7 +242,6 @@ url: str type: str - metadata: str @dataclasses.dataclass @@ -253,7 +252,6 @@ name: str version: str - metadata: str @dataclasses.dataclass diff --git a/swh/storage/cassandra/schema.py b/swh/storage/cassandra/schema.py --- a/swh/storage/cassandra/schema.py +++ b/swh/storage/cassandra/schema.py @@ -176,14 +176,12 @@ CREATE TABLE IF NOT EXISTS metadata_authority ( url text, type ascii, - metadata text, PRIMARY KEY ((url), type) );""", """ CREATE TABLE IF NOT EXISTS metadata_fetcher ( name ascii, version ascii, - metadata text, PRIMARY KEY ((name), version) );""", """ diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -6,7 +6,6 @@ import base64 import datetime import itertools -import json import random import re from typing import ( @@ -1353,11 +1352,7 @@ self.journal_writer.metadata_fetcher_add(fetchers) for fetcher in fetchers: self._cql_runner.metadata_fetcher_add( - MetadataFetcherRow( - name=fetcher.name, - version=fetcher.version, - metadata=json.dumps(map_optional(dict, fetcher.metadata)), - ) + MetadataFetcherRow(name=fetcher.name, version=fetcher.version,) ) return {"metadata_fetcher:add": len(fetchers)} @@ -1366,11 +1361,7 @@ ) -> Optional[MetadataFetcher]: fetcher = self._cql_runner.metadata_fetcher_get(name, version) if fetcher: - return MetadataFetcher( - name=fetcher.name, - version=fetcher.version, - metadata=json.loads(fetcher.metadata), - ) + return MetadataFetcher(name=fetcher.name, version=fetcher.version,) else: return None @@ -1380,11 +1371,7 @@ self.journal_writer.metadata_authority_add(authorities) for authority in authorities: self._cql_runner.metadata_authority_add( - MetadataAuthorityRow( - url=authority.url, - type=authority.type.value, - metadata=json.dumps(map_optional(dict, authority.metadata)), - ) + MetadataAuthorityRow(url=authority.url, type=authority.type.value,) ) return {"metadata_authority:add": len(authorities)} @@ -1394,9 +1381,7 @@ authority = self._cql_runner.metadata_authority_get(type.value, url) if authority: return MetadataAuthority( - type=MetadataAuthorityType(authority.type), - url=authority.url, - metadata=json.loads(authority.metadata), + type=MetadataAuthorityType(authority.type), url=authority.url, ) else: return None diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py --- a/swh/storage/postgresql/db.py +++ b/swh/storage/postgresql/db.py @@ -29,7 +29,7 @@ """ - current_version = 173 + current_version = 174 def mktemp_dir_entry(self, entry_type, cur=None): self._cursor(cur).execute( @@ -1329,16 +1329,14 @@ cur.execute(" ".join(query_parts), args) yield from cur - metadata_fetcher_cols = ["name", "version", "metadata"] + metadata_fetcher_cols = ["name", "version"] - def metadata_fetcher_add( - self, name: str, version: str, metadata: bytes, cur=None - ) -> None: + def metadata_fetcher_add(self, name: str, version: str, cur=None) -> None: cur = self._cursor(cur) cur.execute( - "INSERT INTO metadata_fetcher (name, version, metadata) " - "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING", - (name, version, jsonize(metadata)), + "INSERT INTO metadata_fetcher (name, version) " + "VALUES (%s, %s) ON CONFLICT DO NOTHING", + (name, version), ) def metadata_fetcher_get(self, name: str, version: str, cur=None): @@ -1365,16 +1363,14 @@ else: return None - metadata_authority_cols = ["type", "url", "metadata"] + metadata_authority_cols = ["type", "url"] - def metadata_authority_add( - self, type: str, url: str, metadata: bytes, cur=None - ) -> None: + def metadata_authority_add(self, type: str, url: str, cur=None) -> None: cur = self._cursor(cur) cur.execute( - "INSERT INTO metadata_authority (type, url, metadata) " - "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING", - (type, url, jsonize(metadata)), + "INSERT INTO metadata_authority (type, url) " + "VALUES (%s, %s) ON CONFLICT DO NOTHING", + (type, url), ) def metadata_authority_get(self, type: str, url: str, cur=None): diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -1405,13 +1405,7 @@ self.journal_writer.metadata_fetcher_add(fetchers) count = 0 for fetcher in fetchers: - if fetcher.metadata is None: - raise StorageArgumentException( - "MetadataFetcher.metadata may not be None in metadata_fetcher_add." - ) - db.metadata_fetcher_add( - fetcher.name, fetcher.version, dict(fetcher.metadata), cur=cur - ) + db.metadata_fetcher_add(fetcher.name, fetcher.version, cur=cur) count += 1 return {"metadata_fetcher:add": count} @@ -1435,14 +1429,7 @@ self.journal_writer.metadata_authority_add(authorities) count = 0 for authority in authorities: - if authority.metadata is None: - raise StorageArgumentException( - "MetadataAuthority.metadata may not be None in " - "metadata_authority_add." - ) - db.metadata_authority_add( - authority.type.value, authority.url, dict(authority.metadata), cur=cur - ) + db.metadata_authority_add(authority.type.value, authority.url, cur=cur) count += 1 return {"metadata_authority:add": count} diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -17,7 +17,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(173, now(), 'Work In Progress'); + values(174, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -401,30 +401,26 @@ ( id serial not null, name text not null, - version text not null, - metadata jsonb not null + version text not null ); comment on table metadata_fetcher is 'Tools used to retrieve metadata'; comment on column metadata_fetcher.id is 'Internal identifier of the fetcher'; comment on column metadata_fetcher.name is 'Fetcher name'; comment on column metadata_fetcher.version is 'Fetcher version'; -comment on column metadata_fetcher.metadata is 'Extra information about the fetcher'; create table metadata_authority ( id serial not null, type text not null, - url text not null, - metadata jsonb not null + url text not null ); comment on table metadata_authority is 'Metadata authority information'; comment on column metadata_authority.id is 'Internal identifier of the authority'; comment on column metadata_authority.type is 'Type of authority (deposit_client/forge/registry)'; comment on column metadata_authority.url is 'Authority''s uri'; -comment on column metadata_authority.metadata is 'Other metadata about authority'; -- Extrinsic metadata on a DAG objects and origins. diff --git a/swh/storage/tests/storage_data.py b/swh/storage/tests/storage_data.py --- a/swh/storage/tests/storage_data.py +++ b/swh/storage/tests/storage_data.py @@ -460,26 +460,18 @@ origin, origin2 = origins[:2] metadata_authority = MetadataAuthority( - type=MetadataAuthorityType.DEPOSIT_CLIENT, - url="http://hal.inria.example.com/", - metadata={"location": "France"}, + type=MetadataAuthorityType.DEPOSIT_CLIENT, url="http://hal.inria.example.com/", ) metadata_authority2 = MetadataAuthority( - type=MetadataAuthorityType.REGISTRY, - url="http://wikidata.example.com/", - metadata={}, + type=MetadataAuthorityType.REGISTRY, url="http://wikidata.example.com/", ) authorities: Tuple[MetadataAuthority, ...] = ( metadata_authority, metadata_authority2, ) - metadata_fetcher = MetadataFetcher( - name="swh-deposit", version="0.0.1", metadata={"sword_version": "2"}, - ) - metadata_fetcher2 = MetadataFetcher( - name="swh-example", version="0.0.1", metadata={}, - ) + metadata_fetcher = MetadataFetcher(name="swh-deposit", version="0.0.1",) + metadata_fetcher2 = MetadataFetcher(name="swh-example", version="0.0.1",) fetchers: Tuple[MetadataFetcher, ...] = (metadata_fetcher, metadata_fetcher2) date_visit1 = datetime.datetime(2015, 1, 1, 23, 0, 0, tzinfo=datetime.timezone.utc) @@ -605,8 +597,8 @@ discovery_date=datetime.datetime( 2015, 1, 1, 21, 0, 0, tzinfo=datetime.timezone.utc ), - authority=attr.evolve(metadata_authority, metadata=None), - fetcher=attr.evolve(metadata_fetcher, metadata=None), + authority=metadata_authority, + fetcher=metadata_fetcher, format="json", metadata=b'{"foo": "bar"}', ) @@ -618,8 +610,8 @@ discovery_date=datetime.datetime( 2017, 1, 1, 22, 0, 0, tzinfo=datetime.timezone.utc ), - authority=attr.evolve(metadata_authority, metadata=None), - fetcher=attr.evolve(metadata_fetcher, metadata=None), + authority=metadata_authority, + fetcher=metadata_fetcher, format="yaml", metadata=b"foo: bar", )