diff --git a/sql/upgrades/171.sql b/sql/upgrades/171.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/171.sql @@ -0,0 +1,26 @@ +-- SWH DB schema upgrade +-- from_version: 170 +-- to_version: 171 +-- description: add raw_extrinsic_metadata.id + +insert into dbversion(version, release, description) + values(171, now(), 'Work In Progress'); + +-- 1. add the 'id' column + +alter table raw_extrinsic_metadata + add column id sha1_git; + +-- 2. restart swh-storage, so that it starts writing the id (but does not read it) + +-- 3. truncate the raw_extrinsic_metadata table + +-- 4. make the id column not null, and index it + +alter table raw_extrinsic_metadata + alter column id set not null; + +create unique index concurrently raw_extrinsic_metadata_pkey on raw_extrinsic_metadata(id); +alter table raw_extrinsic_metadata add primary key using index raw_extrinsic_metadata_pkey; + +-- 5. backfill from kafka diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py --- a/swh/storage/postgresql/db.py +++ b/swh/storage/postgresql/db.py @@ -29,7 +29,7 @@ """ - current_version = 170 + current_version = 171 def mktemp_dir_entry(self, entry_type, cur=None): self._cursor(cur).execute( @@ -1209,6 +1209,7 @@ """The list of context columns for all artifact types.""" _raw_extrinsic_metadata_insert_cols = [ + "id", "type", "target", "authority_id", @@ -1257,6 +1258,7 @@ def raw_extrinsic_metadata_add( self, + id: bytes, type: str, target: str, discovery_date: datetime.datetime, @@ -1275,6 +1277,7 @@ ): query = self._raw_extrinsic_metadata_insert_query args: Dict[str, Any] = dict( + id=id, type=type, target=target, authority_id=authority_id, diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py --- a/swh/storage/postgresql/storage.py +++ b/swh/storage/postgresql/storage.py @@ -1312,6 +1312,7 @@ fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur) db.raw_extrinsic_metadata_add( + id=metadata_entry.id, type=metadata_entry.target.object_type.name.lower(), target=str(metadata_entry.target), discovery_date=metadata_entry.discovery_date, diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql --- a/swh/storage/sql/30-schema.sql +++ b/swh/storage/sql/30-schema.sql @@ -17,7 +17,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(170, now(), 'Work In Progress'); + values(171, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -430,6 +430,8 @@ -- Extrinsic metadata on a DAG objects and origins. create table raw_extrinsic_metadata ( + id sha1_git not null, + type text not null, target text not null, diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql --- a/swh/storage/sql/60-indexes.sql +++ b/swh/storage/sql/60-indexes.sql @@ -264,6 +264,10 @@ -- raw_extrinsic_metadata + +create unique index concurrently raw_extrinsic_metadata_pkey on raw_extrinsic_metadata(id); +alter table raw_extrinsic_metadata add primary key using index raw_extrinsic_metadata_pkey; + create unique index concurrently raw_extrinsic_metadata_content_authority_date_fetcher on raw_extrinsic_metadata(target, authority_id, discovery_date, fetcher_id); \if :dbflavor_default