Page MenuHomeSoftware Heritage

D5029.diff
No OneTemporary

D5029.diff

diff --git a/sql/upgrades/171.sql b/sql/upgrades/171.sql
new file mode 100644
--- /dev/null
+++ b/sql/upgrades/171.sql
@@ -0,0 +1,26 @@
+-- SWH DB schema upgrade
+-- from_version: 170
+-- to_version: 171
+-- description: add raw_extrinsic_metadata.id
+
+insert into dbversion(version, release, description)
+ values(171, now(), 'Work In Progress');
+
+-- 1. add the 'id' column
+
+alter table raw_extrinsic_metadata
+ add column id sha1_git;
+
+-- 2. restart swh-storage, so that it starts writing the id (but does not read it)
+
+-- 3. truncate the raw_extrinsic_metadata table
+
+-- 4. make the id column not null, and index it
+
+alter table raw_extrinsic_metadata
+ alter column id set not null;
+
+create unique index concurrently raw_extrinsic_metadata_pkey on raw_extrinsic_metadata(id);
+alter table raw_extrinsic_metadata add primary key using index raw_extrinsic_metadata_pkey;
+
+-- 5. backfill from kafka
diff --git a/swh/storage/postgresql/db.py b/swh/storage/postgresql/db.py
--- a/swh/storage/postgresql/db.py
+++ b/swh/storage/postgresql/db.py
@@ -29,7 +29,7 @@
"""
- current_version = 170
+ current_version = 171
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute(
@@ -1209,6 +1209,7 @@
"""The list of context columns for all artifact types."""
_raw_extrinsic_metadata_insert_cols = [
+ "id",
"type",
"target",
"authority_id",
@@ -1257,6 +1258,7 @@
def raw_extrinsic_metadata_add(
self,
+ id: bytes,
type: str,
target: str,
discovery_date: datetime.datetime,
@@ -1275,6 +1277,7 @@
):
query = self._raw_extrinsic_metadata_insert_query
args: Dict[str, Any] = dict(
+ id=id,
type=type,
target=target,
authority_id=authority_id,
diff --git a/swh/storage/postgresql/storage.py b/swh/storage/postgresql/storage.py
--- a/swh/storage/postgresql/storage.py
+++ b/swh/storage/postgresql/storage.py
@@ -1312,6 +1312,7 @@
fetcher_id = self._get_fetcher_id(metadata_entry.fetcher, db, cur)
db.raw_extrinsic_metadata_add(
+ id=metadata_entry.id,
type=metadata_entry.target.object_type.name.lower(),
target=str(metadata_entry.target),
discovery_date=metadata_entry.discovery_date,
diff --git a/swh/storage/sql/30-schema.sql b/swh/storage/sql/30-schema.sql
--- a/swh/storage/sql/30-schema.sql
+++ b/swh/storage/sql/30-schema.sql
@@ -17,7 +17,7 @@
-- latest schema version
insert into dbversion(version, release, description)
- values(170, now(), 'Work In Progress');
+ values(171, now(), 'Work In Progress');
-- a SHA1 checksum
create domain sha1 as bytea check (length(value) = 20);
@@ -430,6 +430,8 @@
-- Extrinsic metadata on a DAG objects and origins.
create table raw_extrinsic_metadata
(
+ id sha1_git not null,
+
type text not null,
target text not null,
diff --git a/swh/storage/sql/60-indexes.sql b/swh/storage/sql/60-indexes.sql
--- a/swh/storage/sql/60-indexes.sql
+++ b/swh/storage/sql/60-indexes.sql
@@ -264,6 +264,10 @@
-- raw_extrinsic_metadata
+
+create unique index concurrently raw_extrinsic_metadata_pkey on raw_extrinsic_metadata(id);
+alter table raw_extrinsic_metadata add primary key using index raw_extrinsic_metadata_pkey;
+
create unique index concurrently raw_extrinsic_metadata_content_authority_date_fetcher on raw_extrinsic_metadata(target, authority_id, discovery_date, fetcher_id);
\if :dbflavor_default

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 1:18 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3224373

Event Timeline