diff --git a/sql/upgrades/132.sql b/sql/upgrades/132.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/132.sql @@ -0,0 +1,11 @@ +-- SWH DB schema upgrade +-- from_version: 131 +-- to_version: 132 +-- description: Use sha1 instead of bigint as FK from origin_visit to snapshot (part 2: backfill) + +update origin_visit + set snapshot=snapshot.id + from snapshot + where snapshot.object_id=origin_visit.snapshot_id; + +alter table origin_visit validate constraint origin_visit_snapshot_fkey; diff --git a/sql/upgrades/133.sql b/sql/upgrades/133.sql new file mode 100644 --- /dev/null +++ b/sql/upgrades/133.sql @@ -0,0 +1,12 @@ +-- SWH DB schema upgrade +-- from_version: 132 +-- to_version: 133 +-- description: Use sha1 instead of bigint as FK from origin_visit to snapshot (part 3: remove old column) + +insert into dbversion(version, release, description) + values(133, now(), 'Work In Progress'); + +drop index constraint origin_visit_snapshot_id_fkey; +alter table origin_visit drop column snapshot_id; + +drop function swh_snapshot_get_by_origin_visit; diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -175,7 +175,8 @@ def snapshot_get_by_origin_visit(self, origin_id, visit_id, cur=None): cur = self._cursor(cur) query = """\ - SELECT swh_snapshot_get_by_origin_visit(%s, %s) + SELECT snapshot from origin_visit where + origin_visit.origin=%s and origin_visit.visit=%s; """ cur.execute(query, (origin_id, visit_id)) @@ -311,15 +312,8 @@ update_cols.append('metadata=%s') values.append(jsonize(updates.pop('metadata'))) if 'snapshot' in updates: - # New 'snapshot' column update_cols.append('snapshot=%s') - values.append(updates['snapshot']) - - # Old 'snapshot_id' column - update_cols.append('snapshot_id=snapshot.object_id') - from_ = 'FROM snapshot' - where.append('snapshot.id=%s') - where_values.append(updates.pop('snapshot')) + values.append(updates.pop('snapshot')) assert not updates, 'Unknown fields: %r' % updates query = """UPDATE origin_visit SET {update_cols} @@ -355,13 +349,12 @@ args = (origin_id, limit) query = """\ - SELECT %s, - (select id from snapshot where object_id = snapshot_id) as snapshot + SELECT %s FROM origin_visit WHERE origin=%%s %s order by visit asc limit %%s""" % ( - ', '.join(self.origin_visit_get_cols[:-1]), extra_condition + ', '.join(self.origin_visit_get_cols), extra_condition ) cur.execute(query, args) @@ -382,12 +375,10 @@ cur = self._cursor(cur) query = """\ - SELECT %s, - (select id from snapshot where object_id = snapshot_id) - as snapshot + SELECT %s FROM origin_visit WHERE origin = %%s AND visit = %%s - """ % (', '.join(self.origin_visit_get_cols[:-1])) + """ % (', '.join(self.origin_visit_get_cols)) cur.execute(query, (origin_id, visit_id)) r = cur.fetchall() @@ -425,15 +416,13 @@ (tuple(allowed_statuses),)).decode() query = """\ - SELECT %s, - (select id from snapshot where object_id = snapshot_id) - as snapshot + SELECT %s FROM origin_visit WHERE - origin = %%s AND snapshot_id is not null %s + origin = %%s AND snapshot is not null %s ORDER BY date DESC, visit DESC LIMIT 1 - """ % (', '.join(self.origin_visit_get_cols[:-1]), extra_clause) + """ % (', '.join(self.origin_visit_get_cols), extra_clause) cur.execute(query, (origin_id,)) r = cur.fetchone() diff --git a/swh/storage/sql/30-swh-schema.sql b/swh/storage/sql/30-swh-schema.sql --- a/swh/storage/sql/30-swh-schema.sql +++ b/swh/storage/sql/30-swh-schema.sql @@ -12,7 +12,7 @@ -- latest schema version insert into dbversion(version, release, description) - values(130, now(), 'Work In Progress'); + values(133, now(), 'Work In Progress'); -- a SHA1 checksum create domain sha1 as bytea check (length(value) = 20); @@ -211,7 +211,6 @@ date timestamptz not null, status origin_visit_status not null, metadata jsonb, - snapshot_id bigint, snapshot sha1_git ); @@ -220,7 +219,6 @@ comment on column origin_visit.date is 'Visit timestamp'; comment on column origin_visit.status is 'Visit result'; comment on column origin_visit.metadata is 'Origin metadata at visit time'; -comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time'; comment on column origin_visit.snapshot is 'Origin snapshot at visit time'; diff --git a/swh/storage/sql/40-swh-func.sql b/swh/storage/sql/40-swh-func.sql --- a/swh/storage/sql/40-swh-func.sql +++ b/swh/storage/sql/40-swh-func.sql @@ -782,18 +782,6 @@ group by target_type; $$; -create or replace function swh_snapshot_get_by_origin_visit(origin_id bigint, visit_id bigint) - returns snapshot.id%type - language sql - stable -as $$ - select snapshot.id - from origin_visit - left join snapshot - on snapshot.object_id = origin_visit.snapshot_id - where origin_visit.origin=origin_id and origin_visit.visit=visit_id; -$$; - -- Absolute path: directory reference + complete path relative to it create type content_dir as ( directory sha1_git, diff --git a/swh/storage/sql/60-swh-indexes.sql b/swh/storage/sql/60-swh-indexes.sql --- a/swh/storage/sql/60-swh-indexes.sql +++ b/swh/storage/sql/60-swh-indexes.sql @@ -135,9 +135,6 @@ alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid; alter table origin_visit validate constraint origin_visit_origin_fkey; -alter table origin_visit add constraint origin_visit_snapshot_id_fkey foreign key (snapshot_id) references snapshot(object_id) not valid; -alter table origin_visit validate constraint origin_visit_snapshot_id_fkey; - -- release create unique index concurrently release_pkey on release(id); alter table release add primary key using index release_pkey; diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -1293,6 +1293,19 @@ ('origin_visit', data4), ('origin_visit', data5)]) + def test_origin_visit_update_missing_snapshot(self): + # given + origin_id = self.storage.origin_add_one(self.origin) + + origin_visit = self.storage.origin_visit_add( + origin_id, + date=self.date_visit1) + + # when + self.storage.origin_visit_update( + origin_id, origin_visit['visit'], + snapshot=self.snapshot['id']) + def test_origin_visit_get_by(self): origin_id = self.storage.origin_add_one(self.origin2) origin_id2 = self.storage.origin_add_one(self.origin)