Page MenuHomeSoftware Heritage
Paste P747

160-161-bis.sql
ActivePublic

Authored by ardumont on Aug 25 2020, 2:11 PM.
-- SWH DB schema upgrade
-- from_version: 159
-- to_version: 160
-- description: Make neg_utc_offset not null
-- latest schema version
insert into dbversion(version, release, description)
values(160, now(), 'Work Still In Progress');
update release
set date_neg_utc_offset=false
where date is not null and date_neg_utc_offset is null;
alter table release
add constraint release_date_neg_utc_offset_not_null
check (date is null or date_neg_utc_offset is not null)
not valid;
alter table release
validate constraint release_date_neg_utc_offset_not_null;
-- for 20000 ranges on revision id
-- author date_neg_utc_offset to false when needed
update revision set date_neg_utc_offset=false where date is not null and date_neg_utc_offset is null and '\x000346dc5d638865a00000000000000000000000' <= id and id < '\x00068db8bac710cb400000000000000000000000';
-- committer_date_neg_utc_offset to false when needed
update revision set committer_date_neg_utc_offset=false where committer_date is not null and committer_date_neg_utc_offset is null and '\x000346dc5d638865a00000000000000000000000' <= id and id < '\x00068db8bac710cb400000000000000000000000';
-- extra_headers to empty array (not null)
update revision set extra_headers = ARRAY[]::bytea[][] where extra_headers is null and '\x000346dc5d638865a00000000000000000000000' <= id and id < '\x00068db8bac710cb400000000000000000000000';
update revision set date_neg_utc_offset=false where date is not null and date_neg_utc_offset is null and '\x00068db8bac710cb400000000000000000000000' <= id and id < '\x0009d495182a9930e00000000000000000000000';
update revision set committer_date_neg_utc_offset=false where committer_date is not null and committer_date_neg_utc_offset is null and '\x00068db8bac710cb400000000000000000000000' <= id and id < '\x0009d495182a9930e00000000000000000000000';
update revision set extra_headers = ARRAY[]::bytea[][] where extra_headers is null and '\x00068db8bac710cb400000000000000000000000' <= id and id < '\x0009d495182a9930e00000000000000000000000';
...
alter table revision
add constraint revision_date_neg_utc_offset_not_null
check (date is null or date_neg_utc_offset is not null)
not valid;
alter table revision
add constraint revision_committer_date_neg_utc_offset_not_null
check (committer_date is null or committer_date_neg_utc_offset is not null)
not valid;
alter table revision
validate constraint revision_date_neg_utc_offset_not_null;
alter table revision
validate constraint revision_committer_date_neg_utc_offset_not_null;
alter table revision
alter column extra_headers set not null;

Event Timeline

ardumont edited the content of this paste. (Show Details)

actually again modified so we do the 3 update queries (160, 161) next to each other to avoid caveats [1].

Something like:

update revision set date_neg_utc_offset=false where date is not null and date_neg_utc_offset is null and id <= '\x0000000000000000000000000000000000000000' and id < '\x000346dc5d638865a00000000000000000000000';
update revision set committer_date_neg_utc_offset=false where committer_date is not null and committer_date_neg_utc_offset is null and id <= '\x0000000000000000000000000000000000000000' and id < '\x000346dc5d638865a00000000000000000000000';
update revision set extra_headers = ARRAY[]::bytea[][] where extra_headers is null and id <= '\x0000000000000000000000000000000000000000' and id < '\x000346dc5d638865a00000000000000000000000';

update revision set date_neg_utc_offset=false where date is not null and date_neg_utc_offset is null and id <= '\x000346dc5d638865a00000000000000000000000' and id < '\x00068db8bac710cb400000000000000000000000';
update revision set committer_date_neg_utc_offset=false where committer_date is not null and committer_date_neg_utc_offset is null and id <= '\x000346dc5d638865a00000000000000000000000' and id < '\x00068db8bac710cb400000000000000000000000';
update revision set extra_headers = ARRAY[]::bytea[][] where extra_headers is null and id <= '\x000346dc5d638865a00000000000000000000000' and id < '\x00068db8bac710cb400000000000000000000000';

update revision set date_neg_utc_offset=false where date is not null and date_neg_utc_offset is null and id <= '\x00068db8bac710cb400000000000000000000000' and id < '\x0009d495182a9930e00000000000000000000000';
update revision set committer_date_neg_utc_offset=false where committer_date is not null and committer_date_neg_utc_offset is null and id <= '\x00068db8bac710cb400000000000000000000000' and id < '\x0009d495182a9930e00000000000000000000000';
update revision set extra_headers = ARRAY[]::bytea[][] where extra_headers is null and id <= '\x00068db8bac710cb400000000000000000000000' and id < '\x0009d495182a9930e00000000000000000000000';

...

[1] Thought process is to avoid the following:

  • after each pass only of the each update queries on all revisions (which rewrote tuples), that will probably trigger a vacuum blocking the subsequent update... So doing that way, the vacuum should happen once. This seems to work as the first one is long but the 2 next update queries seems pretty fast (relying on pg_activity).
  • splitting on range so the query is not ever lasting (meaning long-standing transaction) and create replication issues down the line...
ardumont changed the title of this paste from 160-bis.sql to 160-161-bis.sql.Aug 26 2020, 12:25 PM