Page MenuHomeSoftware Heritage
Paste P571

migrate load-* task arguments to use kwargs
ActivePublic

Authored by ardumont on Dec 10 2019, 6:40 PM.
create index on task(type, status, policy);
update task
set arguments=jsonb_set(arguments, '{kwargs}', json_build_object('url', arguments#>>'{kwargs,package_url}')::jsonb)
where type = 'load-npm' and
status = 'next_run_not_scheduled'
and policy='recurring';
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-hg' and
status != 'disabled' and
policy ='recurring';
update task
-- specific setup for svn
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{kwargs,origin_url}'))
where type = 'load-svn' and
id in (229511019, 219506963, 218934960, 198944775, 169802560, 169802559, 169802558, 169802557, 169802556, 169802555, 169802553, 169802548, 169801042, 169107074, 167830457, 167830469, 167830467, 167830466, 167830492, 167830468, 167830493, 167830503, 167830685);
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-svn' and
id in ( 167877660, 214983345, 167877647, 167877656, 167877651, 167877658, 167877646, 167877510);
-- many tryouts for this one as there are a lot of those
-- tryout 4: mix of clean up indexes and 2.
-- drop indexes first
-- "task_expr_idx" hash ((arguments -> 'args'::text))
-- "task_expr_idx1" hash ((arguments -> 'kwargs'::text)) INVALID
-- "task_kwargs" gin ((arguments -> 'kwargs'::text))
drop index "task_expr_idx";
drop index "task_expr_idx1";
drop index "task_kwargs";
-- drop constraints too
-- Check constraints:
-- "task_arguments_check" CHECK ((arguments -> 'args'::text) IS NOT NULL)
-- "task_arguments_check1" CHECK ((arguments -> 'kwargs'::text) IS NOT NULL)
alter table task drop constraint task_arguments_check;
alter table task drop constraint task_arguments_check1;
-- run it
for i in $(seq 0 100000 241824959); do
cat <<EOF
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-git' and
status != 'disabled' and
policy = 'recurring' and
$i <= id and id < $((i + 100000));
EOF
done > migrate-load-git-recurring.sql
-- status: in-progress
-- oneshot
-- select min(id), max(id)
-- from task
-- where type = 'load-git' and
-- status != 'disabled' and
-- policy = 'oneshot';
--
-- min | max
-- ----------+-----------
-- 93074642 | 241072686
-- (1 row)
for i in $(seq 93000000 100000 241072687); do
cat <<EOF
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-git' and
status != 'disabled' and
policy = 'oneshot' and
$i <= id and id < $((i + 100000));
EOF
done > migrate-load-git-oneshot.sql
-- status: OK
-- create index back
create index task_args on task using btree ((arguments -> 'args'));
create index task_kwargs on task using gin ((arguments -> 'kwargs'));
-- and constraints
alter table task add constraint task_arguments_check (arguments -> 'kwargs'::text) IS NOT NULL;

Event Timeline

ardumont created this paste.Dec 10 2019, 6:40 PM
ardumont changed the title of this paste from migrate load-npm task arguments to migrate load-* task arguments to use kwargs.Dec 11 2019, 11:16 AM
ardumont edited the content of this paste. (Show Details)
ardumont added a project: Scheduling utilities.
ardumont edited the content of this paste. (Show Details)Dec 11 2019, 11:20 AM
ardumont edited the content of this paste. (Show Details)Dec 11 2019, 3:15 PM

Other load-git migration tryouts (to keep a reference on):

select json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
from task
where type = 'load-git' and
      status != 'disabled' and
      policy ='recurring'
limit 10;

-- tryout 1: no
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-git' and
      status != 'disabled' and
      policy ='recurring';

-- tryout 2: no
 for i in $(seq 0 100000 241824959); do
  cat <<EOF
update task
set arguments = json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}'))
where type = 'load-git' and
      status != 'disabled' and
      policy = 'recurring' and
      $i <= id and id < $((i + 100000));
EOF
done > migrate-load-git.sql

-- tryout 3: neither
create table git_task as
select id, type,
       json_build_object('args', '[]', 'kwargs', json_build_object('url', arguments#>>'{args,0}')),
       next_run,
       current_interval,
       status,
       policy,
       retries_left,
       priority
from task
where type = 'load-git' and
      status != 'disabled';
-- stop because slow