begin;
-- Identify the releases and snapshot branches reachable from 'hg' origins
-- and record them in two work tables so they can be cleaned up later:
--   - temp_to_cleanup_release: ids of the releases targeted by a branch of
--     a snapshot referenced by an 'hg' origin visit
--   - temp_to_cleanup_snapshot_branches: (snapshot_id, branch_id) pairs of
--     every snapshot referenced by an 'hg' origin visit
-- Both tables are created here with a plain `create table`, so the call
-- raises an error if either of them already exists.
-- Takes no argument and returns nothing; its only effect is to create and
-- populate the two tables above.
create or replace function mark_wrong_datarelease_and_snapshot_for_cleanup()
returns void
language plpgsql
as $$
begin
    -- data to clean up
    create table temp_to_cleanup_release(
        id sha1_git not null
    );
    create table temp_to_cleanup_snapshot_branches(
        snapshot_id bigint not null,
        branch_id bigint not null
    );

    -- reference the releases to clean up later (one row per release)
    insert into temp_to_cleanup_release(id)
    select distinct r.id
    from origin o
    inner join origin_visit ov on o.id = ov.origin
    inner join snapshot s on ov.snapshot_id = s.object_id
    inner join snapshot_branches sbs on s.object_id = sbs.snapshot_id
    inner join snapshot_branch sb on sbs.branch_id = sb.object_id
    inner join release r on r.id = sb.target
    where o.type = 'hg'
      and sb.target_type = 'release';

    -- Note: referencing only the snapshot branches which target a release
    -- is not enough. A snapshot mixing targets of type release with other
    -- stuff (revision, etc...) must be marked as a whole, otherwise its
    -- identifier would be left dangling (indeed, the next ingestion will
    -- create other correct snapshots with other identifiers).
    -- So every (snapshot, branch) pair of the 'hg' origins is marked; this
    -- set already contains the pairs whose branch targets a release.
    insert into temp_to_cleanup_snapshot_branches(snapshot_id, branch_id)
    select distinct s.object_id, sbs.branch_id
    from origin o
    inner join origin_visit ov on o.id = ov.origin
    inner join snapshot s on ov.snapshot_id = s.object_id
    inner join snapshot_branches sbs on s.object_id = sbs.snapshot_id
    inner join snapshot_branch sb on sbs.branch_id = sb.object_id
    where o.type = 'hg';
end;
$$;
-- Mark the 'simple' origin data (origin_visit and fetch_history rows of the
-- 'hg' origins) for cleanup. Populates, creating them when missing:
--   - temp_to_cleanup_origin_visit (origin, visit)
--   - temp_to_cleanup_fetch_history (id, origin)
-- Takes no argument and returns nothing.
create or replace function mark_wrong_origin_visit_and_fetch_history_for_cleanup()
returns void
language plpgsql
as $$
begin
    create table if not exists temp_to_cleanup_origin_visit(
        origin bigint not null,
        visit bigint not null
    );
    create table if not exists temp_to_cleanup_fetch_history(
        id bigint not null,
        origin bigint not null
    );

    -- reference those origins' visits
    insert into temp_to_cleanup_origin_visit(origin, visit)
    select ov.origin, ov.visit
    from origin_visit ov
    inner join origin o on o.id = ov.origin
    where o.type = 'hg';

    -- reference those origins' fetch history
    insert into temp_to_cleanup_fetch_history(id, origin)
    select fh.id, fh.origin
    from fetch_history fh
    inner join origin o on o.id = fh.origin
    where o.type = 'hg';
end;
$$;

-- mark wrong data for cleanup, following tables will be populated:
-- - temp_to_cleanup_release
-- - temp_to_cleanup_snapshot_branches
-- - temp_to_cleanup_origin_visit
-- - temp_to_cleanup_fetch_history
create or replace function mark_wrong_data_for_cleanup()
returns void
language plpgsql
as $$
begin
    perform mark_wrong_datarelease_and_snapshot_for_cleanup();
    perform mark_wrong_origin_visit_and_fetch_history_for_cleanup();
end;
$$;

-- Cleanup: delete every row referenced by the temp_to_cleanup_* tables,
-- then drop those tables. mark_wrong_data_for_cleanup() must have been
-- called beforehand so that the four tables exist.
-- Written in plpgsql rather than plain sql: the temp_to_cleanup_* tables do
-- not exist yet when this function is created, and plpgsql only resolves
-- the statements when they are first executed.
create or replace function clean_wrong_data_up()
returns void
language plpgsql
as $$
begin
    delete from release
    where id in (
        select distinct id
        from temp_to_cleanup_release
    );

    delete from snapshot_branches sbs
    using temp_to_cleanup_snapshot_branches t
    where sbs.branch_id = t.branch_id
      and sbs.snapshot_id = t.snapshot_id;

    -- this could fail if snapshot is also referencing other releases
    delete from snapshot_branch
    where object_id in (
        select distinct branch_id
        from temp_to_cleanup_snapshot_branches
    );

    delete from origin_visit
    where snapshot_id in (
        select distinct snapshot_id
        from temp_to_cleanup_snapshot_branches
    );

    delete from snapshot
    where object_id in (
        select distinct snapshot_id
        from temp_to_cleanup_snapshot_branches
    );

    -- a visit is identified by its (origin, visit) pair
    delete from origin_visit ov
    using temp_to_cleanup_origin_visit t
    where ov.origin = t.origin
      and ov.visit = t.visit;

    delete from fetch_history fh
    using temp_to_cleanup_fetch_history t
    where fh.id = t.id
      and fh.origin = t.origin;

    drop table temp_to_cleanup_release;
    drop table temp_to_cleanup_snapshot_branches;
    drop table temp_to_cleanup_origin_visit;
    drop table temp_to_cleanup_fetch_history;
end;
$$;

-- The enclosing transaction is opened by the `begin;` at the top of the
-- script, so no second `begin;` is issued here.

-- effective marking starts here (create tables, populate them)
select * from mark_wrong_data_for_cleanup();

-- effective cleanup starts here (drops tables too)
select * from clean_wrong_data_up();

drop function mark_wrong_data_for_cleanup();
drop function mark_wrong_origin_visit_and_fetch_history_for_cleanup();
drop function mark_wrong_datarelease_and_snapshot_for_cleanup();
drop function clean_wrong_data_up();

commit;
--rollback;