-- Record, in the temporary work tables, what has to be cleaned up for
-- the origins of type 'hg':
--   - temp_to_cleanup_release(id): the releases targeted by a branch of
--     a snapshot referenced by one of those origins' visits;
--   - temp_to_cleanup_snapshot_branches(snapshot_id, branch_id): the
--     (snapshot, branch) pairs of every snapshot referenced by one of
--     those origins' visits.
--
-- Both temporary tables must already exist when this function is
-- called; it only inserts into them and deletes nothing itself.
--
-- Kept in plpgsql so the statements are only planned when the function
-- is called, i.e. once the temporary tables exist.
create or replace function backup_wrong_data_to_cleanup()
returns void
language plpgsql
as $$
begin
    -- reference the releases to clean up later
    insert into temp_to_cleanup_release(id)
    select distinct r.id
    from origin o
    inner join origin_visit ov on (o.id=ov.origin and o.type='hg')
    inner join snapshot s on ov.snapshot_id=s.object_id
    inner join snapshot_branches sbs on s.object_id=sbs.snapshot_id
    inner join snapshot_branch sb on sbs.branch_id=sb.object_id
    inner join release r on (sb.target_type='release' and r.id=sb.target);

    -- Note: a snapshot can mix branches targeting releases with
    -- branches targeting other objects. Such a snapshot must be cleaned
    -- up as a whole, as the next ingestion will result in different
    -- identifiers. So every branch of every snapshot of those visits is
    -- marked, whatever its target type; this covers the branches
    -- targeting the releases selected above as well as all the others.
    insert into temp_to_cleanup_snapshot_branches(snapshot_id, branch_id)
    select distinct s.object_id, sbs.branch_id
    from origin o
    inner join origin_visit ov on (o.id=ov.origin and o.type='hg')
    inner join snapshot s on ov.snapshot_id=s.object_id
    inner join snapshot_branches sbs on s.object_id=sbs.snapshot_id
    inner join snapshot_branch sb on sbs.branch_id=sb.object_id;

    return;
end;
$$;
-- Delete the per-origin rows (fetch history, visits, occurrence history
-- and occurrences) of every origin of type 'hg'. The origin rows
-- themselves are left in place.
create or replace function cleanup_wrong_origin_visits()
returns void
language plpgsql
as $$
begin
    -- clean up those origins' metadata, one set-based delete per table
    delete from fetch_history
    where origin in (select o.id from origin o where o.type='hg');

    delete from origin_visit
    where origin in (select o.id from origin o where o.type='hg');

    delete from occurrence_history
    where origin in (select o.id from origin o where o.type='hg');

    delete from occurrence
    where origin in (select o.id from origin o where o.type='hg');

    return;
end;
$$;
-- Delete the releases, snapshot branches, visits and snapshots listed
-- by backup_wrong_data_to_cleanup().
--
-- Two temporary tables hold the identifiers to delete for the duration
-- of the call; they are created here and dropped before returning.
create or replace function cleanup_wrong_snapshots_and_releases()
returns void
language plpgsql
as $$
begin
    -- work tables listing what has to go
    create temporary table temp_to_cleanup_release(
        id sha1_git not null
    );
    create temporary table temp_to_cleanup_snapshot_branches(
        snapshot_id bigint not null,
        branch_id bigint not null
    );

    -- fill both work tables
    perform backup_wrong_data_to_cleanup();

    -- releases first
    delete from release r
    where exists (
        select 1
        from temp_to_cleanup_release t
        where t.id = r.id
    );

    -- then the snapshot <-> branch associations
    delete from snapshot_branches sbs
    where exists (
        select 1
        from temp_to_cleanup_snapshot_branches t
        where t.branch_id = sbs.branch_id
          and t.snapshot_id = sbs.snapshot_id
    );

    -- this could fail if snapshot is also referencing other releases
    delete from snapshot_branch sb
    where exists (
        select 1
        from temp_to_cleanup_snapshot_branches t
        where t.branch_id = sb.object_id
    );

    -- visits pointing at the snapshots go before the snapshots themselves
    delete from origin_visit ov
    where exists (
        select 1
        from temp_to_cleanup_snapshot_branches t
        where t.snapshot_id = ov.snapshot_id
    );

    delete from snapshot s
    where exists (
        select 1
        from temp_to_cleanup_snapshot_branches t
        where t.snapshot_id = s.object_id
    );

    -- the work tables are no longer needed
    drop table temp_to_cleanup_release, temp_to_cleanup_snapshot_branches;

    return;
end;
$$;
-- Entry point of the cleanup: runs the two cleanup steps in order.
--
-- Declared with the default volatility (not stable): both steps delete
-- rows.
create or replace function cleanup_wrong_data()
returns void
language plpgsql
as $$
begin
    -- Must run first: the snapshots and releases to remove are found
    -- through origin_visit, which the next step empties for those
    -- origins.
    perform cleanup_wrong_snapshots_and_releases();
    perform cleanup_wrong_origin_visits();
    return;
end;
$$;
-- run the whole cleanup
select cleanup_wrong_data();

-- the helper functions are only needed for this one-off run
drop function cleanup_wrong_data;
drop function cleanup_wrong_origin_visits;
drop function cleanup_wrong_snapshots_and_releases;
drop function backup_wrong_data_to_cleanup;

-- Dry run by default: replace the rollback with the commit below to
-- persist the cleanup.
-- NOTE(review): the rollback only undoes the statements above if a
-- transaction was opened before them; no "begin" is visible here --
-- confirm one is issued by the caller or earlier in the script.
--commit;
rollback;