Page MenuHomeSoftware Heritage
Paste P203

Clean up wrong googlecode origins (googlecode instead of eclipselabs or apache-extras) - T897 related
ActivePublic

Authored by ardumont on Dec 14 2017, 2:55 PM.
```
begin;
select count(*) from origin where type='svn';
create temporary table origins_to_cleanup(url text not null);
\copy origins_to_cleanup from './origins-to-cleanup';
create or replace function list_wrong_origins()
returns setof origin.id%type
language sql
stable
as $$
select o.id from origin o
inner join origins_to_cleanup oc
on o.url=oc.url
where o.type='svn'
$$;
create or replace function cleanup_wrong_origins()
returns void
language plpgsql
as $$
declare
origin_id origin.id%type;
begin
for origin_id in (select * from list_wrong_origins())
loop
delete from fetch_history where origin=origin_id;
delete from origin_visit where origin=origin_id;
delete from skipped_content where origin=origin_id;
delete from occurrence_history where origin=origin_id;
delete from occurrence where origin=origin_id;
delete from origin where id=origin_id;
end loop;
return;
end;
$$;
select count(*) from origin where type='svn';
select count(*) from list_wrong_origins();
select * from cleanup_wrong_origins();
-- should be 0
select count(*) from list_wrong_origins();
-- should be the total minus the wrong origins removed
select count(*) from origin where type='svn';
drop table origins_to_cleanup;
drop function list_wrong_origins();
drop function cleanup_wrong_origins();
rollback; -- replace with `commit` when ready
```
Run:
```
softwareheritage=>
softwareheritage=> select count(*) from origin where type='svn';
count
--------
577427
(1 row)
softwareheritage=>
softwareheritage=> create temporary table origins_to_cleanup(url text not null);
CREATE TABLE
softwareheritage=>
softwareheritage=> \copy origins_to_cleanup from './origins-to-cleanup';
COPY 1608
softwareheritage=>
softwareheritage=> create or replace function list_wrong_origins()
softwareheritage-> returns setof origin.id%type
softwareheritage-> language sql
softwareheritage-> stable
softwareheritage-> as $$
softwareheritage$> select o.id from origin o
softwareheritage$> inner join origins_to_cleanup oc
softwareheritage$> on o.url=oc.url
softwareheritage$> where o.type='svn'
softwareheritage$> $$;
NOTICE: type reference origin.id%TYPE converted to bigint
CREATE FUNCTION
softwareheritage=>
softwareheritage=>
softwareheritage=> create or replace function cleanup_wrong_origins()
softwareheritage-> returns void
softwareheritage-> language plpgsql
softwareheritage-> as $$
softwareheritage$> declare
softwareheritage$> origin_id origin.id%type;
softwareheritage$> begin
softwareheritage$> for origin_id in (select * from list_wrong_origins())
softwareheritage$> loop
softwareheritage$> delete from fetch_history where origin=origin_id;
softwareheritage$> delete from origin_visit where origin=origin_id;
softwareheritage$> delete from occurrence_history where origin=origin_id;
softwareheritage$> delete from occurrence where origin=origin_id;
softwareheritage$> delete from skipped_content where origin=origin_id;
softwareheritage$> delete from origin where id=origin_id;
softwareheritage$> end loop;
softwareheritage$> return;
softwareheritage$> end;
softwareheritage$> $$;
CREATE FUNCTION
softwareheritage=>
softwareheritage=> select count(*) from origin where type='svn';
count
--------
577427
(1 row)
softwareheritage=>
softwareheritage=> select count(*) from list_wrong_origins();
count
-------
1592
(1 row)
softwareheritage=>
softwareheritage=> select * from cleanup_wrong_origins();
cleanup_wrong_origins
-----------------------
(1 row)
softwareheritage=>
softwareheritage=> -- should be 0
softwareheritage=> select count(*) from list_wrong_origins();
count
-------
0
(1 row)
softwareheritage=>
softwareheritage=> -- should be the total minus the wrong origins removed
softwareheritage=> select count(*) from origin where type='svn';
count
--------
575835
(1 row)
softwareheritage=>
softwareheritage=> drop table origins_to_cleanup;
DROP TABLE
softwareheritage=>
softwareheritage=> drop function list_wrong_origins();
DROP FUNCTION
softwareheritage=>
softwareheritage=> drop function cleanup_wrong_origins();
DROP FUNCTION
softwareheritage=>
softwareheritage=> rollback;
ROLLBACK
```