Page MenuHomeSoftware Heritage
Paste P202

Clean up wrong origins (url is filled with the size instead of the actual origin url) - T896 related
ActivePublic

Authored by ardumont on Dec 13 2017, 6:55 PM.
```
begin;
create or replace function list_wrong_origins()
returns setof origin.id%type
language sql
stable
as $$
select id from origin where type='svn' and url like '0%'
union
select id from origin where type='svn' and url like '1%'
union
select id from origin where type='svn' and url like '2%'
union
select id from origin where type='svn' and url like '3%'
union
select id from origin where type='svn' and url like '4%'
union
select id from origin where type='svn' and url like '5%'
union
select id from origin where type='svn' and url like '6%'
union
select id from origin where type='svn' and url like '7%'
union
select id from origin where type='svn' and url like '8%'
union
select id from origin where type='svn' and url like '9%'
$$;
create or replace function cleanup_wrong_origins()
returns void
language plpgsql
as $$
declare
origin_id origin.id%type;
begin
for origin_id in (select * from list_wrong_origins())
loop
delete from fetch_history where origin=origin_id;
delete from origin_visit where origin=origin_id;
delete from occurrence_history where origin=origin_id;
delete from occurrence where origin=origin_id;
delete from origin where id=origin_id;
end loop;
return;
end;
$$;
select count(*) from origin where type='svn';
select count(*) from list_wrong_origins();
-- clean up
select * from cleanup_wrong_origins();
-- should be 0
select count(*) from list_wrong_origins();
-- should be the initial count minus the count of wrong origins
select count(*) from origin where type='svn';
drop function list_wrong_origins();
drop function cleanup_wrong_origins();
rollback;
```
Run ok:
```
softwareheritage=> begin;
BEGIN
softwareheritage=>
softwareheritage=> create or replace function list_wrong_origins()
softwareheritage-> returns setof origin.id%type
softwareheritage-> language sql
softwareheritage-> stable
softwareheritage-> as $$
softwareheritage$> select id from origin where type='svn' and url like '0%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '1%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '2%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '3%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '4%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '5%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '6%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '7%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '8%'
softwareheritage$> union
softwareheritage$> select id from origin where type='svn' and url like '9%'
softwareheritage$> $$;
NOTICE: type reference origin.id%TYPE converted to bigint
CREATE FUNCTION
softwareheritage=>
softwareheritage=> create or replace function cleanup_wrong_origins()
softwareheritage-> returns void
softwareheritage-> language plpgsql
softwareheritage-> as $$
softwareheritage$> declare
softwareheritage$> origin_id origin.id%type;
softwareheritage$> begin
softwareheritage$> for origin_id in (select * from list_wrong_origins())
softwareheritage$> loop
softwareheritage$> delete from fetch_history where origin=origin_id;
softwareheritage$> delete from origin_visit where origin=origin_id;
softwareheritage$> delete from occurrence_history where origin=origin_id;
softwareheritage$> delete from occurrence where origin=origin_id;
softwareheritage$> delete from origin where id=origin_id;
softwareheritage$> end loop;
softwareheritage$> return;
softwareheritage$> end;
softwareheritage$> $$;
CREATE FUNCTION
softwareheritage=>
softwareheritage=> select count(*) from origin where type='svn';
count
--------
580073
(1 row)
softwareheritage=>
softwareheritage=> select count(*) from list_wrong_origins();
count
-------
2646
(1 row)
softwareheritage=>
softwareheritage=> -- clean up
softwareheritage=> select * from cleanup_wrong_origins();
cleanup_wrong_origins
-----------------------
(1 row)
softwareheritage=>
softwareheritage=> -- should be 0
softwareheritage=> select count(*) from list_wrong_origins();
count
-------
0
(1 row)
softwareheritage=>
softwareheritage=> -- should be the initial count minus the count of wrong origins
softwareheritage=> select count(*) from origin where type='svn';
count
--------
577427
(1 row)
softwareheritage=>
softwareheritage=> drop function list_wrong_origins();
DROP FUNCTION
softwareheritage=>
softwareheritage=> drop function cleanup_wrong_origins();
DROP FUNCTION
softwareheritage=>
softwareheritage=> rollback;
ROLLBACK
```

Event Timeline

ardumont changed the title of this paste from Clean up wrong origins - T896 related to Clean up wrong origins (url is filled with the size instead of the actual origin url) - T896 related.Dec 14 2017, 2:50 PM
ardumont edited the content of this paste. (Show Details)