Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Paste
P202
Clean up wrong origins (url is filled with the size instead of the actual origin url) - T896 related
Active
Public
Actions
Authored by
ardumont
on Dec 13 2017, 6:55 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
Storage manager
Origin-GoogleCode
Subscribers
None
```
begin
;
create
or
replace
function
list_wrong_origins
()
returns
setof
origin
.
id
%
type
language
sql
stable
as
$$
select
id
from
origin
where
type
=
'svn'
and
url
like
'0%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'1%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'2%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'3%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'4%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'5%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'6%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'7%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'8%'
union
select
id
from
origin
where
type
=
'svn'
and
url
like
'9%'
$$
;
create
or
replace
function
cleanup_wrong_origins
()
returns
void
language
plpgsql
as
$$
declare
origin_id
origin
.
id
%
type
;
begin
for
origin_id
in
(
select
*
from
list_wrong_origins
())
loop
delete
from
fetch_history
where
origin
=
origin_id
;
delete
from
origin_visit
where
origin
=
origin_id
;
delete
from
occurrence_history
where
origin
=
origin_id
;
delete
from
occurrence
where
origin
=
origin_id
;
delete
from
origin
where
id
=
origin_id
;
end
loop
;
return
;
end
;
$$
;
select
count
(
*
)
from
origin
where
type
=
'svn'
;
select
count
(
*
)
from
list_wrong_origins
();
-- clean up
select
*
from
cleanup_wrong_origins
();
-- should be 0
select
count
(
*
)
from
list_wrong_origins
();
-- should be the initial count minus the count of wrong origins
select
count
(
*
)
from
origin
where
type
=
'svn'
;
drop
function
list_wrong_origins
();
drop
function
cleanup_wrong_origins
();
rollback
;
```
Run
ok
:
```
softwareheritage
=>
begin
;
BEGIN
softwareheritage
=>
softwareheritage
=>
create
or
replace
function
list_wrong_origins
()
softwareheritage
->
returns
setof
origin
.
id
%
type
softwareheritage
->
language
sql
softwareheritage
->
stable
softwareheritage
->
as
$$
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'0%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'1%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'2%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'3%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'4%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'5%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'6%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'7%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'8%'
softwareheritage$
>
union
softwareheritage$
>
select
id
from
origin
where
type
=
'svn'
and
url
like
'9%'
softwareheritage$
>
$$
;
NOTICE
:
type
reference
origin
.
id
%
TYPE
converted
to
bigint
CREATE
FUNCTION
softwareheritage
=>
softwareheritage
=>
create
or
replace
function
cleanup_wrong_origins
()
softwareheritage
->
returns
void
softwareheritage
->
language
plpgsql
softwareheritage
->
as
$$
softwareheritage$
>
declare
softwareheritage$
>
origin_id
origin
.
id
%
type
;
softwareheritage$
>
begin
softwareheritage$
>
for
origin_id
in
(
select
*
from
list_wrong_origins
())
softwareheritage$
>
loop
softwareheritage$
>
delete
from
fetch_history
where
origin
=
origin_id
;
softwareheritage$
>
delete
from
origin_visit
where
origin
=
origin_id
;
softwareheritage$
>
delete
from
occurrence_history
where
origin
=
origin_id
;
softwareheritage$
>
delete
from
occurrence
where
origin
=
origin_id
;
softwareheritage$
>
delete
from
origin
where
id
=
origin_id
;
softwareheritage$
>
end
loop
;
softwareheritage$
>
return
;
softwareheritage$
>
end
;
softwareheritage$
>
$$
;
CREATE
FUNCTION
softwareheritage
=>
softwareheritage
=>
select
count
(
*
)
from
origin
where
type
=
'svn'
;
count
--------
580073
(
1
row
)
softwareheritage
=>
softwareheritage
=>
select
count
(
*
)
from
list_wrong_origins
();
count
-------
2646
(
1
row
)
softwareheritage
=>
softwareheritage
=>
-- clean up
softwareheritage
=>
select
*
from
cleanup_wrong_origins
();
cleanup_wrong_origins
-----------------------
(
1
row
)
softwareheritage
=>
softwareheritage
=>
-- should be 0
softwareheritage
=>
select
count
(
*
)
from
list_wrong_origins
();
count
-------
0
(
1
row
)
softwareheritage
=>
softwareheritage
=>
-- should be the initial count minus the count of wrong origins
softwareheritage
=>
select
count
(
*
)
from
origin
where
type
=
'svn'
;
count
--------
577427
(
1
row
)
softwareheritage
=>
softwareheritage
=>
drop
function
list_wrong_origins
();
DROP
FUNCTION
softwareheritage
=>
softwareheritage
=>
drop
function
cleanup_wrong_origins
();
DROP
FUNCTION
softwareheritage
=>
softwareheritage
=>
rollback
;
ROLLBACK
```
Event Timeline
ardumont
created this paste.
Dec 13 2017, 6:55 PM
2017-12-13 18:55:16 (UTC+1)
ardumont
added a project:
Origin-GoogleCode
.
Dec 13 2017, 6:57 PM
2017-12-13 18:57:22 (UTC+1)
ardumont
mentioned this in
T896: Clean up wrong origins
.
Dec 14 2017, 12:06 PM
2017-12-14 12:06:15 (UTC+1)
ardumont
changed the title of this paste from
Clean up wrong origins - T896 related
to
Clean up wrong origins (url is filled with the size instead of the actual origin url) - T896 related
.
Dec 14 2017, 2:50 PM
2017-12-14 14:50:55 (UTC+1)
ardumont
edited the content of this paste.
(Show Details)
Log In to Comment