Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8391961
fix_revisions.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
1 KB
Subscribers
None
fix_revisions.py
View Options
#!/usr/bin/env python3
import
logging
import
pickle
import
os
import
sys
from
.reader
import
GitCommitRemoteReader
def get_revisions_and_origins_from_file(filename):
    """Parse a whitespace-separated listing of revisions and their origins.

    Each non-blank line of *filename* is split on whitespace; the first
    field is taken as the revision identifier and every remaining field as
    one of its origins.

    Returns a dict mapping each revision identifier to the set of origins
    found on its line. If the same identifier appears on several lines, the
    last line wins.
    """
    revs = {}
    with open(filename, 'r') as f:
        for line in f:
            # split() with no argument already discards surrounding
            # whitespace, so no separate strip() is needed.
            data = line.split()
            if not data:
                # Blank or whitespace-only line: there is no revision field,
                # and indexing data[0] would raise IndexError.
                continue
            revision, origins = data[0], data[1:]
            revs[revision] = set(origins)
    return revs
def revisions_from_origin(origin_url):
    """Load the revisions of *origin_url* keyed by decoded identifier.

    A fresh GitCommitRemoteReader is asked to load the origin; the mapping
    it returns is copied into a new dict whose keys are the result of
    calling .decode() on the original keys (presumably bytes identifiers --
    confirm against the reader) and whose values are left untouched.
    """
    loaded = GitCommitRemoteReader().load(origin_url)
    by_decoded_id = {}
    for raw_id, revision in loaded.items():
        by_decoded_id[raw_id.decode()] = revision
    return by_decoded_id
def dump_to_file(filename, data):
    """Pickle *data* into *filename* via a temporary sibling file.

    The data is first written to '<filename>.tmp' and then moved over
    *filename*, so a crash while pickling never leaves a truncated file
    under the final name.
    """
    tmp_filename = '%s.tmp' % filename
    with open(tmp_filename, 'wb') as f:
        pickle.dump(data, f)
    # os.replace overwrites an existing destination on every platform;
    # os.rename raises on Windows when the destination already exists,
    # which happens as soon as the same snapshot is written a second time.
    os.replace(tmp_filename, filename)
if __name__ == '__main__':
    # Usage: fix_revisions.py REVISIONS_FILE SNAPSHOT_FILE
    #
    # REVISIONS_FILE lists one revision per line followed by its origins.
    # SNAPSHOT_FILE holds the pickled [revs, parsed_revs, origins] state so
    # an interrupted run can be resumed.
    if len(sys.argv) < 3:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit('usage: %s REVISIONS_FILE SNAPSHOT_FILE' % sys.argv[0])

    filename = sys.argv[1]
    snapshot = sys.argv[2]

    if os.path.exists(snapshot):
        # Resume from a previous run.
        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file; only point this at snapshots written by this script.
        with open(snapshot, 'rb') as f:
            revs, parsed_revs, origins = pickle.load(f)
    else:
        revs = get_revisions_and_origins_from_file(filename)
        # Only GitHub-hosted origins are queued for loading.
        origins = set()
        for urls in revs.values():
            origins |= {
                url for url in urls
                if url.startswith('https://github.com/')
            }
        parsed_revs = {}
        dump_to_file(snapshot, [revs, parsed_revs, origins])

    ctr = 0
    while origins:
        print("%s origins, %s/%s revs remaining" % (
            len(origins),
            len(revs) - len(parsed_revs),
            len(revs),
        ))
        ctr += 1
        origin_url = origins.pop()
        try:
            origin_revs = revisions_from_origin(origin_url)
        except Exception as e:
            # Best effort: a failing origin is logged and dropped, and the
            # remaining origins are still processed.
            logging.exception(e)
            continue

        # Keep only the revisions we were asked about and have not already
        # obtained from an earlier origin.
        for rev_id, revision in origin_revs.items():
            if rev_id in revs and rev_id not in parsed_revs:
                parsed_revs[rev_id] = revision

        # Checkpoint once at least ten origins have been popped since the
        # last one, so an interruption loses little work.
        if ctr >= 10:
            ctr = 0
            dump_to_file(snapshot, [revs, parsed_revs, origins])

    # Final state, written once the origin queue is empty.
    dump_to_file(snapshot, [revs, parsed_revs, origins])
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jun 4 2025, 6:52 PM (11 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3398848
Attached To
rDSNIP Code snippets
Event Timeline
Log In to Comment