Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9338347
utils.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
utils.py
View Options
# Copyright (C) 2018-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from
datetime
import
datetime
,
timezone
import
io
import
os
from
pathlib
import
Path
import
shutil
import
signal
from
subprocess
import
PIPE
,
Popen
import
time
import
traceback
from
typing
import
Callable
,
Iterable
,
Optional
,
Union
from
billiard
import
Process
,
Queue
# type: ignore
from
dateutil.parser
import
parse
import
psutil
from
swh.loader.exception
import
MissingOptionalDependency
from
swh.model.hashutil
import
MultiHash
def clean_dangling_folders(dirpath: str, pattern_check: str, log=None) -> None:
    """Clean up dangling temporary working folders rooted at `dirpath`.

    A first-level entry of `dirpath` is removed only when its name contains
    `pattern_check`, follows the `swh.loader.{loader-type}-pid.{noise}` naming
    scheme, and the pid embedded in its name does not belong to a live process.
    The cleanup is best-effort: any failure on a given entry is reported through
    `log` (when provided) and the scan moves on to the next entry.

    Args:
        dirpath: Path to check for dangling files
        pattern_check: A dedicated pattern to check on first level directory (e.g
            `swh.loader.mercurial.`, `swh.loader.svn.`)
        log (Logger): Optional logger

    """
    if not os.path.exists(dirpath):
        return
    for filename in os.listdir(dirpath):
        path_to_cleanup = os.path.join(dirpath, filename)
        try:
            # pattern: `swh.loader.{loader-type}-pid.{noise}`
            if pattern_check not in filename or "-" not in filename:
                # silently ignore unknown patterns
                continue
            # A name holding more than one "-" fails this unpacking and is
            # reported through the warning path below instead of being removed.
            _, pid_ = filename.split("-")
            pid = int(pid_.split(".")[0])
            if psutil.pid_exists(pid):
                if log:
                    log.debug("PID %s is live, skipping", pid)
                continue
            # could be removed concurrently, so check before removal
            if os.path.exists(path_to_cleanup):
                shutil.rmtree(path_to_cleanup)
        except Exception as e:
            if log:
                # `Logger.warn` is a deprecated alias; `warning` is the
                # supported spelling and does not emit a DeprecationWarning.
                log.warning("Fail to clean dangling path %s: %s", path_to_cleanup, e)
class CloneTimeout(Exception):
    """Raised by `clone_with_timeout` when the clone outlives its timeout.

    It is raised with the positional arguments `(src, timeout, killed)`.
    """
class CloneFailure(Exception):
    """Raised by `clone_with_timeout` when the clone subprocess reported an error.

    It is raised with the positional arguments `(src, dest, error)`, where
    `error` is the item read from the subprocess error queue.
    """
def
_clone_task
(
clone_func
:
Callable
[[],
None
],
errors
:
Queue
)
->
None
:
try
:
clone_func
()
except
Exception
as
e
:
exc_buffer
=
io
.
StringIO
()
traceback
.
print_exc
(
file
=
exc_buffer
)
errors
.
put_nowait
(
exc_buffer
.
getvalue
())
raise
e
def clone_with_timeout(
    src: str, dest: str, clone_func: Callable[[], None], timeout: float
) -> None:
    """Clone a repository in a subprocess, giving up after `timeout` seconds.

    `clone_func` is executed in a separate process. If that process is still
    alive once the timeout has elapsed, it is terminated (and killed if it is
    still alive about one second later).

    Args:
        src: clone source
        dest: clone destination
        clone_func: callable that does the actual cloning
        timeout: timeout in seconds

    Raises:
        CloneTimeout: the subprocess was still alive after `timeout` seconds
        CloneFailure: the subprocess put an error on its error queue
    """
    errors: Queue = Queue()
    worker = Process(target=_clone_task, args=(clone_func, errors))
    worker.start()
    worker.join(timeout)

    if worker.is_alive():
        worker.terminate()
        # Give it literally a second (in successive steps of 0.1 second),
        # then kill it.
        # Can't use `process.join(1)` here, billiard appears to be bugged
        # https://github.com/celery/billiard/issues/270
        killed = True
        for _ in range(10):
            time.sleep(0.1)
            if not worker.is_alive():
                # Terminated within the grace period: no kill needed.
                killed = False
                break
        if killed:
            os.kill(worker.pid, signal.SIGKILL)
        raise CloneTimeout(src, timeout, killed)

    if not errors.empty():
        raise CloneFailure(src, dest, errors.get())
def parse_visit_date(visit_date: Optional[Union[datetime, str]]) -> Optional[datetime]:
    """Normalize a visit date to either None or a datetime.

    `None` and datetime values are returned unchanged, the special value
    `"now"` yields the current UTC time, and any other string is handed to the
    date parser.

    Raises:
        ValueError: `visit_date` is neither None, a datetime nor a string
    """
    if visit_date is None or isinstance(visit_date, datetime):
        return visit_date

    if visit_date == "now":
        return datetime.now(tz=timezone.utc)

    if not isinstance(visit_date, str):
        raise ValueError(f"invalid visit date {visit_date!r}")

    return parse(visit_date)
def nix_hashes(filepath: Path, hash_names: Iterable[str]) -> MultiHash:
    """Compute hashes over the `nix-store --dump` output for `filepath`.

    The standard output of the `nix-store --dump <filepath>` command is fed
    incrementally into a `MultiHash` configured with `hash_names`.

    Args:
        filepath: path handed to `nix-store --dump`
        hash_names: names of the hash algorithms to compute

    Returns:
        The `MultiHash` instance updated with the whole dump output.

    Raises:
        MissingOptionalDependency: the nix-store command is not available on
            the system.
    """
    nix_store_bin = shutil.which("nix-store")
    if nix_store_bin is None:
        raise MissingOptionalDependency("nix-store")

    hasher = MultiHash(hash_names=hash_names)

    with Popen([nix_store_bin, "--dump", str(filepath)], stdout=PIPE) as proc:
        assert proc.stdout is not None
        for chunk in proc.stdout:
            hasher.update(chunk)

    return hasher
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jul 4 2025, 8:43 AM (6 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3298136
Attached To
rDLDBASE Generic VCS/Package Loader
Event Timeline
Log In to Comment