diff --git a/debian/control b/debian/control index 3d3e988..32c29f3 100644 --- a/debian/control +++ b/debian/control @@ -1,32 +1,33 @@ Source: swh-loader-core Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-all, python3-nose, + python3-psutil, python3-retrying, python3-setuptools, python3-swh.core, python3-swh.model (>= 0.0.18~), python3-swh.storage (>= 0.0.97~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/60/ Package: python3-swh.loader.core Architecture: all Depends: python3-swh.core, python3-swh.model (>= 0.0.18~), python3-swh.storage (>= 0.0.97~), ${misc:Depends}, ${python3:Depends} Breaks: python3-swh.deposit.loader (<< 0.0.48~), python3-swh.loader.debian (<< 0.0.9~), python3-swh.loader.dir (<< 0.0.31~), python3-swh.loader.git (<< 0.0.36~), python3-swh.loader.mercurial (<< 0.0.3~), python3-swh.loader.svn (<< 0.0.35~), python3-swh.loader.tar (<< 0.0.33~) Description: Software Heritage Loader Core diff --git a/requirements.txt b/requirements.txt index b814c8e..0e002e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # Add here external Python modules dependencies, one per line. Module names # should match https://pypi.python.org/pypi names. 
def clean_dangling_folders(dirpath, pattern_check, log=None):
    """Remove leftover temporary working folders found directly in `dirpath`.

    Only first-level entries of `dirpath` are examined. An entry is a
    candidate when its name contains `pattern_check` as well as a `-`;
    it is expected to look like `<prefix>-<pid>.<noise>` (for instance
    `swh.loader.svn-1234.abcdef`). The `<pid>` part is parsed as an
    integer and the entry is removed only when `psutil` reports that no
    process with that pid exists.

    This is a best-effort cleanup: any error raised while handling one
    entry (unparseable name, removal failure, ...) is reported through
    `log` when provided, and the scan moves on to the next entry.

    Args:
        dirpath (str): Path to check for dangling files. Nothing is done
            when it does not exist.
        pattern_check (str): A dedicated pattern to check on first
            level directory (e.g `swh.loader.mercurial.`,
            `swh.loader.svn.`)
        log (Logger): Optional logger. Live pids are reported at debug
            level, failures at warning level.

    Returns:
        None

    """
    if not os.path.exists(dirpath):
        return
    for filename in os.listdir(dirpath):
        # Bind the path before entering the `try` so that the error
        # handler below can always name the entry being processed, even
        # when parsing its name is what failed.
        path_to_cleanup = os.path.join(dirpath, filename)
        try:
            # pattern: `swh.loader.svn-pid.{noise}`
            if pattern_check not in filename or '-' not in filename:
                continue  # silently ignore unknown patterns
            # Exactly one '-' is expected; anything else raises and is
            # reported by the handler below.
            _, pid = filename.split('-')
            pid = int(pid.split('.')[0])
            if psutil.pid_exists(pid):
                if log:
                    log.debug('PID %s is live, skipping', pid)
                continue
            # could be removed concurrently, so check before removal
            if os.path.exists(path_to_cleanup):
                shutil.rmtree(path_to_cleanup)
        except Exception as e:
            # Deliberately broad: one bad entry must not stop the cleanup
            # of the remaining ones.
            if log:
                log.warning('Fail to clean dangling path %s: %s',
                            path_to_cleanup, e)