Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
from abc import abstractmethod | from abc import abstractmethod | ||||
from swh.loader.core.loader import BufferedLoader | from swh.loader.core.loader import BufferedLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.storage.algos.snapshot import snapshot_get_all_branches | |||||
from tempfile import mkdtemp | from tempfile import mkdtemp | ||||
DEBUG_MODE = '** DEBUG MODE **' | DEBUG_MODE = '** DEBUG MODE **' | ||||
class PackageLoader(BufferedLoader): | class PackageLoader(BufferedLoader): | ||||
"""Package loader class for package manager loader | """Package loader class for package manager loader | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | def fetch_metadata(self, kwargs): | ||||
time_modified: 1599887203 }, | time_modified: 1599887203 }, | ||||
... | ... | ||||
] | ] | ||||
} | } | ||||
""" | """ | ||||
pass | pass | ||||
def pre_cleanup(self):
    """Clean up dangling temporary files left by previous runs.

    To prevent disk explosion if some other workers exploded in mid-air
    (OOM killed), we try and clean up dangling files. The work is
    delegated to ``clean_dangling_folders``, which receives the configured
    temporary directory, ``TEMPORARY_DIR_PREFIX_PATTERN`` as
    ``pattern_check`` and the loader's logger.

    When ``self.debug`` is set, nothing is cleaned: a warning is logged
    and the method returns immediately.

    """
    if self.debug:
        # NOTE(review): the message reports ``self.temp_directory`` while
        # the cleanup below targets ``self.config['temp_directory']`` --
        # confirm both designate the same location.
        # ``warn`` is a deprecated alias of ``warning``; passing the values
        # as arguments yields the same message text while deferring the
        # formatting to the logger (assumes ``self.log`` is a standard
        # ``logging.Logger`` -- TODO confirm).
        self.log.warning('%s Will not pre-clean up temp dir %s',
                         DEBUG_MODE, self.temp_directory)
        return

    clean_dangling_folders(self.config['temp_directory'],
                           pattern_check=self.TEMPORARY_DIR_PREFIX_PATTERN,
                           log=self.log)
def prepare_origin_visit(self, *args, **kwargs): | def prepare_origin_visit(self, *args, **kwargs): | ||||
"""Prepare package visit. | """Prepare package visit. | ||||
Args: | Args: | ||||
**kwargs: Arbitrary keyword arguments passed by the lister. | **kwargs: Arbitrary keyword arguments passed by the lister. | ||||
""" | """ | ||||
# reset statuses | # reset statuses | ||||
Show All 9 Lines | def set_origin(self): | ||||
"""Assign value to self.origin. | """Assign value to self.origin. | ||||
""" | """ | ||||
self.origin = { | self.origin = { | ||||
'url': self.package_details['origin_url'], | 'url': self.package_details['origin_url'], | ||||
'type': self.loader_name, | 'type': self.loader_name, | ||||
} | } | ||||
def prepare(self, *args, **kwargs):
    """Prepare effective loading of source tarballs for a package manager
    package.

    Resets the per-package accumulators (contents, directories, revisions,
    source data) to empty lists, derives the package's temporary directory
    from ``self.temp_directory`` and the package name, then works out the
    already known versions from the last snapshot and the versions that
    remain to be processed.

    Args:
        **kwargs: Arbitrary keyword arguments passed by the lister
            (not read by this method itself).

    """
    # Start from a clean slate: one fresh list per accumulator.
    self.package_contents = []
    self.package_directories = []
    self.package_revisions = []
    self.package_source_data = []

    # Temporary directory for this package: <temp_directory>/<package name>
    temp_root = self.temp_directory
    package_name = self.package_details['name']
    self.package_temp_dir = os.path.join(temp_root, package_name)

    # Compare what the previous visit stored with what is available now.
    previous_snapshot = self.last_snapshot()
    self.known_versions = self.get_known_versions(previous_snapshot)
    self.new_versions = self.prepare_package_versions(
        self.package_details['tarballs'],
        self.known_versions,
    )
def last_snapshot(self):
    """Retrieve the last snapshot of the package if any.

    Asks the storage for the latest visit of ``self.origin['url']`` that
    has a snapshot. When such a visit is found, returns the result of
    ``snapshot_get_all_branches`` for that visit's snapshot; otherwise the
    method falls through and returns ``None`` implicitly.

    """
    storage = self.storage
    latest_visit = storage.origin_visit_get_latest(
        self.origin['url'], require_snapshot=True)
    if latest_visit:
        snapshot_id = latest_visit['snapshot']
        return snapshot_get_all_branches(storage, snapshot_id)