diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py --- a/swh/indexer/ctags.py +++ b/swh/indexer/ctags.py @@ -9,7 +9,7 @@ from swh.model import hashutil from .language import compute_language -from .indexer import ContentIndexer, DiskIndexer +from .indexer import ContentIndexer, write_to_temp # Options used to compute tags @@ -53,7 +53,7 @@ } -class CtagsIndexer(ContentIndexer, DiskIndexer): +class CtagsIndexer(ContentIndexer): CONFIG_BASE_FILENAME = 'indexer/ctags' ADDITIONAL_CONFIG = { @@ -119,17 +119,14 @@ } filename = hashutil.hash_to_hex(id) - content_path = self.write_to_temp( - filename=filename, - data=data) - - result = run_ctags(content_path, lang=ctags_lang) - ctags.update({ - 'ctags': list(result), - 'indexer_configuration_id': self.tool['id'], - }) - - self.cleanup(content_path) + with write_to_temp( + filename=filename, data=data, + working_directory=self.working_directory) as content_path: + result = run_ctags(content_path, lang=ctags_lang) + ctags.update({ + 'ctags': list(result), + 'indexer_configuration_id': self.tool['id'], + }) return ctags diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py --- a/swh/indexer/fossology_license.py +++ b/swh/indexer/fossology_license.py @@ -7,7 +7,7 @@ from swh.model import hashutil -from .indexer import ContentIndexer, ContentRangeIndexer, DiskIndexer +from .indexer import ContentIndexer, ContentRangeIndexer, write_to_temp def compute_license(path, log=None): @@ -89,19 +89,15 @@ """ assert isinstance(id, bytes) - content_path = self.write_to_temp( - filename=hashutil.hash_to_hex(id), # use the id as pathname - data=data) - - try: + with write_to_temp( + filename=hashutil.hash_to_hex(id), # use the id as pathname + data=data, + working_directory=self.working_directory) as content_path: properties = compute_license(path=content_path, log=self.log) properties.update({ 'id': id, 'indexer_configuration_id': self.tool['id'], }) - finally: - self.cleanup(content_path) - return properties def persist_index_computations(self, results, policy_update): @@ -124,7 +120,7 @@ class FossologyLicenseIndexer( - MixinFossologyLicenseIndexer, DiskIndexer, ContentIndexer): + MixinFossologyLicenseIndexer, ContentIndexer): """Indexer in charge of: - filtering out content already indexed @@ -146,7 +142,7 @@ class FossologyLicenseRangeIndexer( - MixinFossologyLicenseIndexer, DiskIndexer, ContentRangeIndexer): + MixinFossologyLicenseIndexer, ContentRangeIndexer): """FossologyLicense Range Indexer working on range of content identifiers. - filters out the non textual content diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py --- a/swh/indexer/indexer.py +++ b/swh/indexer/indexer.py @@ -11,6 +11,7 @@ import tempfile import datetime from copy import deepcopy +from contextlib import contextmanager from swh.scheduler import get_scheduler from swh.storage import get_storage @@ -22,48 +23,29 @@ from swh.core import utils -class DiskIndexer: - """Mixin intended to be used with other SomethingIndexer classes. +@contextmanager +def write_to_temp(filename, data, working_directory): + """Write the sha1's content in a temporary file. - Indexers inheriting from this class are a category of indexers - which needs the disk for their computations. + Args: + filename (str): one of sha1's many filenames + data (bytes): the sha1's content to write in temporary + file - Note: - This expects `self.working_directory` variable defined at - runtime. + Returns: + The path to the temporary file created. That file is + filled in with the raw content's data. """ - def write_to_temp(self, filename, data): - """Write the sha1's content in a temporary file. + os.makedirs(working_directory, exist_ok=True) + temp_dir = tempfile.mkdtemp(dir=working_directory) + content_path = os.path.join(temp_dir, filename) - Args: - filename (str): one of sha1's many filenames - data (bytes): the sha1's content to write in temporary - file - - Returns: - The path to the temporary file created. That file is - filled in with the raw content's data. - - """ - os.makedirs(self.working_directory, exist_ok=True) - temp_dir = tempfile.mkdtemp(dir=self.working_directory) - content_path = os.path.join(temp_dir, filename) - - with open(content_path, 'wb') as f: - f.write(data) + with open(content_path, 'wb') as f: + f.write(data) - return content_path - - def cleanup(self, content_path): - """Remove content_path from working directory. - - Args: - content_path (str): the file to remove - - """ - temp_dir = os.path.dirname(content_path) - shutil.rmtree(temp_dir) + yield content_path + shutil.rmtree(temp_dir) class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta): diff --git a/swh/indexer/tests/test_ctags.py b/swh/indexer/tests/test_ctags.py --- a/swh/indexer/tests/test_ctags.py +++ b/swh/indexer/tests/test_ctags.py @@ -15,7 +15,7 @@ from swh.indexer.tests.utils import ( CommonContentIndexerTest, CommonIndexerWithErrorsTest, CommonIndexerNoTool, - SHA1_TO_CTAGS, NoDiskIndexer, BASE_TEST_CONFIG, + SHA1_TO_CTAGS, BASE_TEST_CONFIG, OBJ_STORAGE_DATA, fill_storage, fill_obj_storage ) @@ -78,7 +78,7 @@ } -class CtagsIndexerTest(NoDiskIndexer, InjectCtagsIndexer, CtagsIndexer): +class CtagsIndexerTest(InjectCtagsIndexer, CtagsIndexer): """Specific language whose configuration is enough to satisfy the indexing tests. """ @@ -99,7 +99,7 @@ 'haskell': 'haskell', 'bar': 'bar', }, - 'workdir': '/nowhere', + 'workdir': '/tmp', } @@ -168,7 +168,7 @@ def fake_check_output(cmd, *args, **kwargs): print(cmd) - id_ = cmd[-1] # when using NoDiskIndexer, path is replaced by id + id_ = cmd[-1].split('/')[-1] return '\n'.join( json.dumps({'language': ctag['lang'], **ctag}) for ctag in SHA1_TO_CTAGS[id_]) diff --git a/swh/indexer/tests/test_fossology_license.py b/swh/indexer/tests/test_fossology_license.py --- a/swh/indexer/tests/test_fossology_license.py +++ b/swh/indexer/tests/test_fossology_license.py @@ -15,7 +15,7 @@ from swh.indexer.tests.utils import ( SHA1_TO_LICENSES, CommonContentIndexerTest, CommonContentIndexerRangeTest, - CommonIndexerWithErrorsTest, CommonIndexerNoTool, NoDiskIndexer, + CommonIndexerWithErrorsTest, CommonIndexerNoTool, BASE_TEST_CONFIG, fill_storage, fill_obj_storage ) @@ -49,13 +49,14 @@ """ if isinstance(id, bytes): path = path.decode('utf-8') + # path is something like /tmp/tmpXXX/ so we keep only the sha1 part + path = path.split('/')[-1] return { 'licenses': SHA1_TO_LICENSES.get(path) } -class FossologyLicenseTestIndexer( - NoDiskIndexer, FossologyLicenseIndexer): +class FossologyLicenseTestIndexer(FossologyLicenseIndexer): """Specific fossology license whose configuration is enough to satisfy the indexing checks. @@ -63,7 +64,7 @@ def parse_config_file(self, *args, **kwargs): return { **BASE_TEST_CONFIG, - 'workdir': '/nowhere', + 'workdir': '/tmp', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', @@ -123,15 +124,14 @@ fossology_license.compute_license = self.orig_compute_license -class FossologyLicenseRangeIndexerTest( - NoDiskIndexer, FossologyLicenseRangeIndexer): +class FossologyLicenseRangeIndexerTest(FossologyLicenseRangeIndexer): """Testing the range indexer on fossology license. """ def parse_config_file(self, *args, **kwargs): return { **BASE_TEST_CONFIG, - 'workdir': '/nowhere', + 'workdir': '/tmp', 'tools': { 'name': 'nomos', 'version': '3.1.0rc2-31-ga2cbb8c', diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -658,16 +658,3 @@ # then self.assertFalse(actual_results) - - -class NoDiskIndexer: - """Mixin to override the DiskIndexer behavior avoiding side-effects in - tests. - - """ - - def write_to_temp(self, filename, data): # noop - return filename - - def cleanup(self, content_path): # noop - return None