# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# NOTE: as stored here the patch has lost its line structure (all hunks are run
# together on two physical lines), so it has to be re-wrapped before a patch
# tool can consume it.
#
# Files touched:
#   * pytest.ini
#       - adds a `markers` section declaring two markers, `db` and `fs`, each
#         with a description explaining how to deselect it (`-m "not db"` /
#         `-m "not fs"`).
#   * swh/loader/core/loader.py
#       - copyright range bumped from 2015-2018 to 2015-2019.
#       - the origin URL is now encoded to UTF-8 first, so `hashlib.sha1` is
#         given the encoded bytes instead of `self.origin['url']` directly.
#   * swh/loader/core/tests/test_loader.py
#       - copyright range bumped from 2018 to 2018-2019.
#       - imports `hashlib` and `pytest`.
#       - `DummyBufferedLoader` replaces its `pass` body with an `__init__`
#         that forwards `*args`/`**kwargs` to `super().__init__` and stores a
#         keyword-only `save_data_path` argument (default `None`).
#       - adds `test_loader_save_data_path`, marked `fs`, which expects
#         `get_save_data_path()` to return
#         `<tmp_path>/sha1:<first 2 hex chars of sha1(url)>/<sha1(url)>/2019`
#         as a string.
#   * swh/loader/package/loader.py
#       - replaces `self.log.warn(...)` with `self.log.warning(...)` in three
#         places (presumably because `warn` is the deprecated alias on the
#         standard `logging.Logger`; the type of `self.log` is not visible in
#         these hunks, so confirm).
#       - fixes the comment typo `paramaters` -> `parameters`.
#
# NOTE(review): `self.__save_data_path = save_data_path` is written inside the
#   body of `DummyBufferedLoader`, so Python's private-name mangling applies to
#   it, whereas the string literal given to `hasattr(..., '__save_data_path')`
#   is never mangled. The new test's `assert not hasattr(loader,
#   '__save_data_path')` therefore looks like it cannot fail, and the
#   `hasattr(self, '__save_data_path')` guard visible in the loader.py hunk may
#   have the same problem, depending on how the attribute is assigned there
#   (that assignment is not part of this patch; verify against the full file).
# NOTE(review): two of the three `self.log.warning` calls still build their
#   message eagerly with `%`; the 404 call already passes lazy arguments.
# NOTE(review): the context comment next to the `parameters` fix still reads
#   "Conceled" and "eleminate"; this patch leaves those untouched.
#
diff --git a/pytest.ini b/pytest.ini --- a/pytest.ini +++ b/pytest.ini @@ -1,2 +1,6 @@ [pytest] norecursedirs = docs + +markers = + db: marks tests as using a db (deselect with '-m "not db"') + fs: marks tests as using the filesystem (deselect with '-m "not fs"') diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py --- a/swh/loader/core/loader.py +++ b/swh/loader/core/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2018 The Software Heritage developers +# Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -210,7 +210,8 @@ if not hasattr(self, '__save_data_path'): year = str(self.visit_date.year) - origin_url_hash = hashlib.sha1(self.origin['url']).hexdigest() + url = self.origin['url'].encode('utf-8') + origin_url_hash = hashlib.sha1(url).hexdigest() path = os.path.join( self.config['save_data_path'], diff --git a/swh/loader/core/tests/test_loader.py b/swh/loader/core/tests/test_loader.py --- a/swh/loader/core/tests/test_loader.py +++ b/swh/loader/core/tests/test_loader.py @@ -1,10 +1,12 @@ -# Copyright (C) 2018 The Software Heritage developers +# Copyright (C) 2018-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime +import hashlib import logging +import pytest from swh.model.hashutil import hash_to_bytes @@ -63,7 +65,9 @@ class DummyBufferedLoader(DummyLoader, BufferedLoader): - pass + def __init__(self, *args, save_data_path=None, **kwargs): + super().__init__(*args, **kwargs) + self.__save_data_path = save_data_path class DummyBaseLoaderTest(BaseLoaderTest): @@ -344,3 +348,25 @@ assert isinstance(loader.log, logging.Logger) assert loader.log.name == \ 
'some.logger.name' + + +@pytest.mark.fs +def test_loader_save_data_path(tmp_path): + loader = DummyBufferedLoader('some.logger.name.1') + url = 'http://bitbucket.org/something' + loader.origin = { + 'url': url, + } + loader.visit_date = datetime.datetime(year=2019, month=10, day=1) + loader.config = { + 'save_data_path': tmp_path, + } + assert not hasattr(loader, '__save_data_path') + + hash_url = hashlib.sha1(url.encode('utf-8')).hexdigest() + expected_save_path = str( + tmp_path / ('sha1:%s' % hash_url[0:2]) / hash_url / '2019' + ) + + save_path = loader.get_save_data_path() + assert save_path == expected_save_path diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -69,7 +69,7 @@ """ if self.debug: - self.log.warn('%s Will not pre-clean up temp dir %s' % ( + self.log.warning('%s Will not pre-clean up temp dir %s' % ( DEBUG_MODE, self.temp_directory )) return @@ -114,7 +114,7 @@ self.all_version_data = [] self.latest_timestamp = 0 # Conceled the data into one dictionary to eleminate the need of - # passing all the paramaters when required in some method + # passing all the parameters when required in some method self.package_details = { 'name': name, 'origin_url': origin_url, @@ -162,8 +162,8 @@ tarball_request = self._request(tarball_url, throw_error=False) if tarball_request.status_code == 404: - self.log.warn('Tarball url %s returns a 404 error.', - tarball_url) + self.log.warning('Tarball url %s returns a 404 error.', + tarball_url) self._visit_status = 'partial' # FIX ME: Do we need to mark it `partial` here continue @@ -468,7 +468,7 @@ """ if self.debug: - self.log.warn('%s Will not clean up temp dir %s' % ( + self.log.warning('%s Will not clean up temp dir %s' % ( DEBUG_MODE, self.temp_directory )) return