def content_for_storage(content, log=None, max_content_size=None,
                        origin_url=None):
    """Prepare content to be ready for storage.

    The input mapping is left untouched: all changes are applied to a
    shallow copy, which is what gets returned.

    Note:
        - 'data' is returned only if max_content_size is not reached.
        - 'perms' and 'path' are stripped only from contents that are
          kept ('visible'); a skipped content keeps them as received.

    Args:
        content (dict): content description. 'length' is read whenever
            max_content_size is set; 'sha1_git' is read only to log a
            skipped content; when 'data' is missing, it is read from the
            file designated by 'path'.
        log: optional logger; when provided, its ``info`` method is
            called once for a content skipped for being too large.
        max_content_size (int): size limit above which a content is
            skipped. A falsy value (None, 0) disables the check.
        origin_url (str): stored under 'origin' for skipped contents.

    Returns:
        content with added data (or reason for being missing)

    Raises:
        KeyError: if a key required by the path taken is missing.
        OSError: if the file at 'path' cannot be opened or read.

    """
    ret = content.copy()

    if max_content_size and ret['length'] > max_content_size:
        if log:
            log.info('Skipping content %s, too large (%s > %s)' %
                     (hash_to_hex(content['sha1_git']),
                      ret['length'],
                      max_content_size))
        ret.pop('data', None)
        ret.update({'status': 'absent',
                    'reason': 'Content too large',
                    'origin': origin_url})
        return ret

    if 'data' not in ret:
        # Use a context manager so the file descriptor is released right
        # away instead of whenever the file object gets garbage collected.
        with open(ret['path'], 'rb') as f:
            ret['data'] = f.read()

    # Extra keys added by swh.model.from_disk, that are not accepted
    # by swh-storage
    ret.pop('perms', None)
    ret.pop('path', None)

    ret['status'] = 'visible'

    return ret
def tmpfile_with_content(fromdir, contentfile):
    """Create a temporary file with content contentfile in directory fromdir.

    Args:
        fromdir (str): directory in which the file is created.
        contentfile (bytes): raw bytes written to the file.

    Returns:
        The path of the newly created file.

    """
    # mkstemp creates and opens the file atomically; the deprecated
    # mktemp only returned a name, leaving a window for another process
    # to create that path first.
    fd, tmpfilepath = tempfile.mkstemp(
        suffix='.swh', prefix='tmp-file-for-test', dir=fromdir)

    with os.fdopen(fd, 'wb') as f:
        f.write(contentfile)

    return tmpfilepath


class TestContentForStorage(unittest.TestCase):
    """Tests for converters.content_for_storage."""

    maxDiff = None

    def setUp(self):
        # Per-test hook: chain to setUp, not to the class-level
        # setUpClass fixture.
        super().setUp()
        self.tmpdir = tempfile.TemporaryDirectory(
            prefix='test-swh-loader-core.'
        )

    def tearDown(self):
        self.tmpdir.cleanup()

    def test_content_for_storage_path(self):
        """Content given by path gets its data read; path/perms dropped."""
        # given
        data = b'temp file for testing content storage conversion'
        tmpfile = tmpfile_with_content(self.tmpdir.name, data)

        obj = Content.from_file(path=os.fsdecode(tmpfile),
                                save_path=True).get_data()

        expected_content = obj.copy()
        expected_content['data'] = data
        expected_content['status'] = 'visible'
        del expected_content['path']
        del expected_content['perms']

        # when
        content = converters.content_for_storage(obj)

        # then
        self.assertEqual(content, expected_content)

    def test_content_for_storage_data(self):
        """Content already holding its data is kept; perms dropped."""
        # given
        data = b'temp file for testing content storage conversion'

        obj = Content.from_bytes(data=data, mode=0o100644).get_data()

        expected_content = obj.copy()
        expected_content['status'] = 'visible'
        del expected_content['perms']

        # when
        content = converters.content_for_storage(obj)

        # then
        self.assertEqual(content, expected_content)

    def test_content_for_storage_too_long(self):
        """Content above max_content_size is marked absent and logged."""
        # given
        data = b'temp file for testing content storage conversion'

        obj = Content.from_bytes(data=data, mode=0o100644).get_data()

        log = Mock()

        expected_content = obj.copy()
        expected_content.pop('data')
        expected_content['status'] = 'absent'
        expected_content['origin'] = 'http://example.org/'
        expected_content['reason'] = 'Content too large'

        # when
        content = converters.content_for_storage(
            obj, log, max_content_size=len(data) - 1,
            origin_url=expected_content['origin'],
        )

        # then
        self.assertEqual(content, expected_content)
        self.assertTrue(log.info.called)
        self.assertIn('Skipping content', log.info.call_args[0][0])
        self.assertIn('too large', log.info.call_args[0][0])