Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/tests/test_converters.py
# Copyright (C) 2015-2017 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import tempfile | import tempfile | ||||
import unittest | |||||
from unittest.mock import Mock | |||||
from swh.loader.core import converters | from swh.loader.core import converters | ||||
from swh.model.from_disk import Content | from swh.model import from_disk | ||||
from swh.model.model import Content, SkippedContent | |||||
def tmpfile_with_content(fromdir, contentfile):
    """Create a temporary file with content contentfile in directory fromdir.

    The file is created with ``tempfile.mkstemp`` rather than the deprecated
    ``tempfile.mktemp``: the latter only picks a name, so another process
    could create that path before we open it.

    Args:
        fromdir: directory in which the file is created (converted with
            ``str`` so path-like objects are accepted)
        contentfile (bytes): raw content written to the file

    Returns:
        The path of the created file; the caller is responsible for
        removing it.

    """
    fd, tmpfilepath = tempfile.mkstemp(
        suffix='.swh',
        prefix='tmp-file-for-test',
        dir=str(fromdir))

    # fdopen takes ownership of the descriptor, so leaving the ``with``
    # block closes it
    with os.fdopen(fd, 'wb') as f:
        f.write(contentfile)

    return tmpfilepath
def test_content_for_storage_path(tmpdir):
    """Convert a content loaded from a file on disk.

    The expected object is built from the on-disk representation by adding
    the raw data and a 'visible' status, and dropping the 'path' and 'perms'
    keys; the conversion result must be equal to it.

    """
    # given
    data = b'temp file for testing content storage conversion'
    tmpfile = tmpfile_with_content(tmpdir, data)

    obj = from_disk.Content.from_file(path=os.fsdecode(tmpfile),
                                      save_path=True).get_data()

    expected_content = obj.copy()
    expected_content['data'] = data
    expected_content['status'] = 'visible'
    del expected_content['path']
    del expected_content['perms']
    expected_content = Content.from_dict(expected_content)

    # when
    content = converters.content_for_storage(obj)

    # then
    assert content == expected_content
def test_content_for_storage_data(tmpdir):
    """Convert a content built directly from in-memory bytes.

    The input dict has its 'perms' key removed before conversion; the
    expected object is that same dict with a 'visible' status.

    """
    # given
    data = b'temp file for testing content storage conversion'
    obj = from_disk.Content.from_bytes(data=data, mode=0o100644).get_data()
    del obj['perms']

    expected_content = obj.copy()
    expected_content['status'] = 'visible'
    expected_content = Content.from_dict(expected_content)

    # when
    content = converters.content_for_storage(obj)

    # then
    assert content == expected_content
def test_content_for_storage_too_long(tmpdir):
    """Convert a content whose size exceeds the allowed maximum.

    With ``max_content_size`` set one byte below the data length, the
    result must equal a skipped content: no 'data', status 'absent', the
    origin url passed to the converter and the reason 'Content too large'.

    """
    # given
    data = b'temp file for testing content storage conversion'
    obj = from_disk.Content.from_bytes(data=data, mode=0o100644).get_data()
    del obj['perms']

    expected_content = obj.copy()
    expected_content.pop('data')
    expected_content['status'] = 'absent'
    expected_content['origin'] = 'http://example.org/'
    expected_content['reason'] = 'Content too large'
    expected_content = SkippedContent.from_dict(expected_content)

    # when
    content = converters.content_for_storage(
        obj, max_content_size=len(data) - 1,
        origin_url=expected_content.origin,
    )

    # then
    assert content == expected_content
def test_prepare_contents(tmpdir):
    """Split a batch of contents into kept and skipped ones.

    Two contents are submitted with ``max_content_size`` equal to the length
    of the smaller one: exactly one content must be kept (status 'visible',
    no 'reason') and exactly one skipped (status 'absent', reason
    'Content too large', origin set to the url passed in).

    """
    contents = []
    data_fine = b'tmp file fine'
    max_size = len(data_fine)
    for data in [b'tmp file with too much data', data_fine]:
        obj = from_disk.Content.from_bytes(data=data, mode=0o100644).get_data()
        del obj['perms']
        contents.append(obj)

    actual_contents, actual_skipped_contents = converters.prepare_contents(
        contents, max_content_size=max_size, origin_url='some-origin')

    assert len(actual_contents) == 1
    assert len(actual_skipped_contents) == 1

    actual_content = actual_contents[0]
    assert 'reason' not in actual_content
    assert actual_content['status'] == 'visible'

    actual_skipped_content = actual_skipped_contents[0]
    assert actual_skipped_content['reason'] == 'Content too large'
    assert actual_skipped_content['status'] == 'absent'
    assert actual_skipped_content['origin'] == 'some-origin'