Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_mimetype.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import unittest | import unittest | ||||
import logging | |||||
from unittest.mock import patch | from unittest.mock import patch | ||||
from swh.indexer.mimetype import ( | from swh.indexer.mimetype import ( | ||||
MimetypeIndexer, MimetypeRangeIndexer, compute_mimetype_encoding | MimetypeIndexer, MimetypeRangeIndexer, compute_mimetype_encoding | ||||
) | ) | ||||
from swh.indexer.tests.test_utils import ( | from swh.indexer.tests.test_utils import ( | ||||
MockObjStorage, BasicMockStorage, BasicMockIndexerStorage, | |||||
CommonContentIndexerTest, CommonContentIndexerRangeTest, | CommonContentIndexerTest, CommonContentIndexerRangeTest, | ||||
CommonIndexerWithErrorsTest, CommonIndexerNoTool, | CommonIndexerWithErrorsTest, CommonIndexerNoTool, | ||||
BASE_TEST_CONFIG | BASE_TEST_CONFIG, fill_storage, fill_obj_storage | ||||
) | ) | ||||
class FakeMagicResult: | class FakeMagicResult: | ||||
def __init__(self, mimetype, encoding): | def __init__(self, mimetype, encoding): | ||||
self.mime_type = mimetype | self.mime_type = mimetype | ||||
self.encoding = encoding | self.encoding = encoding | ||||
Show All 28 Lines | def parse_config_file(self, *args, **kwargs): | ||||
'version': '1:5.30-1+deb9u1', | 'version': '1:5.30-1+deb9u1', | ||||
'configuration': { | 'configuration': { | ||||
"type": "library", | "type": "library", | ||||
"debian-package": "python3-magic" | "debian-package": "python3-magic" | ||||
}, | }, | ||||
}, | }, | ||||
} | } | ||||
def prepare(self): | |||||
super().prepare() | |||||
self.idx_storage = BasicMockIndexerStorage() | |||||
self.log = logging.getLogger('swh.indexer') | |||||
self.objstorage = MockObjStorage() | |||||
class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): | class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase): | ||||
"""Mimetype indexer test scenarios: | """Mimetype indexer test scenarios: | ||||
- Known sha1s in the input list have their data indexed | - Known sha1s in the input list have their data indexed | ||||
- Unknown sha1 in the input list are not indexed | - Unknown sha1 in the input list are not indexed | ||||
""" | """ | ||||
def get_indexer_results(self, ids): | |||||
yield from self.idx_storage.content_mimetype_get(ids) | |||||
def setUp(self): | def setUp(self): | ||||
self.indexer = MimetypeTestIndexer() | self.indexer = MimetypeTestIndexer() | ||||
self.idx_storage = self.indexer.idx_storage | |||||
self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' | self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' | ||||
self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' | self.id1 = '688a5ef812c53907562fe379d4b3851e69c7cb15' | ||||
self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' | self.id2 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709' | ||||
tool_id = self.indexer.tool['id'] | tool_id = self.indexer.tool['id'] | ||||
self.expected_results = { | self.expected_results = { | ||||
self.id0: { | self.id0: { | ||||
'id': self.id0, | 'id': self.id0, | ||||
Show All 30 Lines | def parse_config_file(self, *args, **kwargs): | ||||
'configuration': { | 'configuration': { | ||||
"type": "library", | "type": "library", | ||||
"debian-package": "python3-magic" | "debian-package": "python3-magic" | ||||
}, | }, | ||||
}, | }, | ||||
'write_batch_size': 100, | 'write_batch_size': 100, | ||||
} | } | ||||
def prepare(self): | |||||
super().prepare() | |||||
self.idx_storage = BasicMockIndexerStorage() | |||||
# this hardcodes some contents, will use this to setup the storage | |||||
self.objstorage = MockObjStorage() | |||||
# sync objstorage and storage | |||||
contents = [{'sha1': c_id} for c_id in self.objstorage] | |||||
self.storage = BasicMockStorage(contents) | |||||
class TestMimetypeRangeIndexer( | class TestMimetypeRangeIndexer( | ||||
CommonContentIndexerRangeTest, unittest.TestCase): | CommonContentIndexerRangeTest, unittest.TestCase): | ||||
"""Range Mimetype Indexer tests. | """Range Mimetype Indexer tests. | ||||
- new data within range are indexed | - new data within range are indexed | ||||
- no data outside a range are indexed | - no data outside a range are indexed | ||||
- with filtering existing indexed data prior to compute new index | - with filtering existing indexed data prior to compute new index | ||||
- without filtering existing indexed data prior to compute new index | - without filtering existing indexed data prior to compute new index | ||||
""" | """ | ||||
def setUp(self): | def setUp(self): | ||||
super().setUp() | |||||
self.indexer = MimetypeRangeIndexerTest() | self.indexer = MimetypeRangeIndexerTest() | ||||
# will play along with the objstorage's mocked contents for now | fill_storage(self.indexer.storage) | ||||
self.contents = sorted(self.indexer.objstorage) | fill_obj_storage(self.indexer.objstorage) | ||||
# FIXME: leverage swh.objstorage.in_memory_storage's | |||||
# InMemoryObjStorage, swh.storage.tests's gen_contents, and | |||||
# hypothesis to generate data to actually run indexer on those | |||||
self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' | self.id0 = '01c9379dfc33803963d07c1ccc748d3fe4c96bb5' | ||||
self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069' | self.id1 = '02fb2c89e14f7fab46701478c83779c7beb7b069' | ||||
self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6' | self.id2 = '103bc087db1d26afc3a0283f38663d081e9b01e6' | ||||
tool_id = self.indexer.tool['id'] | tool_id = self.indexer.tool['id'] | ||||
self.expected_results = { | self.expected_results = { | ||||
self.id0: { | self.id0: { | ||||
Show All 32 Lines |