self = <swh.indexer.tests.test_mimetype.TestMimetypePartitionIndexer testMethod=test__index_contents_with_indexed_data>
def test__index_contents_with_indexed_data(self):
"""Indexing contents with existing data results in less indexed data
"""
_start, _end = [self.contents[0], self.contents[2]] # output hex ids
start, end = map(hashutil.hash_to_bytes, (_start, _end))
data_indexed = [self.id0, self.id2]
# given
actual_results = self.indexer._index_contents(
start, end, indexed=set(map(hash_to_bytes, data_indexed))
)
# craft the expected results
expected_results = self.expected_results.copy()
for already_indexed_key in data_indexed:
expected_results.pop(already_indexed_key)
> self.assert_results_ok(start, end, actual_results, expected_results)
.tox/py3/lib/python3.7/site-packages/swh/indexer/tests/utils.py:732:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/indexer/tests/utils.py:692: in assert_results_ok
actual_results = list(actual_results)
.tox/py3/lib/python3.7/site-packages/swh/indexer/indexer.py:426: in _index_contents
for sha1 in self._list_contents_to_index(partition_id, nb_partitions, indexed):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <swh.indexer.mimetype.MimetypePartitionIndexer object at 0x7ff7a4e0ee48>
partition_id = b'\x01\xc97\x9d\xfc3\x809c\xd0|\x1c\xcct\x8d?\xe4\xc9k\xb5'
nb_partitions = b'\x10;\xc0\x87\xdb\x1d&\xaf\xc3\xa0(?8f=\x08\x1e\x9b\x01\xe6'
indexed = {b'\x01\xc97\x9d\xfc3\x809c\xd0|\x1c\xcct\x8d?\xe4\xc9k\xb5', b'\x10;\xc0\x87\xdb\x1d&\xaf\xc3\xa0(?8f=\x08\x1e\x9b\x01\xe6'}
def _list_contents_to_index(
self, partition_id: int, nb_partitions: int, indexed: Set[Sha1]
) -> Iterator[Sha1]:
"""Compute from storage the new contents to index in the partition_id . The already
indexed contents are skipped.
Args:
partition_id: Index of the partition to fetch data from
nb_partitions: Total number of partition
indexed: Set of content already indexed.
Yields:
Sha1 id (bytes) of contents to index
"""
if not isinstance(partition_id, int) or not isinstance(nb_partitions, int):
raise TypeError(
> f"identifiers must be int, not {partition_id!r} and {nb_partitions!r}."
)
E TypeError: identifiers must be int, not b'\x01\xc97\x9d\xfc3\x809c\xd0|\x1c\xcct\x8d?\xe4\xc9k\xb5' and b'\x10;\xc0\x87\xdb\x1d&\xaf\xc3\xa0(?8f=\x08\x1e\x9b\x01\xe6'.
.tox/py3/lib/python3.7/site-packages/swh/indexer/indexer.py:395: TypeError
TEST RESULT
TEST RESULT
- Run At
- Aug 6 2020, 10:24 AM