Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_ctags.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import unittest | import unittest | ||||
from unittest.mock import patch | from unittest.mock import patch | ||||
import pytest | import pytest | ||||
import swh.indexer.ctags | import swh.indexer.ctags | ||||
from swh.indexer.ctags import CtagsIndexer, run_ctags | from swh.indexer.ctags import CtagsIndexer, run_ctags | ||||
from swh.indexer.storage.model import ContentCtagsRow | from swh.indexer.storage.model import ContentCtagsRow | ||||
from swh.indexer.tests.utils import ( | from swh.indexer.tests.utils import ( | ||||
BASE_TEST_CONFIG, | BASE_TEST_CONFIG, | ||||
OBJ_STORAGE_DATA, | OBJ_STORAGE_DATA, | ||||
RAW_CONTENT_IDS, | |||||
SHA1_TO_CTAGS, | SHA1_TO_CTAGS, | ||||
CommonContentIndexerTest, | CommonContentIndexerTest, | ||||
fill_obj_storage, | fill_obj_storage, | ||||
fill_storage, | fill_storage, | ||||
filter_dict, | filter_dict, | ||||
) | ) | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def setUp(self): | ||||
super().setUp() | super().setUp() | ||||
self.indexer = CtagsIndexer(config=CONFIG) | self.indexer = CtagsIndexer(config=CONFIG) | ||||
self.indexer.catch_exceptions = False | self.indexer.catch_exceptions = False | ||||
self.idx_storage = self.indexer.idx_storage | self.idx_storage = self.indexer.idx_storage | ||||
fill_storage(self.indexer.storage) | fill_storage(self.indexer.storage) | ||||
fill_obj_storage(self.indexer.objstorage) | fill_obj_storage(self.indexer.objstorage) | ||||
# Prepare test input | # Prepare test input | ||||
self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5" | self.id0, self.id1, self.id2 = RAW_CONTENT_IDS | ||||
self.id1 = "d4c647f0fc257591cc9ba1722484229780d1c607" | |||||
self.id2 = "688a5ef812c53907562fe379d4b3851e69c7cb15" | |||||
tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()} | tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()} | ||||
self.expected_results = [ | self.expected_results = [ | ||||
*[ | *[ | ||||
ContentCtagsRow( | ContentCtagsRow( | ||||
id=hash_to_bytes(self.id0), | id=self.id0, | ||||
tool=tool, | tool=tool, | ||||
**kwargs, | **kwargs, | ||||
) | ) | ||||
for kwargs in SHA1_TO_CTAGS[self.id0] | for kwargs in SHA1_TO_CTAGS[self.id0] | ||||
], | ], | ||||
*[ | *[ | ||||
ContentCtagsRow( | ContentCtagsRow( | ||||
id=hash_to_bytes(self.id1), | id=self.id1, | ||||
tool=tool, | tool=tool, | ||||
**kwargs, | **kwargs, | ||||
) | ) | ||||
for kwargs in SHA1_TO_CTAGS[self.id1] | for kwargs in SHA1_TO_CTAGS[self.id1] | ||||
], | ], | ||||
*[ | *[ | ||||
ContentCtagsRow( | ContentCtagsRow( | ||||
id=hash_to_bytes(self.id2), | id=self.id2, | ||||
tool=tool, | tool=tool, | ||||
**kwargs, | **kwargs, | ||||
) | ) | ||||
for kwargs in SHA1_TO_CTAGS[self.id2] | for kwargs in SHA1_TO_CTAGS[self.id2] | ||||
], | ], | ||||
] | ] | ||||
self._set_mocks() | self._set_mocks() | ||||
def _set_mocks(self): | def _set_mocks(self): | ||||
def find_ctags_for_content(raw_content): | def find_ctags_for_content(raw_content): | ||||
for (sha1, ctags) in SHA1_TO_CTAGS.items(): | for (sha1, ctags) in SHA1_TO_CTAGS.items(): | ||||
if OBJ_STORAGE_DATA[sha1] == raw_content: | if OBJ_STORAGE_DATA[hash_to_bytes(sha1)] == raw_content: | ||||
return ctags | return ctags | ||||
else: | else: | ||||
raise ValueError( | raise ValueError( | ||||
("%r not found in objstorage, can't mock its ctags.") % raw_content | ("%r not found in objstorage, can't mock its ctags.") % raw_content | ||||
) | ) | ||||
def fake_language(raw_content, *args, **kwargs): | def fake_language(raw_content, *args, **kwargs): | ||||
ctags = find_ctags_for_content(raw_content) | ctags = find_ctags_for_content(raw_content) | ||||
return {"lang": ctags[0]["lang"]} | return {"lang": ctags[0]["lang"]} | ||||
self._real_compute_language = swh.indexer.ctags.compute_language | self._real_compute_language = swh.indexer.ctags.compute_language | ||||
swh.indexer.ctags.compute_language = fake_language | swh.indexer.ctags.compute_language = fake_language | ||||
def fake_check_output(cmd, *args, **kwargs): | def fake_check_output(cmd, *args, **kwargs): | ||||
id_ = cmd[-1].split("/")[-1] | id_ = cmd[-1].split("/")[-1] | ||||
return "\n".join( | return "\n".join( | ||||
json.dumps({"language": ctag["lang"], **ctag}) | json.dumps({"language": ctag["lang"], **ctag}) | ||||
for ctag in SHA1_TO_CTAGS[id_] | for ctag in SHA1_TO_CTAGS[hash_to_bytes(id_)] | ||||
) | ) | ||||
self._real_check_output = swh.indexer.ctags.subprocess.check_output | self._real_check_output = swh.indexer.ctags.subprocess.check_output | ||||
swh.indexer.ctags.subprocess.check_output = fake_check_output | swh.indexer.ctags.subprocess.check_output = fake_check_output | ||||
def tearDown(self): | def tearDown(self): | ||||
swh.indexer.ctags.compute_language = self._real_compute_language | swh.indexer.ctags.compute_language = self._real_compute_language | ||||
swh.indexer.ctags.subprocess.check_output = self._real_check_output | swh.indexer.ctags.subprocess.check_output = self._real_check_output | ||||
super().tearDown() | super().tearDown() | ||||
def test_ctags_w_no_tool(): | def test_ctags_w_no_tool(): | ||||
with pytest.raises(ValueError): | with pytest.raises(ValueError): | ||||
CtagsIndexer(config=filter_dict(CONFIG, "tools")) | CtagsIndexer(config=filter_dict(CONFIG, "tools")) |