Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import math | import math | ||||
import threading | import threading | ||||
from typing import Any, Dict, List, Tuple, Type | from typing import Any, Dict, List, Tuple, Type | ||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from swh.indexer.storage.exc import DuplicateId, IndexerStorageArgumentException | from swh.indexer.storage.exc import DuplicateId, IndexerStorageArgumentException | ||||
from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | from swh.indexer.storage.interface import IndexerStorageInterface, PagedResult | ||||
from swh.indexer.storage.model import ( | from swh.indexer.storage.model import ( | ||||
BaseRow, | BaseRow, | ||||
ContentCtagsRow, | |||||
ContentLanguageRow, | ContentLanguageRow, | ||||
ContentLicenseRow, | ContentLicenseRow, | ||||
ContentMetadataRow, | ContentMetadataRow, | ||||
ContentMimetypeRow, | ContentMimetypeRow, | ||||
DirectoryIntrinsicMetadataRow, | DirectoryIntrinsicMetadataRow, | ||||
OriginExtrinsicMetadataRow, | OriginExtrinsicMetadataRow, | ||||
OriginIntrinsicMetadataRow, | OriginIntrinsicMetadataRow, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 489 Lines • ▼ Show 20 Lines | example_data = [ | ||||
}, | }, | ||||
{ | { | ||||
"lang": "common-lisp", | "lang": "common-lisp", | ||||
}, | }, | ||||
] | ] | ||||
row_class = ContentLanguageRow | row_class = ContentLanguageRow | ||||
class TestIndexerStorageContentCTags(StorageETypeTester):
    """Test Indexer Storage content_ctags related methods"""

    endpoint_type = "content_ctags"
    tool_name = "universal-ctags"
    example_data = [
        {
            "name": "done",
            "kind": "variable",
            "line": 119,
            "lang": "OCaml",
        },
        {
            "name": "done",
            "kind": "variable",
            "line": 100,
            "lang": "Python",
        },
        {
            "name": "main",
            "kind": "function",
            "line": 119,
            "lang": "Python",
        },
    ]
    row_class = ContentCtagsRow

    @staticmethod
    def _make_ctags(
        sha1: Any, tool_id: Any, specs: List[Dict[str, Any]]
    ) -> List[ContentCtagsRow]:
        """Build one ContentCtagsRow per spec, all sharing ``sha1`` and ``tool_id``.

        Each spec is a dict of the remaining row fields (the tests below pass
        ``name``, ``kind``, ``line`` and ``lang``).
        """
        return [
            ContentCtagsRow(id=sha1, indexer_configuration_id=tool_id, **spec)
            for spec in specs
        ]

    @staticmethod
    def _with_tool(ctag: ContentCtagsRow, tool: Any) -> ContentCtagsRow:
        """Return a copy of ``ctag`` in the form the tests expect back from storage.

        The copy has ``indexer_configuration_id`` cleared and ``tool`` set to
        the given tool description.
        """
        return attr.evolve(ctag, indexer_configuration_id=None, tool=tool)

    # The following tests are disabled because CTAGS behaves differently.
    # NOTE(review): they presumably override tests inherited from
    # StorageETypeTester -- confirm against the base class.
    @pytest.mark.skip(reason="CTAGS behaves differently")
    def test_add__update_in_place_duplicate(self):
        pass

    @pytest.mark.skip(reason="CTAGS behaves differently")
    def test_add_deadlock(self):
        pass

    def test_content_ctags_search(
        self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any]
    ) -> None:
        """Check content_ctags_search results by symbol name.

        Covers a plain search, a search capped with ``limit`` and a search
        resumed with ``last_sha1``.
        """
        storage, data = swh_indexer_storage_with_data

        # 1. given
        tool = data.tools[self.tool_name]
        tool_id = tool["id"]

        ctags1 = self._make_ctags(
            data.sha1_1,
            tool_id,
            [
                {
                    "name": "hello",
                    "kind": "function",
                    "line": 133,
                    "lang": "Python",
                },
                {
                    "name": "counter",
                    "kind": "variable",
                    "line": 119,
                    "lang": "Python",
                },
                {
                    "name": "hello",
                    "kind": "variable",
                    "line": 210,
                    "lang": "Python",
                },
            ],
        )
        ctags1_with_tool = [self._with_tool(ctag, tool) for ctag in ctags1]

        ctags2 = self._make_ctags(
            data.sha1_2,
            tool_id,
            [
                {
                    "name": "hello",
                    "kind": "variable",
                    "line": 100,
                    "lang": "C",
                },
                {
                    "name": "result",
                    "kind": "variable",
                    "line": 120,
                    "lang": "C",
                },
            ],
        )
        ctags2_with_tool = [self._with_tool(ctag, tool) for ctag in ctags2]

        storage.content_ctags_add(ctags1 + ctags2)

        # 1. when
        actual_ctags = list(storage.content_ctags_search("hello", limit=1))

        # 1. then
        assert actual_ctags == [ctags1_with_tool[0]]

        # 2. when
        actual_ctags = list(
            storage.content_ctags_search("hello", limit=1, last_sha1=data.sha1_1)
        )

        # 2. then
        assert actual_ctags == [ctags2_with_tool[0]]

        # 3. when
        actual_ctags = list(storage.content_ctags_search("hello"))

        # 3. then
        assert actual_ctags == [
            ctags1_with_tool[0],
            ctags1_with_tool[2],
            ctags2_with_tool[0],
        ]

        # 4. when
        actual_ctags = list(storage.content_ctags_search("counter"))

        # 4. then
        assert actual_ctags == [ctags1_with_tool[1]]

        # 5. when
        actual_ctags = list(storage.content_ctags_search("result", limit=1))

        # 5. then
        assert actual_ctags == [ctags2_with_tool[1]]

    def test_content_ctags_search_no_result(
        self, swh_indexer_storage: IndexerStorageInterface
    ) -> None:
        """Searching the ``swh_indexer_storage`` fixture for "counter" yields nothing."""
        storage = swh_indexer_storage
        actual_ctags = list(storage.content_ctags_search("counter"))

        assert not actual_ctags

    def test_content_ctags_add__add_new_ctags_added(
        self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any]
    ) -> None:
        """Adding the same ctag twice keeps one row; a new ctag is appended."""
        storage, data = swh_indexer_storage_with_data

        # given
        tool = data.tools[self.tool_name]
        tool_id = tool["id"]

        ctag1, ctag2 = self._make_ctags(
            data.sha1_2,
            tool_id,
            [
                {"name": "done", "kind": "variable", "line": 100, "lang": "Scheme"},
                {"name": "defn", "kind": "function", "line": 120, "lang": "Scheme"},
            ],
        )
        ctag1_with_tool = self._with_tool(ctag1, tool)
        ctag2_with_tool = self._with_tool(ctag2, tool)

        storage.content_ctags_add([ctag1])
        storage.content_ctags_add([ctag1])  # conflict does nothing

        # when
        actual_ctags = list(storage.content_ctags_get([data.sha1_2]))

        # then
        assert actual_ctags == [ctag1_with_tool]

        # given: a second, distinct ctag for the same content
        storage.content_ctags_add([ctag2])

        # when
        actual_ctags = list(storage.content_ctags_get([data.sha1_2]))

        # then
        assert actual_ctags == [ctag1_with_tool, ctag2_with_tool]

    def test_content_ctags_add__update_in_place(
        self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any]
    ) -> None:
        """Re-adding a stored ctag together with a new one yields both rows once."""
        storage, data = swh_indexer_storage_with_data

        # given
        tool = data.tools[self.tool_name]
        tool_id = tool["id"]

        ctag1, ctag2 = self._make_ctags(
            data.sha1_2,
            tool_id,
            [
                {"name": "done", "kind": "variable", "line": 100, "lang": "Scheme"},
                {"name": "defn", "kind": "function", "line": 120, "lang": "Scheme"},
            ],
        )
        ctag1_with_tool = self._with_tool(ctag1, tool)
        ctag2_with_tool = self._with_tool(ctag2, tool)

        storage.content_ctags_add([ctag1])

        # when
        actual_ctags = list(storage.content_ctags_get([data.sha1_2]))

        # then
        assert actual_ctags == [ctag1_with_tool]

        # given: the already-stored ctag is sent again alongside a new one
        storage.content_ctags_add([ctag1, ctag2])

        # when
        actual_ctags = list(storage.content_ctags_get([data.sha1_2]))

        # then
        assert actual_ctags == [ctag1_with_tool, ctag2_with_tool]

    def test_add_empty(
        self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any]
    ) -> None:
        """Adding an empty list reports zero additions and stores nothing."""
        storage, data = swh_indexer_storage_with_data
        etype = self.endpoint_type

        summary = endpoint(storage, etype, "add")([])
        assert summary == {"content_ctags:add": 0}

        actual_ctags = list(endpoint(storage, etype, "get")([data.sha1_2]))

        assert actual_ctags == []

    def test_get_unknown(
        self, swh_indexer_storage_with_data: Tuple[IndexerStorageInterface, Any]
    ) -> None:
        """Getting ctags for a content with none stored returns an empty list."""
        storage, data = swh_indexer_storage_with_data
        etype = self.endpoint_type

        actual_ctags = list(endpoint(storage, etype, "get")([data.sha1_2]))

        assert actual_ctags == []
class TestIndexerStorageContentMetadata(StorageETypeTester): | class TestIndexerStorageContentMetadata(StorageETypeTester): | ||||
"""Test Indexer Storage content_metadata related methods""" | """Test Indexer Storage content_metadata related methods""" | ||||
tool_name = "swh-metadata-detector" | tool_name = "swh-metadata-detector" | ||||
endpoint_type = "content_metadata" | endpoint_type = "content_metadata" | ||||
example_data = [ | example_data = [ | ||||
{ | { | ||||
"metadata": { | "metadata": { | ||||
▲ Show 20 Lines • Show All 1,296 Lines • Show Last 20 Lines |