Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
# Copyright (C) 2015-2018 The Software Heritage developers | |||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | |||||
# See top-level LICENSE file for more information | |||||
import os | |||||
import threading | import threading | ||||
import unittest | |||||
import pytest | import pytest | ||||
from hypothesis import given | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.indexer.storage import get_indexer_storage, MAPPING_NAMES | |||||
from swh.core.db.tests.db_testing import SingleDbTestFixture | |||||
from swh.indexer.tests.storage.generate_data_test import ( | |||||
gen_content_mimetypes, gen_content_fossology_licenses | |||||
) | |||||
from swh.indexer.tests.storage import SQL_DIR | |||||
from swh.indexer.metadata_dictionary import MAPPINGS | |||||
TOOLS = [ | |||||
{ | |||||
'tool_name': 'universal-ctags', | |||||
'tool_version': '~git7859817b', | |||||
'tool_configuration': { | |||||
"command_line": "ctags --fields=+lnz --sort=no --links=no " | |||||
"--output-format=json <filepath>"} | |||||
}, | |||||
{ | |||||
'tool_name': 'swh-metadata-translator', | |||||
'tool_version': '0.0.1', | |||||
'tool_configuration': {"type": "local", "context": "NpmMapping"}, | |||||
}, | |||||
{ | |||||
'tool_name': 'swh-metadata-detector', | |||||
'tool_version': '0.0.1', | |||||
'tool_configuration': { | |||||
"type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, | |||||
}, | |||||
{ | |||||
'tool_name': 'swh-metadata-detector2', | |||||
'tool_version': '0.0.1', | |||||
'tool_configuration': { | |||||
"type": "local", "context": ["NpmMapping", "CodemetaMapping"]}, | |||||
}, | |||||
{ | |||||
'tool_name': 'file', | |||||
'tool_version': '5.22', | |||||
'tool_configuration': {"command_line": "file --mime <filepath>"}, | |||||
}, | |||||
{ | |||||
'tool_name': 'pygments', | |||||
'tool_version': '2.0.1+dfsg-1.1+deb8u1', | |||||
'tool_configuration': { | |||||
"type": "library", "debian-package": "python3-pygments"}, | |||||
}, | |||||
{ | |||||
'tool_name': 'pygments', | |||||
'tool_version': '2.0.1+dfsg-1.1+deb8u1', | |||||
'tool_configuration': { | |||||
"type": "library", | |||||
"debian-package": "python3-pygments", | |||||
"max_content_size": 10240 | |||||
}, | |||||
}, | |||||
{ | |||||
'tool_name': 'nomos', | |||||
'tool_version': '3.1.0rc2-31-ga2cbb8c', | |||||
'tool_configuration': {"command_line": "nomossa <filepath>"}, | |||||
} | |||||
] | |||||
@pytest.mark.db | def prepare_mimetypes_from(fossology_licenses): | ||||
class BasePgTestStorage(SingleDbTestFixture): | """Fossology license needs some consistent data in db to run. | ||||
"""Base test class for most indexer tests. | |||||
It adds support for Storage testing to the SingleDbTestFixture class. | |||||
It will also build the database from the swh-indexed/sql/*.sql files. | |||||
""" | """ | ||||
mimetypes = [] | |||||
TEST_DB_NAME = 'softwareheritage-test-indexer' | for c in fossology_licenses: | ||||
TEST_DB_DUMP = os.path.join(SQL_DIR, '*.sql') | mimetypes.append({ | ||||
'id': c['id'], | |||||
def setUp(self): | 'mimetype': 'text/plain', | ||||
super().setUp() | 'encoding': 'utf-8', | ||||
self.storage_config = { | 'indexer_configuration_id': c['indexer_configuration_id'], | ||||
'cls': 'local', | }) | ||||
'args': { | return mimetypes | ||||
'db': 'dbname=%s' % self.TEST_DB_NAME, | |||||
}, | |||||
} | |||||
def tearDown(self): | |||||
self.reset_storage_tables() | |||||
self.storage = None | |||||
super().tearDown() | |||||
def reset_storage_tables(self): | |||||
excluded = {'indexer_configuration'} | |||||
self.reset_db_tables(self.TEST_DB_NAME, excluded=excluded) | |||||
db = self.test_db[self.TEST_DB_NAME] | |||||
db.conn.commit() | |||||
def gen_generic_endpoint_tests(endpoint_type, tool_name, | def endpoint(storage, endpoint_type, endpoint_name): | ||||
example_data1, example_data2): | return getattr(storage, endpoint_type + '_' + endpoint_name) | ||||
def rename(f): | |||||
f.__name__ = 'test_' + endpoint_type + f.__name__ | |||||
return f | |||||
def endpoint(self, endpoint_name): | |||||
return getattr(self.storage, endpoint_type + '_' + endpoint_name) | |||||
@rename | def check_missing(self, swh_indexer_storage_with_data): | ||||
def missing(self): | storage, data = swh_indexer_storage_with_data | ||||
# given | etype = self.endpoint_type | ||||
tool_id = self.tools[tool_name]['id'] | tool_id = data.tools[self.tool_name]['id'] | ||||
# given 2 (hopefully) unknown objects | |||||
query = [ | query = [ | ||||
{ | { | ||||
'id': self.sha1_1, | 'id': data.sha1_1, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
}, | }, | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
}] | }] | ||||
# when | # we expect these are both returned by the xxx_missing endpoint | ||||
actual_missing = endpoint(self, 'missing')(query) | actual_missing = endpoint(storage, etype, 'missing')(query) | ||||
assert list(actual_missing) == [ | |||||
# then | data.sha1_1, | ||||
self.assertEqual(list(actual_missing), [ | data.sha1_2, | ||||
self.sha1_1, | ] | ||||
self.sha1_2, | |||||
]) | |||||
# given | # now, when we add one of them | ||||
endpoint(self, 'add')([{ | endpoint(storage, etype, 'add')([{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
}]) | }]) | ||||
# when | # we expect only the other one returned | ||||
actual_missing = endpoint(self, 'missing')(query) | actual_missing = endpoint(storage, etype, 'missing')(query) | ||||
assert list(actual_missing) == [data.sha1_1] | |||||
# then | |||||
self.assertEqual(list(actual_missing), [self.sha1_1]) | |||||
@rename | def check_add__drop_duplicate(self, swh_indexer_storage_with_data): | ||||
def add__drop_duplicate(self): | storage, data = swh_indexer_storage_with_data | ||||
# given | etype = self.endpoint_type | ||||
tool_id = self.tools[tool_name]['id'] | tool_id = data.tools[self.tool_name]['id'] | ||||
# add the first object | |||||
data_v1 = { | data_v1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
endpoint(storage, etype, 'add')([data_v1]) | |||||
# given | # should be able to retrieve it | ||||
endpoint(self, 'add')([data_v1]) | actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) | ||||
# when | |||||
actual_data = list(endpoint(self, 'get')([self.sha1_2])) | |||||
# then | |||||
expected_data_v1 = [{ | expected_data_v1 = [{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'tool': self.tools[tool_name], | 'tool': data.tools[self.tool_name], | ||||
}] | }] | ||||
self.assertEqual(actual_data, expected_data_v1) | assert actual_data == expected_data_v1 | ||||
# given | # now if we add a modified version of the same object (same id) | ||||
data_v2 = data_v1.copy() | data_v2 = data_v1.copy() | ||||
data_v2.update(example_data2) | data_v2.update(self.example_data[1]) | ||||
endpoint(storage, etype, 'add')([data_v2]) | |||||
endpoint(self, 'add')([data_v2]) | # we excpect to retrieve the original data, not the modified one | ||||
actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) | |||||
assert actual_data == expected_data_v1 | |||||
actual_data = list(endpoint(self, 'get')([self.sha1_2])) | |||||
# data did not change as the v2 was dropped. | def check_add__update_in_place_duplicate(self, swh_indexer_storage_with_data): | ||||
self.assertEqual(actual_data, expected_data_v1) | storage, data = swh_indexer_storage_with_data | ||||
etype = self.endpoint_type | |||||
@rename | tool = data.tools[self.tool_name] | ||||
def add__update_in_place_duplicate(self): | |||||
# given | |||||
tool_id = self.tools[tool_name]['id'] | |||||
data_v1 = { | data_v1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
# given | # given | ||||
endpoint(self, 'add')([data_v1]) | endpoint(storage, etype, 'add')([data_v1]) | ||||
# when | # when | ||||
actual_data = list(endpoint(self, 'get')([self.sha1_2])) | actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) | ||||
expected_data_v1 = [{ | expected_data_v1 = [{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'tool': self.tools[tool_name], | 'tool': tool, | ||||
}] | }] | ||||
# then | # then | ||||
self.assertEqual(actual_data, expected_data_v1) | assert actual_data == expected_data_v1 | ||||
# given | # given | ||||
data_v2 = data_v1.copy() | data_v2 = data_v1.copy() | ||||
data_v2.update(example_data2) | data_v2.update(self.example_data[1]) | ||||
endpoint(self, 'add')([data_v2], conflict_update=True) | endpoint(storage, etype, 'add')([data_v2], conflict_update=True) | ||||
actual_data = list(endpoint(self, 'get')([self.sha1_2])) | actual_data = list(endpoint(storage, etype, 'get')([data.sha1_2])) | ||||
expected_data_v2 = [{ | expected_data_v2 = [{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data2, | **self.example_data[1], | ||||
'tool': self.tools[tool_name], | 'tool': tool, | ||||
}] | }] | ||||
# data did change as the v2 was used to overwrite v1 | # data did change as the v2 was used to overwrite v1 | ||||
self.assertEqual(actual_data, expected_data_v2) | assert actual_data == expected_data_v2 | ||||
@rename | |||||
def add__update_in_place_deadlock(self): | def check_add__update_in_place_deadlock(self, swh_indexer_storage_with_data): | ||||
# given | storage, data = swh_indexer_storage_with_data | ||||
tool_id = self.tools[tool_name]['id'] | etype = self.endpoint_type | ||||
tool = data.tools[self.tool_name] | |||||
hashes = [ | hashes = [ | ||||
hash_to_bytes( | hash_to_bytes( | ||||
'34973274ccef6ab4dfaaf86599792fa9c3fe4{:03d}'.format(i)) | '34973274ccef6ab4dfaaf86599792fa9c3fe4{:03d}'.format(i)) | ||||
for i in range(1000)] | for i in range(1000)] | ||||
data_v1 = [ | data_v1 = [ | ||||
{ | { | ||||
'id': hash_, | 'id': hash_, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
for hash_ in hashes | for hash_ in hashes | ||||
] | ] | ||||
data_v2 = [ | data_v2 = [ | ||||
{ | { | ||||
'id': hash_, | 'id': hash_, | ||||
**example_data2, | **self.example_data[1], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
for hash_ in hashes | for hash_ in hashes | ||||
] | ] | ||||
# Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
# all items to be in the DB. | # all items to be in the DB. | ||||
data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
# given | # given | ||||
endpoint(self, 'add')(data_v1) | endpoint(storage, etype, 'add')(data_v1) | ||||
# when | # when | ||||
actual_data = list(endpoint(self, 'get')(hashes)) | actual_data = list(endpoint(storage, etype, 'get')(hashes)) | ||||
expected_data_v1 = [ | expected_data_v1 = [ | ||||
{ | { | ||||
'id': hash_, | 'id': hash_, | ||||
**example_data1, | **self.example_data[0], | ||||
'tool': self.tools[tool_name], | 'tool': tool, | ||||
} | } | ||||
for hash_ in hashes | for hash_ in hashes | ||||
] | ] | ||||
# then | # then | ||||
self.assertEqual(actual_data, expected_data_v1) | assert actual_data == expected_data_v1 | ||||
# given | # given | ||||
def f1(): | def f1(): | ||||
endpoint(self, 'add')(data_v2a, conflict_update=True) | endpoint(storage, etype, 'add')(data_v2a, conflict_update=True) | ||||
def f2(): | def f2(): | ||||
endpoint(self, 'add')(data_v2b, conflict_update=True) | endpoint(storage, etype, 'add')(data_v2b, conflict_update=True) | ||||
t1 = threading.Thread(target=f1) | t1 = threading.Thread(target=f1) | ||||
t2 = threading.Thread(target=f2) | t2 = threading.Thread(target=f2) | ||||
t2.start() | t2.start() | ||||
t1.start() | t1.start() | ||||
t1.join() | t1.join() | ||||
t2.join() | t2.join() | ||||
actual_data = list(endpoint(self, 'get')(hashes)) | actual_data = sorted(endpoint(storage, etype, 'get')(hashes), | ||||
key=lambda x: x['id']) | |||||
expected_data_v2 = [ | expected_data_v2 = [ | ||||
{ | { | ||||
'id': hash_, | 'id': hash_, | ||||
**example_data2, | **self.example_data[1], | ||||
'tool': self.tools[tool_name], | 'tool': tool, | ||||
} | } | ||||
for hash_ in hashes | for hash_ in hashes | ||||
] | ] | ||||
self.assertCountEqual(actual_data, expected_data_v2) | assert actual_data == expected_data_v2 | ||||
def add__duplicate_twice(self): | |||||
# given | def check_add__duplicate_twice(self, swh_indexer_storage_with_data): | ||||
tool_id = self.tools[tool_name]['id'] | storage, data = swh_indexer_storage_with_data | ||||
etype = self.endpoint_type | |||||
tool = data.tools[self.tool_name] | |||||
data_rev1 = { | data_rev1 = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool['id'] | ||||
} | } | ||||
data_rev2 = { | data_rev2 = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
**example_data2, | **self.example_data[1], | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool['id'] | ||||
} | } | ||||
# when | # when | ||||
endpoint(self, 'add')([data_rev1]) | endpoint(storage, etype, 'add')([data_rev1]) | ||||
with self.assertRaises(ValueError): | with pytest.raises(ValueError): | ||||
endpoint(self, 'add')( | endpoint(storage, etype, 'add')( | ||||
[data_rev2, data_rev2], | [data_rev2, data_rev2], | ||||
conflict_update=True) | conflict_update=True) | ||||
# then | # then | ||||
actual_data = list(endpoint(self, 'get')( | actual_data = list(endpoint(storage, etype, 'get')( | ||||
[self.revision_id_2, self.revision_id_1])) | [data.revision_id_2, data.revision_id_1])) | ||||
expected_data = [{ | expected_data = [{ | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'tool': self.tools[tool_name] | 'tool': tool, | ||||
}] | }] | ||||
self.assertEqual(actual_data, expected_data) | assert actual_data == expected_data | ||||
@rename | |||||
def get(self): | |||||
# given | |||||
tool_id = self.tools[tool_name]['id'] | |||||
query = [self.sha1_2, self.sha1_1] | def check_get(self, swh_indexer_storage_with_data): | ||||
storage, data = swh_indexer_storage_with_data | |||||
etype = self.endpoint_type | |||||
tool = data.tools[self.tool_name] | |||||
query = [data.sha1_2, data.sha1_1] | |||||
data1 = { | data1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
# when | # when | ||||
endpoint(self, 'add')([data1]) | endpoint(storage, etype, 'add')([data1]) | ||||
# then | # then | ||||
actual_data = list(endpoint(self, 'get')(query)) | actual_data = list(endpoint(storage, etype, 'get')(query)) | ||||
# then | # then | ||||
expected_data = [{ | expected_data = [{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'tool': self.tools[tool_name] | 'tool': tool, | ||||
}] | }] | ||||
self.assertEqual(actual_data, expected_data) | assert actual_data == expected_data | ||||
@rename | |||||
def delete(self): | |||||
# given | |||||
tool_id = self.tools[tool_name]['id'] | |||||
query = [self.sha1_2, self.sha1_1] | def check_revision_intrinsic_metadata_delete( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
etype = self.endpoint_type | |||||
tool = data.tools[self.tool_name] | |||||
query = [data.sha1_2, data.sha1_1] | |||||
data1 = { | data1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
**example_data1, | **self.example_data[0], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
# when | # when | ||||
endpoint(self, 'add')([data1]) | endpoint(storage, etype, 'add')([data1]) | ||||
endpoint(self, 'delete')([ | endpoint(storage, etype, 'delete')([ | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
]) | ]) | ||||
# then | # then | ||||
actual_data = list(endpoint(self, 'get')(query)) | actual_data = list(endpoint(storage, etype, 'get')(query)) | ||||
# then | # then | ||||
self.assertEqual(actual_data, []) | assert not actual_data | ||||
@rename | def check_revision_intrinsic_metadata_delete_nonexisting( | ||||
def delete_nonexisting(self): | self, swh_indexer_storage_with_data): | ||||
tool_id = self.tools[tool_name]['id'] | storage, data = swh_indexer_storage_with_data | ||||
endpoint(self, 'delete')([ | etype = self.endpoint_type | ||||
tool = data.tools[self.tool_name] | |||||
endpoint(storage, etype, 'delete')([ | |||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool['id'], | ||||
} | } | ||||
]) | ]) | ||||
return ( | |||||
missing, | |||||
add__drop_duplicate, | |||||
add__update_in_place_duplicate, | |||||
add__update_in_place_deadlock, | |||||
add__duplicate_twice, | |||||
get, | |||||
delete, | |||||
delete_nonexisting, | |||||
) | |||||
class CommonTestStorage: | |||||
"""Base class for Indexer Storage testing. | |||||
class TestIndexerStorageContentMimetypes: | |||||
"""Test Indexer Storage content_mimetype related methods | |||||
""" | """ | ||||
def setUp(self, *args, **kwargs): | endpoint_type = 'content_mimetype' | ||||
super().setUp() | tool_name = 'file' | ||||
self.storage = get_indexer_storage(**self.storage_config) | example_data = [ | ||||
tools = self.storage.indexer_configuration_add(TOOLS) | { | ||||
self.tools = {} | |||||
for tool in tools: | |||||
tool_name = tool['tool_name'] | |||||
while tool_name in self.tools: | |||||
tool_name += '_' | |||||
self.tools[tool_name] = { | |||||
'id': tool['id'], | |||||
'name': tool['tool_name'], | |||||
'version': tool['tool_version'], | |||||
'configuration': tool['tool_configuration'], | |||||
} | |||||
self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') | |||||
self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') | |||||
self.revision_id_1 = hash_to_bytes( | |||||
'7026b7c1a2af56521e951c01ed20f255fa054238') | |||||
self.revision_id_2 = hash_to_bytes( | |||||
'7026b7c1a2af56521e9587659012345678904321') | |||||
self.revision_id_3 = hash_to_bytes( | |||||
'7026b7c1a2af56521e9587659012345678904320') | |||||
self.origin_url_1 = 'file:///dev/0/zero' # 44434341 | |||||
self.origin_url_2 = 'file:///dev/1/one' # 44434342 | |||||
self.origin_url_3 = 'file:///dev/2/two' # 54974445 | |||||
def test_check_config(self): | |||||
self.assertTrue(self.storage.check_config(check_write=True)) | |||||
self.assertTrue(self.storage.check_config(check_write=False)) | |||||
# generate content_mimetype tests | |||||
( | |||||
test_content_mimetype_missing, | |||||
test_content_mimetype_add__drop_duplicate, | |||||
test_content_mimetype_add__update_in_place_duplicate, | |||||
test_content_mimetype_add__update_in_place_deadlock, | |||||
test_content_mimetype_add__duplicate_twice, | |||||
test_content_mimetype_get, | |||||
_, # content_mimetype_detete, | |||||
_, # content_mimetype_detete_nonexisting, | |||||
) = gen_generic_endpoint_tests( | |||||
endpoint_type='content_mimetype', | |||||
tool_name='file', | |||||
example_data1={ | |||||
'mimetype': 'text/plain', | 'mimetype': 'text/plain', | ||||
'encoding': 'utf-8', | 'encoding': 'utf-8', | ||||
}, | }, | ||||
example_data2={ | { | ||||
'mimetype': 'text/html', | 'mimetype': 'text/html', | ||||
'encoding': 'us-ascii', | 'encoding': 'us-ascii', | ||||
}, | }, | ||||
) | ] | ||||
# content_language tests | test_missing = check_missing | ||||
( | test_add__drop_duplicate = check_add__drop_duplicate | ||||
test_content_language_missing, | test_add__update_in_place_duplicate = check_add__update_in_place_duplicate | ||||
test_content_language_add__drop_duplicate, | test_add__update_in_place_deadlock = check_add__update_in_place_deadlock | ||||
test_content_language_add__update_in_place_duplicate, | test_add__duplicate_twice = check_add__duplicate_twice | ||||
test_content_language_add__update_in_place_deadlock, | test_get = check_get | ||||
test_content_language_add__duplicate_twice, | |||||
test_content_language_get, | |||||
_, # test_content_language_delete, | class TestIndexerStorageContentLanguage: | ||||
_, # test_content_language_delete_nonexisting, | """Test Indexer Storage content_language related methods | ||||
) = gen_generic_endpoint_tests( | """ | ||||
endpoint_type='content_language', | endpoint_type = 'content_language' | ||||
tool_name='pygments', | tool_name = 'pygments' | ||||
example_data1={ | example_data = [ | ||||
{ | |||||
'lang': 'haskell', | 'lang': 'haskell', | ||||
}, | }, | ||||
example_data2={ | { | ||||
'lang': 'common-lisp', | 'lang': 'common-lisp', | ||||
}, | }, | ||||
) | ] | ||||
# content_ctags tests | test_missing = check_missing | ||||
( | test_add__drop_duplicate = check_add__drop_duplicate | ||||
test_content_ctags_missing, | test_add__update_in_place_duplicate = check_add__update_in_place_duplicate | ||||
# the following tests are disabled because CTAGS behave differently | test_add__update_in_place_deadlock = check_add__update_in_place_deadlock | ||||
_, # test_content_ctags_add__drop_duplicate, | test_add__duplicate_twice = check_add__duplicate_twice | ||||
_, # test_content_ctags_add__update_in_place_duplicate, | test_get = check_get | ||||
_, # test_content_ctags_add__update_in_place_deadlock, | |||||
_, # test_content_ctags_add__duplicate_twice, | |||||
_, # test_content_ctags_get, | class TestIndexerStorageContentCTags: | ||||
_, # test_content_ctags_delete, | """Test Indexer Storage content_ctags related methods | ||||
_, # test_content_ctags_delete_nonexisting, | """ | ||||
) = gen_generic_endpoint_tests( | endpoint_type = 'content_ctags' | ||||
endpoint_type='content_ctags', | tool_name = 'universal-ctags' | ||||
tool_name='universal-ctags', | example_data = [ | ||||
example_data1={ | { | ||||
'ctags': [{ | 'ctags': [{ | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 119, | 'line': 119, | ||||
'lang': 'OCaml', | 'lang': 'OCaml', | ||||
}] | }] | ||||
}, | }, | ||||
example_data2={ | { | ||||
'ctags': [ | 'ctags': [ | ||||
{ | { | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
{ | { | ||||
'name': 'main', | 'name': 'main', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 119, | 'line': 119, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}] | }] | ||||
}, | }, | ||||
) | ] | ||||
def test_content_ctags_search(self): | test_missing = check_missing | ||||
class TestIndexerStorageContentMetadata: | |||||
"""Test Indexer Storage content_metadata related methods | |||||
""" | |||||
tool_name = 'swh-metadata-detector' | |||||
endpoint_type = 'content_metadata' | |||||
example_data = [ | |||||
{ | |||||
'metadata': { | |||||
'other': {}, | |||||
'codeRepository': { | |||||
'type': 'git', | |||||
'url': 'https://github.com/moranegg/metadata_test' | |||||
}, | |||||
'description': 'Simple package.json test for indexer', | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
}, | |||||
{ | |||||
'metadata': { | |||||
'other': {}, | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
}, | |||||
] | |||||
test_missing = check_missing | |||||
test_add__drop_duplicate = check_add__drop_duplicate | |||||
test_add__update_in_place_duplicate = check_add__update_in_place_duplicate | |||||
test_add__update_in_place_deadlock = check_add__update_in_place_deadlock | |||||
test_add__duplicate_twice = check_add__duplicate_twice | |||||
test_get = check_get | |||||
class TestIndexerStorageRevisionIntrinsicMetadata: | |||||
"""Test Indexer Storage revision_intrinsic_metadata related methods | |||||
""" | |||||
tool_name = 'swh-metadata-detector' | |||||
endpoint_type = 'revision_intrinsic_metadata' | |||||
example_data = [ | |||||
{ | |||||
'metadata': { | |||||
'other': {}, | |||||
'codeRepository': { | |||||
'type': 'git', | |||||
'url': 'https://github.com/moranegg/metadata_test' | |||||
}, | |||||
'description': 'Simple package.json test for indexer', | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
'mappings': ['mapping1'], | |||||
}, | |||||
{ | |||||
'metadata': { | |||||
'other': {}, | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
'mappings': ['mapping2'], | |||||
}, | |||||
] | |||||
test_missing = check_missing | |||||
test_add__drop_duplicate = check_add__drop_duplicate | |||||
test_add__update_in_place_duplicate = check_add__update_in_place_duplicate | |||||
test_add__update_in_place_deadlock = check_add__update_in_place_deadlock | |||||
test_add__duplicate_twice = check_add__duplicate_twice | |||||
test_get = check_get | |||||
test_revision_intrinsic_metadata_delete = \ | |||||
check_revision_intrinsic_metadata_delete | |||||
test_revision_intrinsic_metadata_delete_nonexisting = \ | |||||
check_revision_intrinsic_metadata_delete_nonexisting | |||||
class TestIndexerStorageOthers: | |||||
"""Non generic tests for the IndexerStorage. | |||||
""" | |||||
def test_check_config(self, swh_indexer_storage): | |||||
storage = swh_indexer_storage | |||||
assert storage.check_config(check_write=True) | |||||
assert storage.check_config(check_write=False) | |||||
def test_content_ctags_search(self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# 1. given | # 1. given | ||||
tool = self.tools['universal-ctags'] | tool = data.tools['universal-ctags'] | ||||
tool_id = tool['id'] | tool_id = tool['id'] | ||||
ctag1 = { | ctag1 = { | ||||
'id': self.sha1_1, | 'id': data.sha1_1, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'ctags': [ | 'ctags': [ | ||||
{ | { | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 133, | 'line': 133, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
{ | { | ||||
'name': 'counter', | 'name': 'counter', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 119, | 'line': 119, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
{ | { | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 210, | 'line': 210, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
] | ] | ||||
} | } | ||||
ctag2 = { | ctag2 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'ctags': [ | 'ctags': [ | ||||
{ | { | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'C', | 'lang': 'C', | ||||
}, | }, | ||||
{ | { | ||||
'name': 'result', | 'name': 'result', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'C', | 'lang': 'C', | ||||
}, | }, | ||||
] | ] | ||||
} | } | ||||
self.storage.content_ctags_add([ctag1, ctag2]) | storage.content_ctags_add([ctag1, ctag2]) | ||||
# 1. when | # 1. when | ||||
actual_ctags = list(self.storage.content_ctags_search('hello', | actual_ctags = list(storage.content_ctags_search('hello', limit=1)) | ||||
limit=1)) | |||||
# 1. then | # 1. then | ||||
self.assertEqual(actual_ctags, [ | assert actual_ctags == [ | ||||
{ | { | ||||
'id': ctag1['id'], | 'id': ctag1['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 133, | 'line': 133, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
} | } | ||||
]) | ] | ||||
# 2. when | # 2. when | ||||
actual_ctags = list(self.storage.content_ctags_search( | actual_ctags = list(storage.content_ctags_search( | ||||
'hello', | 'hello', | ||||
limit=1, | limit=1, | ||||
last_sha1=ctag1['id'])) | last_sha1=ctag1['id'])) | ||||
# 2. then | # 2. then | ||||
self.assertEqual(actual_ctags, [ | assert actual_ctags == [ | ||||
{ | { | ||||
'id': ctag2['id'], | 'id': ctag2['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'C', | 'lang': 'C', | ||||
} | } | ||||
]) | ] | ||||
# 3. when | # 3. when | ||||
actual_ctags = list(self.storage.content_ctags_search('hello')) | actual_ctags = list(storage.content_ctags_search('hello')) | ||||
# 3. then | # 3. then | ||||
self.assertEqual(actual_ctags, [ | assert actual_ctags == [ | ||||
{ | { | ||||
'id': ctag1['id'], | 'id': ctag1['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 133, | 'line': 133, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
{ | { | ||||
'id': ctag1['id'], | 'id': ctag1['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 210, | 'line': 210, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}, | }, | ||||
{ | { | ||||
'id': ctag2['id'], | 'id': ctag2['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'hello', | 'name': 'hello', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'C', | 'lang': 'C', | ||||
}, | }, | ||||
]) | ] | ||||
# 4. when | # 4. when | ||||
actual_ctags = list(self.storage.content_ctags_search('counter')) | actual_ctags = list(storage.content_ctags_search('counter')) | ||||
# then | # then | ||||
self.assertEqual(actual_ctags, [{ | assert actual_ctags == [{ | ||||
'id': ctag1['id'], | 'id': ctag1['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'counter', | 'name': 'counter', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 119, | 'line': 119, | ||||
'lang': 'Python', | 'lang': 'Python', | ||||
}]) | }] | ||||
# 5. when | # 5. when | ||||
actual_ctags = list(self.storage.content_ctags_search('result', | actual_ctags = list(storage.content_ctags_search('result', limit=1)) | ||||
limit=1)) | |||||
# then | # then | ||||
self.assertEqual(actual_ctags, [{ | assert actual_ctags == [{ | ||||
'id': ctag2['id'], | 'id': ctag2['id'], | ||||
'tool': tool, | 'tool': tool, | ||||
'name': 'result', | 'name': 'result', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'C', | 'lang': 'C', | ||||
}]) | }] | ||||
def test_content_ctags_search_no_result(self, swh_indexer_storage): | |||||
storage = swh_indexer_storage | |||||
actual_ctags = list(storage.content_ctags_search('counter')) | |||||
def test_content_ctags_search_no_result(self): | assert not actual_ctags | ||||
actual_ctags = list(self.storage.content_ctags_search('counter')) | |||||
self.assertEqual(actual_ctags, []) | def test_content_ctags_add__add_new_ctags_added( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
def test_content_ctags_add__add_new_ctags_added(self): | |||||
# given | # given | ||||
tool = self.tools['universal-ctags'] | tool = data.tools['universal-ctags'] | ||||
tool_id = tool['id'] | tool_id = tool['id'] | ||||
ctag_v1 = { | ctag_v1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'ctags': [{ | 'ctags': [{ | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
}] | }] | ||||
} | } | ||||
# given | # given | ||||
self.storage.content_ctags_add([ctag_v1]) | storage.content_ctags_add([ctag_v1]) | ||||
self.storage.content_ctags_add([ctag_v1]) # conflict does nothing | storage.content_ctags_add([ctag_v1]) # conflict does nothing | ||||
# when | # when | ||||
actual_ctags = list(self.storage.content_ctags_get( | actual_ctags = list(storage.content_ctags_get([data.sha1_2])) | ||||
[self.sha1_2])) | |||||
# then | # then | ||||
expected_ctags = [{ | expected_ctags = [{ | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool, | 'tool': tool, | ||||
}] | }] | ||||
self.assertEqual(actual_ctags, expected_ctags) | assert actual_ctags == expected_ctags | ||||
# given | # given | ||||
ctag_v2 = ctag_v1.copy() | ctag_v2 = ctag_v1.copy() | ||||
ctag_v2.update({ | ctag_v2.update({ | ||||
'ctags': [ | 'ctags': [ | ||||
{ | { | ||||
'name': 'defn', | 'name': 'defn', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
} | } | ||||
] | ] | ||||
}) | }) | ||||
self.storage.content_ctags_add([ctag_v2]) | storage.content_ctags_add([ctag_v2]) | ||||
expected_ctags = [ | expected_ctags = [ | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool, | 'tool': tool, | ||||
}, { | }, { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'defn', | 'name': 'defn', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool, | 'tool': tool, | ||||
} | } | ||||
] | ] | ||||
actual_ctags = list(self.storage.content_ctags_get( | actual_ctags = list(storage.content_ctags_get( | ||||
[self.sha1_2])) | [data.sha1_2])) | ||||
self.assertEqual(actual_ctags, expected_ctags) | assert actual_ctags == expected_ctags | ||||
def test_content_ctags_add__update_in_place(self): | def test_content_ctags_add__update_in_place( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool = self.tools['universal-ctags'] | tool = data.tools['universal-ctags'] | ||||
tool_id = tool['id'] | tool_id = tool['id'] | ||||
ctag_v1 = { | ctag_v1 = { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'ctags': [{ | 'ctags': [{ | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
}] | }] | ||||
} | } | ||||
# given | # given | ||||
self.storage.content_ctags_add([ctag_v1]) | storage.content_ctags_add([ctag_v1]) | ||||
# when | # when | ||||
actual_ctags = list(self.storage.content_ctags_get( | actual_ctags = list(storage.content_ctags_get( | ||||
[self.sha1_2])) | [data.sha1_2])) | ||||
# then | # then | ||||
expected_ctags = [ | expected_ctags = [ | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool | 'tool': tool | ||||
} | } | ||||
] | ] | ||||
self.assertEqual(actual_ctags, expected_ctags) | assert actual_ctags == expected_ctags | ||||
# given | # given | ||||
ctag_v2 = ctag_v1.copy() | ctag_v2 = ctag_v1.copy() | ||||
ctag_v2.update({ | ctag_v2.update({ | ||||
'ctags': [ | 'ctags': [ | ||||
{ | { | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
}, | }, | ||||
{ | { | ||||
'name': 'defn', | 'name': 'defn', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
} | } | ||||
] | ] | ||||
}) | }) | ||||
self.storage.content_ctags_add([ctag_v2], conflict_update=True) | storage.content_ctags_add([ctag_v2], conflict_update=True) | ||||
actual_ctags = list(self.storage.content_ctags_get( | actual_ctags = list(storage.content_ctags_get( | ||||
[self.sha1_2])) | [data.sha1_2])) | ||||
# ctag did change as the v2 was used to overwrite v1 | # ctag did change as the v2 was used to overwrite v1 | ||||
expected_ctags = [ | expected_ctags = [ | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'done', | 'name': 'done', | ||||
'kind': 'variable', | 'kind': 'variable', | ||||
'line': 100, | 'line': 100, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool, | 'tool': tool, | ||||
}, | }, | ||||
{ | { | ||||
'id': self.sha1_2, | 'id': data.sha1_2, | ||||
'name': 'defn', | 'name': 'defn', | ||||
'kind': 'function', | 'kind': 'function', | ||||
'line': 120, | 'line': 120, | ||||
'lang': 'Scheme', | 'lang': 'Scheme', | ||||
'tool': tool, | 'tool': tool, | ||||
} | } | ||||
] | ] | ||||
self.assertEqual(actual_ctags, expected_ctags) | assert actual_ctags == expected_ctags | ||||
# content_fossology_license tests | def test_content_fossology_license_add__new_license_added( | ||||
( | self, swh_indexer_storage_with_data): | ||||
_, # The endpoint content_fossology_license_missing does not exist | storage, data = swh_indexer_storage_with_data | ||||
# the following tests are disabled because fossology_license tests | |||||
# behave differently | |||||
_, # test_content_fossology_license_add__drop_duplicate, | |||||
_, # test_content_fossology_license_add__update_in_place_duplicate, | |||||
_, # test_content_fossology_license_add__update_in_place_deadlock, | |||||
_, # test_content_metadata_add__duplicate_twice, | |||||
_, # test_content_fossology_license_get, | |||||
_, # test_content_fossology_license_delete, | |||||
_, # test_content_fossology_license_delete_nonexisting, | |||||
) = gen_generic_endpoint_tests( | |||||
endpoint_type='content_fossology_license', | |||||
tool_name='nomos', | |||||
example_data1={ | |||||
'licenses': ['Apache-2.0'], | |||||
}, | |||||
example_data2={ | |||||
'licenses': ['BSD-2-Clause'], | |||||
}, | |||||
) | |||||
def test_content_fossology_license_add__new_license_added(self): | |||||
# given | # given | ||||
tool = self.tools['nomos'] | tool = data.tools['nomos'] | ||||
tool_id = tool['id'] | tool_id = tool['id'] | ||||
license_v1 = { | license_v1 = { | ||||
'id': self.sha1_1, | 'id': data.sha1_1, | ||||
'licenses': ['Apache-2.0'], | 'licenses': ['Apache-2.0'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
# given | # given | ||||
self.storage.content_fossology_license_add([license_v1]) | storage.content_fossology_license_add([license_v1]) | ||||
# conflict does nothing | # conflict does nothing | ||||
self.storage.content_fossology_license_add([license_v1]) | storage.content_fossology_license_add([license_v1]) | ||||
# when | # when | ||||
actual_licenses = list(self.storage.content_fossology_license_get( | actual_licenses = list(storage.content_fossology_license_get( | ||||
[self.sha1_1])) | [data.sha1_1])) | ||||
# then | # then | ||||
expected_license = { | expected_license = { | ||||
self.sha1_1: [{ | data.sha1_1: [{ | ||||
'licenses': ['Apache-2.0'], | 'licenses': ['Apache-2.0'], | ||||
'tool': tool, | 'tool': tool, | ||||
}] | }] | ||||
} | } | ||||
self.assertEqual(actual_licenses, [expected_license]) | assert actual_licenses == [expected_license] | ||||
# given | # given | ||||
license_v2 = license_v1.copy() | license_v2 = license_v1.copy() | ||||
license_v2.update({ | license_v2.update({ | ||||
'licenses': ['BSD-2-Clause'], | 'licenses': ['BSD-2-Clause'], | ||||
}) | }) | ||||
self.storage.content_fossology_license_add([license_v2]) | storage.content_fossology_license_add([license_v2]) | ||||
actual_licenses = list(self.storage.content_fossology_license_get( | actual_licenses = list(storage.content_fossology_license_get( | ||||
[self.sha1_1])) | [data.sha1_1])) | ||||
expected_license = { | expected_license = { | ||||
self.sha1_1: [{ | data.sha1_1: [{ | ||||
'licenses': ['Apache-2.0', 'BSD-2-Clause'], | 'licenses': ['Apache-2.0', 'BSD-2-Clause'], | ||||
'tool': tool | 'tool': tool | ||||
}] | }] | ||||
} | } | ||||
# license did not change as the v2 was dropped. | # license did not change as the v2 was dropped. | ||||
self.assertEqual(actual_licenses, [expected_license]) | assert actual_licenses == [expected_license] | ||||
# content_metadata tests | def test_origin_intrinsic_metadata_get( | ||||
( | self, swh_indexer_storage_with_data): | ||||
test_content_metadata_missing, | storage, data = swh_indexer_storage_with_data | ||||
test_content_metadata_add__drop_duplicate, | |||||
test_content_metadata_add__update_in_place_duplicate, | |||||
test_content_metadata_add__update_in_place_deadlock, | |||||
test_content_metadata_add__duplicate_twice, | |||||
test_content_metadata_get, | |||||
_, # test_content_metadata_delete, | |||||
_, # test_content_metadata_delete_nonexisting, | |||||
) = gen_generic_endpoint_tests( | |||||
endpoint_type='content_metadata', | |||||
tool_name='swh-metadata-detector', | |||||
example_data1={ | |||||
'metadata': { | |||||
'other': {}, | |||||
'codeRepository': { | |||||
'type': 'git', | |||||
'url': 'https://github.com/moranegg/metadata_test' | |||||
}, | |||||
'description': 'Simple package.json test for indexer', | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
}, | |||||
example_data2={ | |||||
'metadata': { | |||||
'other': {}, | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
}, | |||||
) | |||||
# revision_intrinsic_metadata tests | |||||
( | |||||
test_revision_intrinsic_metadata_missing, | |||||
test_revision_intrinsic_metadata_add__drop_duplicate, | |||||
test_revision_intrinsic_metadata_add__update_in_place_duplicate, | |||||
test_revision_intrinsic_metadata_add__update_in_place_deadlock, | |||||
test_revision_intrinsic_metadata_add__duplicate_twice, | |||||
test_revision_intrinsic_metadata_get, | |||||
test_revision_intrinsic_metadata_delete, | |||||
test_revision_intrinsic_metadata_delete_nonexisting, | |||||
) = gen_generic_endpoint_tests( | |||||
endpoint_type='revision_intrinsic_metadata', | |||||
tool_name='swh-metadata-detector', | |||||
example_data1={ | |||||
'metadata': { | |||||
'other': {}, | |||||
'codeRepository': { | |||||
'type': 'git', | |||||
'url': 'https://github.com/moranegg/metadata_test' | |||||
}, | |||||
'description': 'Simple package.json test for indexer', | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
'mappings': ['mapping1'], | |||||
}, | |||||
example_data2={ | |||||
'metadata': { | |||||
'other': {}, | |||||
'name': 'test_metadata', | |||||
'version': '0.0.1' | |||||
}, | |||||
'mappings': ['mapping2'], | |||||
}, | |||||
) | |||||
def test_origin_intrinsic_metadata_get(self): | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin]) | storage.origin_intrinsic_metadata_add([metadata_origin]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1, 'no://where'])) | [data.origin_url_1, 'no://where'])) | ||||
expected_metadata = [{ | expected_metadata = [{ | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata) | assert actual_metadata == expected_metadata | ||||
def test_origin_intrinsic_metadata_delete(self): | def test_origin_intrinsic_metadata_delete( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
metadata_origin2 = metadata_origin.copy() | metadata_origin2 = metadata_origin.copy() | ||||
metadata_origin2['id'] = self.origin_url_2 | metadata_origin2['id'] = data.origin_url_2 | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([ | storage.origin_intrinsic_metadata_add([ | ||||
metadata_origin, metadata_origin2]) | metadata_origin, metadata_origin2]) | ||||
self.storage.origin_intrinsic_metadata_delete([ | storage.origin_intrinsic_metadata_delete([ | ||||
{ | { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1, self.origin_url_2, 'no://where'])) | [data.origin_url_1, data.origin_url_2, 'no://where'])) | ||||
for item in actual_metadata: | for item in actual_metadata: | ||||
item['indexer_configuration_id'] = item.pop('tool')['id'] | item['indexer_configuration_id'] = item.pop('tool')['id'] | ||||
self.assertEqual(actual_metadata, [metadata_origin2]) | assert actual_metadata == [metadata_origin2] | ||||
def test_origin_intrinsic_metadata_delete_nonexisting(self): | def test_origin_intrinsic_metadata_delete_nonexisting( | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | self, swh_indexer_storage_with_data): | ||||
self.storage.origin_intrinsic_metadata_delete([ | storage, data = swh_indexer_storage_with_data | ||||
tool_id = data.tools['swh-metadata-detector']['id'] | |||||
storage.origin_intrinsic_metadata_delete([ | |||||
{ | { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
def test_origin_intrinsic_metadata_add_drop_duplicate(self): | def test_origin_intrinsic_metadata_add_drop_duplicate( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_1, | 'id': data.revision_id_1, | ||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1, 'no://where'])) | [data.origin_url_1, 'no://where'])) | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | assert actual_metadata == expected_metadata_v1 | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2['metadata'] = metadata_v2 | metadata_origin_v2['metadata'] = metadata_v2 | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1])) | [data.origin_url_1])) | ||||
# metadata did not change as the v2 was dropped. | # metadata did not change as the v2 was dropped. | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | assert actual_metadata == expected_metadata_v1 | ||||
def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): | def test_origin_intrinsic_metadata_add_update_in_place_duplicate( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1])) | [data.origin_url_1])) | ||||
# then | # then | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | assert actual_metadata == expected_metadata_v1 | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_update_duplicated_metadata', | 'name': 'test_update_duplicated_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2 = { | metadata_origin_v2 = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v2.copy(), | 'metadata': metadata_v2.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
} | } | ||||
self.storage.revision_intrinsic_metadata_add( | storage.revision_intrinsic_metadata_add( | ||||
[metadata_rev_v2], conflict_update=True) | [metadata_rev_v2], conflict_update=True) | ||||
self.storage.origin_intrinsic_metadata_add( | storage.origin_intrinsic_metadata_add( | ||||
[metadata_origin_v2], conflict_update=True) | [metadata_origin_v2], conflict_update=True) | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(storage.origin_intrinsic_metadata_get( | ||||
[self.origin_url_1])) | [data.origin_url_1])) | ||||
expected_metadata_v2 = [{ | expected_metadata_v2 = [{ | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata_v2, | 'metadata': metadata_v2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
}] | }] | ||||
# metadata did change as the v2 was used to overwrite v1 | # metadata did change as the v2 was used to overwrite v1 | ||||
self.assertEqual(actual_metadata, expected_metadata_v2) | assert actual_metadata == expected_metadata_v2 | ||||
def test_origin_intrinsic_metadata_add__update_in_place_deadlock(self): | def test_origin_intrinsic_metadata_add__update_in_place_deadlock( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
ids = list(range(10)) | ids = list(range(10)) | ||||
example_data1 = { | example_data1 = { | ||||
'metadata': { | 'metadata': { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
} | } | ||||
example_data2 = { | example_data2 = { | ||||
'metadata': { | 'metadata': { | ||||
'version': 'v1.1.1', | 'version': 'v1.1.1', | ||||
'name': 'foo', | 'name': 'foo', | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': { | 'metadata': { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
data_v1 = [ | data_v1 = [ | ||||
{ | { | ||||
'id': 'file:///tmp/origin%d' % id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
data_v2 = [ | data_v2 = [ | ||||
{ | { | ||||
'id': 'file:///tmp/origin%d' % id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
# all items to be in the DB. | # all items to be in the DB. | ||||
data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add(data_v1) | storage.origin_intrinsic_metadata_add(data_v1) | ||||
# when | # when | ||||
origins = ['file:///tmp/origin%d' % i for i in ids] | origins = ['file:///tmp/origin%d' % i for i in ids] | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
expected_data_v1 = [ | expected_data_v1 = [ | ||||
{ | { | ||||
'id': 'file:///tmp/origin%d' % id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# then | # then | ||||
self.assertEqual(actual_data, expected_data_v1) | assert actual_data == expected_data_v1 | ||||
# given | # given | ||||
def f1(): | def f1(): | ||||
self.storage.origin_intrinsic_metadata_add( | storage.origin_intrinsic_metadata_add( | ||||
data_v2a, conflict_update=True) | data_v2a, conflict_update=True) | ||||
def f2(): | def f2(): | ||||
self.storage.origin_intrinsic_metadata_add( | storage.origin_intrinsic_metadata_add( | ||||
data_v2b, conflict_update=True) | data_v2b, conflict_update=True) | ||||
t1 = threading.Thread(target=f1) | t1 = threading.Thread(target=f1) | ||||
t2 = threading.Thread(target=f2) | t2 = threading.Thread(target=f2) | ||||
t2.start() | t2.start() | ||||
t1.start() | t1.start() | ||||
t1.join() | t1.join() | ||||
t2.join() | t2.join() | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(origins)) | actual_data = list(storage.origin_intrinsic_metadata_get(origins)) | ||||
expected_data_v2 = [ | expected_data_v2 = [ | ||||
{ | { | ||||
'id': 'file:///tmp/origin%d' % id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': data.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
self.maxDiff = None | assert len(actual_data) == len(expected_data_v2) | ||||
self.assertCountEqual(actual_data, expected_data_v2) | assert sorted(actual_data, key=lambda x: x['id']) == expected_data_v2 | ||||
def test_origin_intrinsic_metadata_add__duplicate_twice(self): | def test_origin_intrinsic_metadata_add__duplicate_twice( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'developmentStatus': None, | 'developmentStatus': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
with self.assertRaises(ValueError): | with pytest.raises(ValueError): | ||||
self.storage.origin_intrinsic_metadata_add([ | storage.origin_intrinsic_metadata_add([ | ||||
metadata_origin, metadata_origin]) | metadata_origin, metadata_origin]) | ||||
def test_origin_intrinsic_metadata_search_fulltext(self): | def test_origin_intrinsic_metadata_search_fulltext( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
metadata1 = { | metadata1 = { | ||||
'author': 'John Doe', | 'author': 'John Doe', | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': data.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'origin': self.origin_url_1, | |||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_url_2, | 'id': data.origin_url_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertCountEqual( | assert set([res['id'] for res in search(['Doe'])]) \ | ||||
[res['id'] for res in search(['Doe'])], | == set([data.origin_url_1, data.origin_url_2]) | ||||
[self.origin_url_1, self.origin_url_2]) | assert [res['id'] for res in search(['John', 'Doe'])] \ | ||||
self.assertEqual( | == [data.origin_url_1] | ||||
[res['id'] for res in search(['John', 'Doe'])], | assert [res['id'] for res in search(['John'])] \ | ||||
[self.origin_url_1]) | == [data.origin_url_1] | ||||
self.assertEqual( | assert not list(search(['John', 'Jane'])) | ||||
[res['id'] for res in search(['John'])], | |||||
[self.origin_url_1]) | def test_origin_intrinsic_metadata_search_fulltext_rank( | ||||
self.assertEqual( | self, swh_indexer_storage_with_data): | ||||
[res['id'] for res in search(['John', 'Jane'])], | storage, data = swh_indexer_storage_with_data | ||||
[]) | |||||
def test_origin_intrinsic_metadata_search_fulltext_rank(self): | |||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = data.tools['swh-metadata-detector']['id'] | ||||
# The following authors have "Random Person" to add some more content | # The following authors have "Random Person" to add some more content | ||||
# to the JSON data, to work around normalization quirks when there | # to the JSON data, to work around normalization quirks when there | ||||
# are few words (rank/(1+ln(nb_words)) is very sensitive to nb_words | # are few words (rank/(1+ln(nb_words)) is very sensitive to nb_words | ||||
# for small values of nb_words). | # for small values of nb_words). | ||||
metadata1 = { | metadata1 = { | ||||
'author': [ | 'author': [ | ||||
'Random Person', | 'Random Person', | ||||
'John Doe', | 'John Doe', | ||||
'Jane Doe', | 'Jane Doe', | ||||
] | ] | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': data.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': [ | 'author': [ | ||||
'Random Person', | 'Random Person', | ||||
'Jane Doe', | 'Jane Doe', | ||||
] | ] | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_url_2, | 'id': data.origin_url_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertEqual( | assert [res['id'] for res in search(['Doe'])] \ | ||||
[res['id'] for res in search(['Doe'])], | == [data.origin_url_1, data.origin_url_2] | ||||
[self.origin_url_1, self.origin_url_2]) | assert [res['id'] for res in search(['Doe'], limit=1)] \ | ||||
self.assertEqual( | == [data.origin_url_1] | ||||
[res['id'] for res in search(['Doe'], limit=1)], | assert [res['id'] for res in search(['John'])] \ | ||||
[self.origin_url_1]) | == [data.origin_url_1] | ||||
self.assertEqual( | assert [res['id'] for res in search(['Jane'])] \ | ||||
[res['id'] for res in search(['John'])], | == [data.origin_url_2, data.origin_url_1] | ||||
[self.origin_url_1]) | assert [res['id'] for res in search(['John', 'Jane'])] \ | ||||
self.assertEqual( | == [data.origin_url_1] | ||||
[res['id'] for res in search(['Jane'])], | |||||
[self.origin_url_2, self.origin_url_1]) | def _fill_origin_intrinsic_metadata( | ||||
self.assertEqual( | self, swh_indexer_storage_with_data): | ||||
[res['id'] for res in search(['John', 'Jane'])], | storage, data = swh_indexer_storage_with_data | ||||
[self.origin_url_1]) | tool1_id = data.tools['swh-metadata-detector']['id'] | ||||
tool2_id = data.tools['swh-metadata-detector2']['id'] | |||||
def _fill_origin_intrinsic_metadata(self): | |||||
tool1_id = self.tools['swh-metadata-detector']['id'] | |||||
tool2_id = self.tools['swh-metadata-detector2']['id'] | |||||
metadata1 = { | metadata1 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'John Doe', | 'author': 'John Doe', | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': data.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_url_1, | 'id': data.origin_url_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': data.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': data.revision_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_url_2, | 'id': data.origin_url_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
} | } | ||||
metadata3 = { | metadata3 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
} | } | ||||
metadata3_rev = { | metadata3_rev = { | ||||
'id': self.revision_id_3, | 'id': data.revision_id_3, | ||||
'metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata3_origin = { | metadata3_origin = { | ||||
'id': self.origin_url_3, | 'id': data.origin_url_3, | ||||
'metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['pkg-info'], | 'mappings': ['pkg-info'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_3, | 'from_revision': data.revision_id_3, | ||||
} | } | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata3_rev]) | storage.revision_intrinsic_metadata_add([metadata3_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata3_origin]) | storage.origin_intrinsic_metadata_add([metadata3_origin]) | ||||
def test_origin_intrinsic_metadata_search_by_producer(self): | def test_origin_intrinsic_metadata_search_by_producer( | ||||
self._fill_origin_intrinsic_metadata() | self, swh_indexer_storage_with_data): | ||||
tool1 = self.tools['swh-metadata-detector'] | storage, data = swh_indexer_storage_with_data | ||||
tool2 = self.tools['swh-metadata-detector2'] | self._fill_origin_intrinsic_metadata( | ||||
endpoint = self.storage.origin_intrinsic_metadata_search_by_producer | swh_indexer_storage_with_data) | ||||
tool1 = data.tools['swh-metadata-detector'] | |||||
tool2 = data.tools['swh-metadata-detector2'] | |||||
endpoint = storage.origin_intrinsic_metadata_search_by_producer | |||||
# test pagination | # test pagination | ||||
# no 'page_token' param, return all origins | # no 'page_token' param, return all origins | ||||
self.assertCountEqual( | assert endpoint(ids_only=True)['origins'] \ | ||||
endpoint(ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2, data.origin_url_3] | ||||
[self.origin_url_1, self.origin_url_2, self.origin_url_3]) | |||||
# 'page_token' is < than origin_1, return everything | # 'page_token' is < than origin_1, return everything | ||||
self.assertCountEqual( | assert endpoint( | ||||
endpoint( | page_token=data.origin_url_1[:-1], ids_only=True)['origins'] \ | ||||
page_token=self.origin_url_1[:-1], ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2, data.origin_url_3] | ||||
[self.origin_url_1, self.origin_url_2, self.origin_url_3]) | |||||
# 'page_token' is origin_3, return nothing | # 'page_token' is origin_3, return nothing | ||||
self.assertCountEqual( | assert not endpoint( | ||||
endpoint(page_token=self.origin_url_3, ids_only=True)['origins'], | page_token=data.origin_url_3, ids_only=True)['origins'] | ||||
[]) | |||||
# test limit argument | # test limit argument | ||||
self.assertCountEqual( | assert endpoint(page_token=data.origin_url_1[:-1], | ||||
endpoint(page_token=self.origin_url_1[:-1], | limit=2, ids_only=True)['origins'] \ | ||||
limit=2, ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2] | ||||
[self.origin_url_1, self.origin_url_2]) | assert endpoint(page_token=data.origin_url_1, | ||||
self.assertCountEqual( | limit=2, ids_only=True)['origins'] \ | ||||
endpoint(page_token=self.origin_url_1, | == [data.origin_url_2, data.origin_url_3] | ||||
limit=2, ids_only=True)['origins'], | assert endpoint(page_token=data.origin_url_2, | ||||
[self.origin_url_2, self.origin_url_3]) | limit=2, ids_only=True)['origins'] \ | ||||
self.assertCountEqual( | == [data.origin_url_3] | ||||
endpoint(page_token=self.origin_url_2, | |||||
limit=2, ids_only=True)['origins'], | |||||
[self.origin_url_3]) | |||||
# test mappings filtering | # test mappings filtering | ||||
self.assertCountEqual( | assert endpoint(mappings=['npm'], ids_only=True)['origins'] \ | ||||
endpoint(mappings=['npm'], ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2] | ||||
[self.origin_url_1, self.origin_url_2]) | assert endpoint(mappings=['npm', 'gemspec'], | ||||
self.assertCountEqual( | ids_only=True)['origins'] \ | ||||
endpoint(mappings=['npm', 'gemspec'], ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2] | ||||
[self.origin_url_1, self.origin_url_2]) | assert endpoint(mappings=['gemspec'], ids_only=True)['origins'] \ | ||||
self.assertCountEqual( | == [data.origin_url_2] | ||||
endpoint(mappings=['gemspec'], ids_only=True)['origins'], | assert endpoint(mappings=['pkg-info'], ids_only=True)['origins'] \ | ||||
[self.origin_url_2]) | == [data.origin_url_3] | ||||
self.assertCountEqual( | assert not endpoint(mappings=['foobar'], ids_only=True)['origins'] | ||||
endpoint(mappings=['pkg-info'], ids_only=True)['origins'], | |||||
[self.origin_url_3]) | |||||
self.assertCountEqual( | |||||
endpoint(mappings=['foobar'], ids_only=True)['origins'], | |||||
[]) | |||||
# test pagination + mappings | # test pagination + mappings | ||||
self.assertCountEqual( | assert endpoint(mappings=['npm'], limit=1, ids_only=True)['origins'] \ | ||||
endpoint(mappings=['npm'], limit=1, ids_only=True)['origins'], | == [data.origin_url_1] | ||||
[self.origin_url_1]) | |||||
# test tool filtering | # test tool filtering | ||||
self.assertCountEqual( | assert endpoint( | ||||
endpoint(tool_ids=[tool1['id']], ids_only=True)['origins'], | tool_ids=[tool1['id']], ids_only=True)['origins'] \ | ||||
[self.origin_url_1]) | == [data.origin_url_1] | ||||
self.assertCountEqual( | assert sorted(endpoint( | ||||
endpoint(tool_ids=[tool2['id']], ids_only=True)['origins'], | tool_ids=[tool2['id']], ids_only=True)['origins']) \ | ||||
[self.origin_url_2, self.origin_url_3]) | == [data.origin_url_2, data.origin_url_3] | ||||
self.assertCountEqual( | assert sorted(endpoint( | ||||
endpoint(tool_ids=[tool1['id'], tool2['id']], | tool_ids=[tool1['id'], tool2['id']], ids_only=True)['origins']) \ | ||||
ids_only=True)['origins'], | == [data.origin_url_1, data.origin_url_2, data.origin_url_3] | ||||
[self.origin_url_1, self.origin_url_2, self.origin_url_3]) | |||||
# test ids_only=False | # test ids_only=False | ||||
self.assertEqual(endpoint(mappings=['gemspec'])['origins'], [{ | assert endpoint(mappings=['gemspec'])['origins'] \ | ||||
'id': self.origin_url_2, | == [{ | ||||
'id': data.origin_url_2, | |||||
'metadata': { | 'metadata': { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
}, | }, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'tool': tool2, | 'tool': tool2, | ||||
'from_revision': self.revision_id_2, | 'from_revision': data.revision_id_2, | ||||
}]) | }] | ||||
def test_origin_intrinsic_metadata_stats(self): | def test_origin_intrinsic_metadata_stats( | ||||
self._fill_origin_intrinsic_metadata() | self, swh_indexer_storage_with_data): | ||||
storage, data = swh_indexer_storage_with_data | |||||
self._fill_origin_intrinsic_metadata( | |||||
swh_indexer_storage_with_data) | |||||
result = self.storage.origin_intrinsic_metadata_stats() | result = storage.origin_intrinsic_metadata_stats() | ||||
self.assertEqual(result, { | assert result == { | ||||
'per_mapping': { | 'per_mapping': { | ||||
'gemspec': 1, | 'gemspec': 1, | ||||
'npm': 2, | 'npm': 2, | ||||
'pkg-info': 1, | 'pkg-info': 1, | ||||
'codemeta': 0, | 'codemeta': 0, | ||||
'maven': 0, | 'maven': 0, | ||||
}, | }, | ||||
'total': 3, | 'total': 3, | ||||
'non_empty': 2, | 'non_empty': 2, | ||||
}) | } | ||||
def test_indexer_configuration_add(self): | def test_indexer_configuration_add( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'some-unknown-tool', | 'tool_name': 'some-unknown-tool', | ||||
'tool_version': 'some-version', | 'tool_version': 'some-version', | ||||
'tool_configuration': {"debian-package": "some-package"}, | 'tool_configuration': {"debian-package": "some-package"}, | ||||
} | } | ||||
actual_tool = self.storage.indexer_configuration_get(tool) | actual_tool = storage.indexer_configuration_get(tool) | ||||
self.assertIsNone(actual_tool) # does not exist | assert actual_tool is None # does not exist | ||||
# add it | # add it | ||||
actual_tools = list(self.storage.indexer_configuration_add([tool])) | actual_tools = list(storage.indexer_configuration_add([tool])) | ||||
self.assertEqual(len(actual_tools), 1) | assert len(actual_tools) == 1 | ||||
actual_tool = actual_tools[0] | actual_tool = actual_tools[0] | ||||
self.assertIsNotNone(actual_tool) # now it exists | assert actual_tool is not None # now it exists | ||||
new_id = actual_tool.pop('id') | new_id = actual_tool.pop('id') | ||||
self.assertEqual(actual_tool, tool) | assert actual_tool == tool | ||||
actual_tools2 = list(self.storage.indexer_configuration_add([tool])) | actual_tools2 = list(storage.indexer_configuration_add([tool])) | ||||
actual_tool2 = actual_tools2[0] | actual_tool2 = actual_tools2[0] | ||||
self.assertIsNotNone(actual_tool2) # now it exists | assert actual_tool2 is not None # now it exists | ||||
new_id2 = actual_tool2.pop('id') | new_id2 = actual_tool2.pop('id') | ||||
self.assertEqual(new_id, new_id2) | assert new_id == new_id2 | ||||
self.assertEqual(actual_tool, actual_tool2) | assert actual_tool == actual_tool2 | ||||
def test_indexer_configuration_add_multiple(self): | def test_indexer_configuration_add_multiple( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'some-unknown-tool', | 'tool_name': 'some-unknown-tool', | ||||
'tool_version': 'some-version', | 'tool_version': 'some-version', | ||||
'tool_configuration': {"debian-package": "some-package"}, | 'tool_configuration': {"debian-package": "some-package"}, | ||||
} | } | ||||
actual_tools = list(self.storage.indexer_configuration_add([tool])) | actual_tools = list(storage.indexer_configuration_add([tool])) | ||||
self.assertEqual(len(actual_tools), 1) | assert len(actual_tools) == 1 | ||||
new_tools = [tool, { | new_tools = [tool, { | ||||
'tool_name': 'yet-another-tool', | 'tool_name': 'yet-another-tool', | ||||
'tool_version': 'version', | 'tool_version': 'version', | ||||
'tool_configuration': {}, | 'tool_configuration': {}, | ||||
}] | }] | ||||
actual_tools = list(self.storage.indexer_configuration_add(new_tools)) | actual_tools = list(storage.indexer_configuration_add(new_tools)) | ||||
self.assertEqual(len(actual_tools), 2) | assert len(actual_tools) == 2 | ||||
# order not guaranteed, so we iterate over results to check | # order not guaranteed, so we iterate over results to check | ||||
for tool in actual_tools: | for tool in actual_tools: | ||||
_id = tool.pop('id') | _id = tool.pop('id') | ||||
self.assertIsNotNone(_id) | assert _id is not None | ||||
self.assertIn(tool, new_tools) | assert tool in new_tools | ||||
def test_indexer_configuration_get_missing(self): | def test_indexer_configuration_get_missing( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'unknown-tool', | 'tool_name': 'unknown-tool', | ||||
'tool_version': '3.1.0rc2-31-ga2cbb8c', | 'tool_version': '3.1.0rc2-31-ga2cbb8c', | ||||
'tool_configuration': {"command_line": "nomossa <filepath>"}, | 'tool_configuration': {"command_line": "nomossa <filepath>"}, | ||||
} | } | ||||
actual_tool = self.storage.indexer_configuration_get(tool) | actual_tool = storage.indexer_configuration_get(tool) | ||||
self.assertIsNone(actual_tool) | assert actual_tool is None | ||||
def test_indexer_configuration_get(self): | def test_indexer_configuration_get( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'nomos', | 'tool_name': 'nomos', | ||||
'tool_version': '3.1.0rc2-31-ga2cbb8c', | 'tool_version': '3.1.0rc2-31-ga2cbb8c', | ||||
'tool_configuration': {"command_line": "nomossa <filepath>"}, | 'tool_configuration': {"command_line": "nomossa <filepath>"}, | ||||
} | } | ||||
self.storage.indexer_configuration_add([tool]) | actual_tool = storage.indexer_configuration_get(tool) | ||||
actual_tool = self.storage.indexer_configuration_get(tool) | assert actual_tool | ||||
expected_tool = tool.copy() | expected_tool = tool.copy() | ||||
del actual_tool['id'] | del actual_tool['id'] | ||||
self.assertEqual(expected_tool, actual_tool) | assert expected_tool == actual_tool | ||||
def test_indexer_configuration_metadata_get_missing_context(self): | def test_indexer_configuration_metadata_get_missing_context( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'swh-metadata-translator', | 'tool_name': 'swh-metadata-translator', | ||||
'tool_version': '0.0.1', | 'tool_version': '0.0.1', | ||||
'tool_configuration': {"context": "unknown-context"}, | 'tool_configuration': {"context": "unknown-context"}, | ||||
} | } | ||||
actual_tool = self.storage.indexer_configuration_get(tool) | actual_tool = storage.indexer_configuration_get(tool) | ||||
self.assertIsNone(actual_tool) | assert actual_tool is None | ||||
def test_indexer_configuration_metadata_get(self): | def test_indexer_configuration_metadata_get( | ||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
tool = { | tool = { | ||||
'tool_name': 'swh-metadata-translator', | 'tool_name': 'swh-metadata-translator', | ||||
'tool_version': '0.0.1', | 'tool_version': '0.0.1', | ||||
'tool_configuration': {"type": "local", "context": "NpmMapping"}, | 'tool_configuration': {"type": "local", "context": "NpmMapping"}, | ||||
} | } | ||||
self.storage.indexer_configuration_add([tool]) | storage.indexer_configuration_add([tool]) | ||||
actual_tool = self.storage.indexer_configuration_get(tool) | actual_tool = storage.indexer_configuration_get(tool) | ||||
assert actual_tool | |||||
expected_tool = tool.copy() | expected_tool = tool.copy() | ||||
expected_tool['id'] = actual_tool['id'] | expected_tool['id'] = actual_tool['id'] | ||||
self.assertEqual(expected_tool, actual_tool) | assert expected_tool == actual_tool | ||||
@pytest.mark.property_based | def test_generate_content_mimetype_get_range_limit_none( | ||||
def test_generate_content_mimetype_get_range_limit_none(self): | self, swh_indexer_storage): | ||||
storage = swh_indexer_storage | |||||
"""mimetype_get_range call with wrong limit input should fail""" | """mimetype_get_range call with wrong limit input should fail""" | ||||
with self.assertRaises(ValueError) as e: | with pytest.raises(ValueError) as e: | ||||
self.storage.content_mimetype_get_range( | storage.content_mimetype_get_range( | ||||
start=None, end=None, indexer_configuration_id=None, | start=None, end=None, indexer_configuration_id=None, | ||||
limit=None) | limit=None) | ||||
self.assertEqual(e.exception.args, ( | assert e.value.args == ( | ||||
'Development error: limit should not be None',)) | 'Development error: limit should not be None',) | ||||
@pytest.mark.property_based | def test_generate_content_mimetype_get_range_no_limit( | ||||
@given(gen_content_mimetypes(min_size=1, max_size=4)) | self, swh_indexer_storage_with_data): | ||||
def test_generate_content_mimetype_get_range_no_limit(self, mimetypes): | |||||
"""mimetype_get_range returns mimetypes within range provided""" | """mimetype_get_range returns mimetypes within range provided""" | ||||
self.reset_storage_tables() | storage, data = swh_indexer_storage_with_data | ||||
# add mimetypes to storage | mimetypes = data.mimetypes | ||||
self.storage.content_mimetype_add(mimetypes) | |||||
# All ids from the db | # All ids from the db | ||||
content_ids = sorted([c['id'] for c in mimetypes]) | content_ids = sorted([c['id'] for c in mimetypes]) | ||||
start = content_ids[0] | start = content_ids[0] | ||||
end = content_ids[-1] | end = content_ids[-1] | ||||
# retrieve mimetypes | # retrieve mimetypes | ||||
tool_id = mimetypes[0]['indexer_configuration_id'] | tool_id = mimetypes[0]['indexer_configuration_id'] | ||||
actual_result = self.storage.content_mimetype_get_range( | actual_result = storage.content_mimetype_get_range( | ||||
start, end, indexer_configuration_id=tool_id) | start, end, indexer_configuration_id=tool_id) | ||||
actual_ids = actual_result['ids'] | actual_ids = actual_result['ids'] | ||||
actual_next = actual_result['next'] | actual_next = actual_result['next'] | ||||
self.assertEqual(len(mimetypes), len(actual_ids)) | assert len(mimetypes) == len(actual_ids) | ||||
self.assertIsNone(actual_next) | assert actual_next is None | ||||
self.assertEqual(content_ids, actual_ids) | assert content_ids == actual_ids | ||||
@pytest.mark.property_based | |||||
@given(gen_content_mimetypes(min_size=4, max_size=4)) | |||||
def test_generate_content_mimetype_get_range_limit(self, mimetypes): | |||||
"""mimetype_get_range paginates results if limit exceeded""" | |||||
self.reset_storage_tables() | |||||
# add mimetypes to storage | def test_generate_content_mimetype_get_range_limit( | ||||
self.storage.content_mimetype_add(mimetypes) | self, swh_indexer_storage_with_data): | ||||
"""mimetype_get_range paginates results if limit exceeded""" | |||||
storage, data = swh_indexer_storage_with_data | |||||
# input the list of sha1s we want from storage | # input the list of sha1s we want from storage | ||||
content_ids = sorted([c['id'] for c in mimetypes]) | content_ids = sorted( | ||||
[c['id'] for c in data.mimetypes]) | |||||
mimetypes = list(storage.content_mimetype_get(content_ids)) | |||||
assert len(mimetypes) == len(data.mimetypes) | |||||
start = content_ids[0] | start = content_ids[0] | ||||
end = content_ids[-1] | end = content_ids[-1] | ||||
# retrieve mimetypes limited to 10 results | |||||
# retrieve mimetypes limited to 3 results | actual_result = storage.content_mimetype_get_range( | ||||
limited_results = len(mimetypes) - 1 | |||||
tool_id = mimetypes[0]['indexer_configuration_id'] | |||||
actual_result = self.storage.content_mimetype_get_range( | |||||
start, end, | start, end, | ||||
indexer_configuration_id=tool_id, limit=limited_results) | indexer_configuration_id=1, | ||||
limit=10) | |||||
assert actual_result | |||||
assert set(actual_result.keys()) == {'ids', 'next'} | |||||
actual_ids = actual_result['ids'] | actual_ids = actual_result['ids'] | ||||
actual_next = actual_result['next'] | actual_next = actual_result['next'] | ||||
self.assertEqual(limited_results, len(actual_ids)) | assert len(actual_ids) == 10 | ||||
self.assertIsNotNone(actual_next) | assert actual_next is not None | ||||
self.assertEqual(actual_next, content_ids[-1]) | assert actual_next == content_ids[10] | ||||
expected_mimetypes = content_ids[:-1] | expected_mimetypes = content_ids[:10] | ||||
self.assertEqual(expected_mimetypes, actual_ids) | assert expected_mimetypes == actual_ids | ||||
# retrieve next part | # retrieve next part | ||||
actual_results2 = self.storage.content_mimetype_get_range( | actual_result = storage.content_mimetype_get_range( | ||||
start=end, end=end, indexer_configuration_id=tool_id) | start=end, end=end, indexer_configuration_id=1) | ||||
actual_ids2 = actual_results2['ids'] | assert set(actual_result.keys()) == {'ids', 'next'} | ||||
actual_next2 = actual_results2['next'] | actual_ids = actual_result['ids'] | ||||
actual_next = actual_result['next'] | |||||
self.assertIsNone(actual_next2) | |||||
expected_mimetypes2 = [content_ids[-1]] | |||||
self.assertEqual(expected_mimetypes2, actual_ids2) | |||||
@pytest.mark.property_based | assert actual_next is None | ||||
def test_generate_content_fossology_license_get_range_limit_none(self): | expected_mimetypes = [content_ids[-1]] | ||||
assert expected_mimetypes == actual_ids | |||||
def test_generate_content_fossology_license_get_range_limit_none( | |||||
self, swh_indexer_storage_with_data): | |||||
storage, data = swh_indexer_storage_with_data | |||||
"""license_get_range call with wrong limit input should fail""" | """license_get_range call with wrong limit input should fail""" | ||||
with self.assertRaises(ValueError) as e: | with pytest.raises(ValueError) as e: | ||||
self.storage.content_fossology_license_get_range( | storage.content_fossology_license_get_range( | ||||
start=None, end=None, indexer_configuration_id=None, | start=None, end=None, indexer_configuration_id=None, | ||||
limit=None) | limit=None) | ||||
self.assertEqual(e.exception.args, ( | assert e.value.args == ( | ||||
'Development error: limit should not be None',)) | 'Development error: limit should not be None',) | ||||
@pytest.mark.property_based | |||||
def prepare_mimetypes_from(self, fossology_licenses): | |||||
"""Fossology license needs some consistent data in db to run. | |||||
""" | |||||
mimetypes = [] | |||||
for c in fossology_licenses: | |||||
mimetypes.append({ | |||||
'id': c['id'], | |||||
'mimetype': 'text/plain', | |||||
'encoding': 'utf-8', | |||||
'indexer_configuration_id': c['indexer_configuration_id'], | |||||
}) | |||||
return mimetypes | |||||
@pytest.mark.property_based
@given(gen_content_fossology_licenses(min_size=1, max_size=4))
def test_generate_content_fossology_license_get_range_no_limit(
        self, fossology_licenses):
    """license_get_range returns licenses within range provided"""
    self.reset_storage_tables()
    # Licenses need matching textual mimetype rows in the db to be
    # retrievable, so seed those first, then the licenses themselves.
    self.storage.content_mimetype_add(
        self.prepare_mimetypes_from(fossology_licenses))
    self.storage.content_fossology_license_add(fossology_licenses)

    # Query the full range spanned by every stored id.
    all_ids = sorted(row['id'] for row in fossology_licenses)
    tool = fossology_licenses[0]['indexer_configuration_id']
    result = self.storage.content_fossology_license_get_range(
        all_ids[0], all_ids[-1], indexer_configuration_id=tool)

    # Without a limit every id comes back and there is no next page.
    self.assertEqual(len(fossology_licenses), len(result['ids']))
    self.assertIsNone(result['next'])
    self.assertEqual(all_ids, result['ids'])
@pytest.mark.property_based
@given(gen_content_fossology_licenses(min_size=1, max_size=4),
       gen_content_mimetypes(min_size=1, max_size=1))
def test_generate_content_fossology_license_get_range_no_limit_with_filter(
        self, fossology_licenses, mimetypes):
    """This filters non textual, then returns results within range"""
    self.reset_storage_tables()
    # craft some consistent mimetypes for the licenses under test
    _mimetypes = self.prepare_mimetypes_from(fossology_licenses)
    # add binary mimetypes which will get filtered out in results
    for m in mimetypes:
        _mimetypes.append({
            **m,
            # 'mimetype' must come AFTER **m so it wins over any
            # 'mimetype' key already present in m (later duplicate
            # keys override earlier ones, PEP 448).  The previous
            # ordering ({'mimetype': 'binary', **m}) let the generated
            # row's own mimetype silently override 'binary', so the
            # row was never actually marked binary.
            'mimetype': 'binary',
        })

    self.storage.content_mimetype_add(_mimetypes)
    # add fossology_licenses to storage
    self.storage.content_fossology_license_add(fossology_licenses)

    # All ids from the db
    content_ids = sorted([c['id'] for c in fossology_licenses])

    start = content_ids[0]
    end = content_ids[-1]

    # retrieve fossology_licenses over the full id range
    tool_id = fossology_licenses[0]['indexer_configuration_id']
    actual_result = self.storage.content_fossology_license_get_range(
        start, end, indexer_configuration_id=tool_id)

    actual_ids = actual_result['ids']
    actual_next = actual_result['next']

    # Only the textual (license-bearing) contents come back, in one page.
    self.assertEqual(len(fossology_licenses), len(actual_ids))
    self.assertIsNone(actual_next)
    self.assertEqual(content_ids, actual_ids)
@pytest.mark.property_based
@given(gen_content_fossology_licenses(min_size=4, max_size=4))
def test_generate_fossology_license_get_range_limit(
        self, fossology_licenses):
    """fossology_license_get_range paginates results if limit exceeded"""
    self.reset_storage_tables()
    # Seed the consistent mimetype rows, then the licenses under test.
    self.storage.content_mimetype_add(
        self.prepare_mimetypes_from(fossology_licenses))
    self.storage.content_fossology_license_add(fossology_licenses)

    all_ids = sorted(row['id'] for row in fossology_licenses)
    first, last = all_ids[0], all_ids[-1]
    tool = fossology_licenses[0]['indexer_configuration_id']

    # Ask for one result fewer than stored: a 'next' cursor must appear.
    page_size = len(fossology_licenses) - 1
    page1 = self.storage.content_fossology_license_get_range(
        first, last,
        indexer_configuration_id=tool, limit=page_size)

    self.assertEqual(page_size, len(page1['ids']))
    self.assertIsNotNone(page1['next'])
    self.assertEqual(page1['next'], last)
    self.assertEqual(all_ids[:-1], page1['ids'])

    # The second page holds exactly the one remaining id.
    page2 = self.storage.content_fossology_license_get_range(
        start=last, end=last, indexer_configuration_id=tool)

    self.assertIsNone(page2['next'])
    self.assertEqual([last], page2['ids'])
@pytest.mark.db
class IndexerTestStorage(CommonTestStorage, BasePgTestStorage,
                         unittest.TestCase):
    """Run the shared indexer-storage tests against a local backend.

    The class body is empty: every test method is inherited from
    CommonTestStorage, and BasePgTestStorage presumably provides the
    local PostgreSQL database setup (verify against its definition).
    For the client api tests (remote storage), see
    `class`:swh.indexer.storage.test_api_client:TestRemoteStorage
    class.
    """
    pass
def test_mapping_names():
    """MAPPING_NAMES must match exactly the names of registered MAPPINGS."""
    registered = {mapping.name for mapping in MAPPINGS.values()}
    assert registered == set(MAPPING_NAMES)