Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
Show First 20 Lines • Show All 444 Lines • ▼ Show 20 Lines | def setUp(self, *args, **kwargs): | ||||
self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') | self.sha1_1 = hash_to_bytes('34973274ccef6ab4dfaaf86599792fa9c3fe4689') | ||||
self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') | self.sha1_2 = hash_to_bytes('61c2b3a30496d329e21af70dd2d7e097046d07b7') | ||||
self.revision_id_1 = hash_to_bytes( | self.revision_id_1 = hash_to_bytes( | ||||
'7026b7c1a2af56521e951c01ed20f255fa054238') | '7026b7c1a2af56521e951c01ed20f255fa054238') | ||||
self.revision_id_2 = hash_to_bytes( | self.revision_id_2 = hash_to_bytes( | ||||
'7026b7c1a2af56521e9587659012345678904321') | '7026b7c1a2af56521e9587659012345678904321') | ||||
self.revision_id_3 = hash_to_bytes( | self.revision_id_3 = hash_to_bytes( | ||||
'7026b7c1a2af56521e9587659012345678904320') | '7026b7c1a2af56521e9587659012345678904320') | ||||
self.origin_id_1 = 44434341 | self.origin_url_1 = 'file:///dev/0/zero' # 44434341 | ||||
self.origin_id_2 = 44434342 | self.origin_url_2 = 'file:///dev/1/one' # 44434342 | ||||
self.origin_id_3 = 54974445 | self.origin_url_3 = 'file:///dev/2/two' # 54974445 | ||||
vlorentz: `self.origin_url_` | |||||
def test_check_config(self): | def test_check_config(self): | ||||
self.assertTrue(self.storage.check_config(check_write=True)) | self.assertTrue(self.storage.check_config(check_write=True)) | ||||
self.assertTrue(self.storage.check_config(check_write=False)) | self.assertTrue(self.storage.check_config(check_write=False)) | ||||
# generate content_mimetype tests | # generate content_mimetype tests | ||||
( | ( | ||||
test_content_mimetype_missing, | test_content_mimetype_missing, | ||||
test_content_mimetype_add__drop_duplicate, | test_content_mimetype_add__drop_duplicate, | ||||
▲ Show 20 Lines • Show All 532 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_get(self): | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin]) | self.storage.origin_intrinsic_metadata_add([metadata_origin]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, 42])) | [self.origin_url_1, 'no://where'])) | ||||
expected_metadata = [{ | expected_metadata = [{ | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata, | 'metadata': metadata, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata) | self.assertEqual(actual_metadata, expected_metadata) | ||||
def test_origin_intrinsic_metadata_delete(self): | def test_origin_intrinsic_metadata_delete(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
metadata_origin2 = metadata_origin.copy() | metadata_origin2 = metadata_origin.copy() | ||||
metadata_origin2['id'] = self.origin_id_2 | metadata_origin2['id'] = self.origin_url_2 | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([ | self.storage.origin_intrinsic_metadata_add([ | ||||
metadata_origin, metadata_origin2]) | metadata_origin, metadata_origin2]) | ||||
self.storage.origin_intrinsic_metadata_delete([ | self.storage.origin_intrinsic_metadata_delete([ | ||||
{ | { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, self.origin_id_2, 42])) | [self.origin_url_1, self.origin_url_2, 'no://where'])) | ||||
for item in actual_metadata: | for item in actual_metadata: | ||||
item['indexer_configuration_id'] = item.pop('tool')['id'] | item['indexer_configuration_id'] = item.pop('tool')['id'] | ||||
self.assertEqual(actual_metadata, [metadata_origin2]) | self.assertEqual(actual_metadata, [metadata_origin2]) | ||||
def test_origin_intrinsic_metadata_delete_nonexisting(self): | def test_origin_intrinsic_metadata_delete_nonexisting(self): | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
self.storage.origin_intrinsic_metadata_delete([ | self.storage.origin_intrinsic_metadata_delete([ | ||||
{ | { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
def test_origin_intrinsic_metadata_add_drop_duplicate(self): | def test_origin_intrinsic_metadata_add_drop_duplicate(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, 42])) | [self.origin_url_1, 'no://where'])) | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2['metadata'] = metadata_v2 | metadata_origin_v2['metadata'] = metadata_v2 | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_url_1])) | ||||
# metadata did not change as the v2 was dropped. | # metadata did not change as the v2 was dropped. | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): | def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_url_1])) | ||||
# then | # then | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_update_duplicated_metadata', | 'name': 'test_update_duplicated_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2 = { | metadata_origin_v2 = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/null', | |||||
'metadata': metadata_v2.copy(), | 'metadata': metadata_v2.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
self.storage.revision_intrinsic_metadata_add( | self.storage.revision_intrinsic_metadata_add( | ||||
[metadata_rev_v2], conflict_update=True) | [metadata_rev_v2], conflict_update=True) | ||||
self.storage.origin_intrinsic_metadata_add( | self.storage.origin_intrinsic_metadata_add( | ||||
[metadata_origin_v2], conflict_update=True) | [metadata_origin_v2], conflict_update=True) | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_url_1])) | ||||
expected_metadata_v2 = [{ | expected_metadata_v2 = [{ | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/null', | |||||
'metadata': metadata_v2, | 'metadata': metadata_v2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
}] | }] | ||||
# metadata did change as the v2 was used to overwrite v1 | # metadata did change as the v2 was used to overwrite v1 | ||||
self.assertEqual(actual_metadata, expected_metadata_v2) | self.assertEqual(actual_metadata, expected_metadata_v2) | ||||
Show All 26 Lines | def test_origin_intrinsic_metadata_add__update_in_place_deadlock(self): | ||||
'name': None, | 'name': None, | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
data_v1 = [ | data_v1 = [ | ||||
{ | { | ||||
'id': id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'origin_url': 'file:///tmp/origin%d' % id_, | |||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
data_v2 = [ | data_v2 = [ | ||||
{ | { | ||||
'id': id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'origin_url': 'file:///tmp/origin%d' % id_, | |||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
# all items to be in the DB. | # all items to be in the DB. | ||||
data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
# given | # given | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add(data_v1) | self.storage.origin_intrinsic_metadata_add(data_v1) | ||||
# when | # when | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | origins = ['file:///tmp/origin%d' % i for i in ids] | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(origins)) | |||||
expected_data_v1 = [ | expected_data_v1 = [ | ||||
{ | { | ||||
'id': id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'origin_url': 'file:///tmp/origin%d' % id_, | |||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# then | # then | ||||
Show All 11 Lines | def test_origin_intrinsic_metadata_add__update_in_place_deadlock(self): | ||||
t1 = threading.Thread(target=f1) | t1 = threading.Thread(target=f1) | ||||
t2 = threading.Thread(target=f2) | t2 = threading.Thread(target=f2) | ||||
t2.start() | t2.start() | ||||
t1.start() | t1.start() | ||||
t1.join() | t1.join() | ||||
t2.join() | t2.join() | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | actual_data = list(self.storage.origin_intrinsic_metadata_get(origins)) | ||||
expected_data_v2 = [ | expected_data_v2 = [ | ||||
{ | { | ||||
'id': id_, | 'id': 'file:///tmp/origin%d' % id_, | ||||
'origin_url': 'file:///tmp/origin%d' % id_, | |||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
self.maxDiff = None | self.maxDiff = None | ||||
Show All 9 Lines | def test_origin_intrinsic_metadata_add__duplicate_twice(self): | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
Show All 11 Lines | def test_origin_intrinsic_metadata_search_fulltext(self): | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'origin_url': 'file:///dev/zero', | 'origin': self.origin_url_1, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_id_2, | 'id': self.origin_url_2, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = self.storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
[res['id'] for res in search(['Doe'])], | [res['id'] for res in search(['Doe'])], | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_url_1, self.origin_url_2]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['John', 'Doe'])], | [res['id'] for res in search(['John', 'Doe'])], | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['John'])], | [res['id'] for res in search(['John'])], | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['John', 'Jane'])], | [res['id'] for res in search(['John', 'Jane'])], | ||||
[]) | []) | ||||
def test_origin_intrinsic_metadata_search_fulltext_rank(self): | def test_origin_intrinsic_metadata_search_fulltext_rank(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
Show All 10 Lines | def test_origin_intrinsic_metadata_search_fulltext_rank(self): | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': [ | 'author': [ | ||||
'Random Person', | 'Random Person', | ||||
'Jane Doe', | 'Jane Doe', | ||||
] | ] | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_id_2, | 'id': self.origin_url_2, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = self.storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['Doe'])], | [res['id'] for res in search(['Doe'])], | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_url_1, self.origin_url_2]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['Doe'], limit=1)], | [res['id'] for res in search(['Doe'], limit=1)], | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['John'])], | [res['id'] for res in search(['John'])], | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['Jane'])], | [res['id'] for res in search(['Jane'])], | ||||
[self.origin_id_2, self.origin_id_1]) | [self.origin_url_2, self.origin_url_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['id'] for res in search(['John', 'Jane'])], | [res['id'] for res in search(['John', 'Jane'])], | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
def _fill_origin_intrinsic_metadata(self): | def _fill_origin_intrinsic_metadata(self): | ||||
tool1_id = self.tools['swh-metadata-detector']['id'] | tool1_id = self.tools['swh-metadata-detector']['id'] | ||||
tool2_id = self.tools['swh-metadata-detector2']['id'] | tool2_id = self.tools['swh-metadata-detector2']['id'] | ||||
metadata1 = { | metadata1 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'John Doe', | 'author': 'John Doe', | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'id': self.origin_id_1, | 'id': self.origin_url_1, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'id': self.origin_id_2, | 'id': self.origin_url_2, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
metadata3 = { | metadata3 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
} | } | ||||
metadata3_rev = { | metadata3_rev = { | ||||
'id': self.revision_id_3, | 'id': self.revision_id_3, | ||||
'metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata3_origin = { | metadata3_origin = { | ||||
'id': self.origin_id_3, | 'id': self.origin_url_3, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['pkg-info'], | 'mappings': ['pkg-info'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_3, | 'from_revision': self.revision_id_3, | ||||
} | } | ||||
self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
self.storage.revision_intrinsic_metadata_add([metadata3_rev]) | self.storage.revision_intrinsic_metadata_add([metadata3_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata3_origin]) | self.storage.origin_intrinsic_metadata_add([metadata3_origin]) | ||||
def test_origin_intrinsic_metadata_search_by_producer(self): | def test_origin_intrinsic_metadata_search_by_producer(self): | ||||
self._fill_origin_intrinsic_metadata() | self._fill_origin_intrinsic_metadata() | ||||
tool1 = self.tools['swh-metadata-detector'] | tool1 = self.tools['swh-metadata-detector'] | ||||
tool2 = self.tools['swh-metadata-detector2'] | tool2 = self.tools['swh-metadata-detector2'] | ||||
endpoint = self.storage.origin_intrinsic_metadata_search_by_producer | endpoint = self.storage.origin_intrinsic_metadata_search_by_producer | ||||
# test pagination | # test pagination | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(ids_only=True), | endpoint(ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2, self.origin_id_3]) | [self.origin_url_1, self.origin_url_2, self.origin_url_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(start=0, ids_only=True), | endpoint(start=self.origin_url_1, ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2, self.origin_id_3]) | [self.origin_url_1, self.origin_url_2, self.origin_url_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(start=0, limit=2, ids_only=True), | endpoint(start=self.origin_url_1, limit=2, ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_url_1, self.origin_url_2]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(start=self.origin_id_1+1, ids_only=True), | endpoint(start=self.origin_url_1+'2', ids_only=True), | ||||
[self.origin_id_2, self.origin_id_3]) | [self.origin_url_2, self.origin_url_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(start=self.origin_id_1+1, end=self.origin_id_3-1, | endpoint(start=self.origin_url_1+'2', end=self.origin_url_3[:-1], | ||||
ids_only=True), | ids_only=True), | ||||
[self.origin_id_2]) | [self.origin_url_2]) | ||||
[Not Done] vlorentz: That doesn't test how a consumer of the API would use it. It needs to start with `start='url'`, then incrementally get new results using only the results from the previous call. | |||||
[Not Done] vlorentz: Sorry, I meant it should start with `start=''`. | |||||
[Done] douardda: I agree, but I just replicated what the tests used to do. Once again, I believe the kind of… | |||||
# test mappings filtering | # test mappings filtering | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['npm'], ids_only=True), | endpoint(mappings=['npm'], ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_url_1, self.origin_url_2]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['npm', 'gemspec'], ids_only=True), | endpoint(mappings=['npm', 'gemspec'], ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_url_1, self.origin_url_2]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['gemspec'], ids_only=True), | endpoint(mappings=['gemspec'], ids_only=True), | ||||
[self.origin_id_2]) | [self.origin_url_2]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['pkg-info'], ids_only=True), | endpoint(mappings=['pkg-info'], ids_only=True), | ||||
[self.origin_id_3]) | [self.origin_url_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['foobar'], ids_only=True), | endpoint(mappings=['foobar'], ids_only=True), | ||||
[]) | []) | ||||
# test pagination + mappings | # test pagination + mappings | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(mappings=['npm'], limit=1, ids_only=True), | endpoint(mappings=['npm'], limit=1, ids_only=True), | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
# test tool filtering | # test tool filtering | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(tool_ids=[tool1['id']], ids_only=True), | endpoint(tool_ids=[tool1['id']], ids_only=True), | ||||
[self.origin_id_1]) | [self.origin_url_1]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(tool_ids=[tool2['id']], ids_only=True), | endpoint(tool_ids=[tool2['id']], ids_only=True), | ||||
[self.origin_id_2, self.origin_id_3]) | [self.origin_url_2, self.origin_url_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True), | endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2, self.origin_id_3]) | [self.origin_url_1, self.origin_url_2, self.origin_url_3]) | ||||
# test ids_only=False | # test ids_only=False | ||||
self.assertEqual(list(endpoint(mappings=['gemspec'])), [{ | self.assertEqual(list(endpoint(mappings=['gemspec'])), [{ | ||||
'id': self.origin_id_2, | 'id': self.origin_url_2, | ||||
'origin_url': 'file:///dev/zero', | |||||
'metadata': { | 'metadata': { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
}, | }, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'tool': tool2, | 'tool': tool2, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
}]) | }]) | ||||
▲ Show 20 Lines • Show All 356 Lines • Show Last 20 Lines |
self.origin_url_