Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/storage/test_storage.py
Show First 20 Lines • Show All 925 Lines • ▼ Show 20 Lines | ( | ||||
test_content_metadata_add__duplicate_twice, | test_content_metadata_add__duplicate_twice, | ||||
test_content_metadata_get, | test_content_metadata_get, | ||||
_, # test_content_metadata_delete, | _, # test_content_metadata_delete, | ||||
_, # test_content_metadata_delete_nonexisting, | _, # test_content_metadata_delete_nonexisting, | ||||
) = gen_generic_endpoint_tests( | ) = gen_generic_endpoint_tests( | ||||
endpoint_type='content_metadata', | endpoint_type='content_metadata', | ||||
tool_name='swh-metadata-detector', | tool_name='swh-metadata-detector', | ||||
example_data1={ | example_data1={ | ||||
'translated_metadata': { | 'metadata': { | ||||
'other': {}, | 'other': {}, | ||||
'codeRepository': { | 'codeRepository': { | ||||
'type': 'git', | 'type': 'git', | ||||
'url': 'https://github.com/moranegg/metadata_test' | 'url': 'https://github.com/moranegg/metadata_test' | ||||
}, | }, | ||||
'description': 'Simple package.json test for indexer', | 'description': 'Simple package.json test for indexer', | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
}, | }, | ||||
example_data2={ | example_data2={ | ||||
'translated_metadata': { | 'metadata': { | ||||
'other': {}, | 'other': {}, | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
# revision_metadata tests | # revision_intrinsic_metadata tests | ||||
( | ( | ||||
test_revision_metadata_missing, | test_revision_intrinsic_metadata_missing, | ||||
test_revision_metadata_add__drop_duplicate, | test_revision_intrinsic_metadata_add__drop_duplicate, | ||||
test_revision_metadata_add__update_in_place_duplicate, | test_revision_intrinsic_metadata_add__update_in_place_duplicate, | ||||
test_revision_metadata_add__update_in_place_deadlock, | test_revision_intrinsic_metadata_add__update_in_place_deadlock, | ||||
test_revision_metadata_add__duplicate_twice, | test_revision_intrinsic_metadata_add__duplicate_twice, | ||||
test_revision_metadata_get, | test_revision_intrinsic_metadata_get, | ||||
test_revision_metadata_delete, | test_revision_intrinsic_metadata_delete, | ||||
test_revision_metadata_delete_nonexisting, | test_revision_intrinsic_metadata_delete_nonexisting, | ||||
) = gen_generic_endpoint_tests( | ) = gen_generic_endpoint_tests( | ||||
endpoint_type='revision_metadata', | endpoint_type='revision_intrinsic_metadata', | ||||
tool_name='swh-metadata-detector', | tool_name='swh-metadata-detector', | ||||
example_data1={ | example_data1={ | ||||
'translated_metadata': { | 'metadata': { | ||||
'other': {}, | 'other': {}, | ||||
'codeRepository': { | 'codeRepository': { | ||||
'type': 'git', | 'type': 'git', | ||||
'url': 'https://github.com/moranegg/metadata_test' | 'url': 'https://github.com/moranegg/metadata_test' | ||||
}, | }, | ||||
'description': 'Simple package.json test for indexer', | 'description': 'Simple package.json test for indexer', | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
}, | }, | ||||
example_data2={ | example_data2={ | ||||
'translated_metadata': { | 'metadata': { | ||||
'other': {}, | 'other': {}, | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'version': '0.0.1' | 'version': '0.0.1' | ||||
}, | }, | ||||
'mappings': ['mapping2'], | 'mappings': ['mapping2'], | ||||
}, | }, | ||||
) | ) | ||||
def test_origin_intrinsic_metadata_get(self): | def test_origin_intrinsic_metadata_get(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin]) | self.storage.origin_intrinsic_metadata_add([metadata_origin]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, 42])) | [self.origin_id_1, 42])) | ||||
expected_metadata = [{ | expected_metadata = [{ | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata) | self.assertEqual(actual_metadata, expected_metadata) | ||||
def test_origin_intrinsic_metadata_delete(self): | def test_origin_intrinsic_metadata_delete(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
metadata_origin2 = metadata_origin.copy() | metadata_origin2 = metadata_origin.copy() | ||||
metadata_origin2['origin_id'] = self.origin_id_2 | metadata_origin2['id'] = self.origin_id_2 | ||||
# when | # when | ||||
self.storage.revision_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([ | self.storage.origin_intrinsic_metadata_add([ | ||||
metadata_origin, metadata_origin2]) | metadata_origin, metadata_origin2]) | ||||
self.storage.origin_intrinsic_metadata_delete([ | self.storage.origin_intrinsic_metadata_delete([ | ||||
{ | { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, self.origin_id_2, 42])) | [self.origin_id_1, self.origin_id_2, 42])) | ||||
for item in actual_metadata: | for item in actual_metadata: | ||||
item['indexer_configuration_id'] = item.pop('tool')['id'] | item['indexer_configuration_id'] = item.pop('tool')['id'] | ||||
self.assertEqual(actual_metadata, [metadata_origin2]) | self.assertEqual(actual_metadata, [metadata_origin2]) | ||||
def test_origin_intrinsic_metadata_delete_nonexisting(self): | def test_origin_intrinsic_metadata_delete_nonexisting(self): | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
self.storage.origin_intrinsic_metadata_delete([ | self.storage.origin_intrinsic_metadata_delete([ | ||||
{ | { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'indexer_configuration_id': tool_id | 'indexer_configuration_id': tool_id | ||||
} | } | ||||
]) | ]) | ||||
def test_origin_intrinsic_metadata_add_drop_duplicate(self): | def test_origin_intrinsic_metadata_add_drop_duplicate(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'translated_metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1, 42])) | [self.origin_id_1, 42])) | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_metadata', | 'name': 'test_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['translated_metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2['translated_metadata'] = metadata_v2 | metadata_origin_v2['metadata'] = metadata_v2 | ||||
self.storage.revision_metadata_add([metadata_rev_v2]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v2]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v2]) | ||||
# then | # then | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_id_1])) | ||||
# metadata did not change as the v2 was dropped. | # metadata did not change as the v2 was dropped. | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): | def test_origin_intrinsic_metadata_add_update_in_place_duplicate(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata_v1 = { | metadata_v1 = { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin_v1 = { | metadata_origin_v1 = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata_v1.copy(), | 'metadata': metadata_v1.copy(), | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': [], | 'mappings': [], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# given | # given | ||||
self.storage.revision_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | self.storage.origin_intrinsic_metadata_add([metadata_origin_v1]) | ||||
# when | # when | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_id_1])) | ||||
# then | # then | ||||
expected_metadata_v1 = [{ | expected_metadata_v1 = [{ | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata_v1, | 'metadata': metadata_v1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
self.assertEqual(actual_metadata, expected_metadata_v1) | self.assertEqual(actual_metadata, expected_metadata_v1) | ||||
# given | # given | ||||
metadata_v2 = metadata_v1.copy() | metadata_v2 = metadata_v1.copy() | ||||
metadata_v2.update({ | metadata_v2.update({ | ||||
'name': 'test_update_duplicated_metadata', | 'name': 'test_update_duplicated_metadata', | ||||
'author': 'MG', | 'author': 'MG', | ||||
}) | }) | ||||
metadata_rev_v2 = metadata_rev_v1.copy() | metadata_rev_v2 = metadata_rev_v1.copy() | ||||
metadata_origin_v2 = metadata_origin_v1.copy() | metadata_origin_v2 = metadata_origin_v1.copy() | ||||
metadata_rev_v2['translated_metadata'] = metadata_v2 | metadata_rev_v2['metadata'] = metadata_v2 | ||||
metadata_origin_v2['metadata'] = metadata_v2 | metadata_origin_v2['metadata'] = metadata_v2 | ||||
self.storage.revision_metadata_add([metadata_rev_v2], | self.storage.revision_intrinsic_metadata_add( | ||||
conflict_update=True) | [metadata_rev_v2], conflict_update=True) | ||||
self.storage.origin_intrinsic_metadata_add([metadata_origin_v2], | self.storage.origin_intrinsic_metadata_add( | ||||
conflict_update=True) | [metadata_origin_v2], conflict_update=True) | ||||
actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | actual_metadata = list(self.storage.origin_intrinsic_metadata_get( | ||||
[self.origin_id_1])) | [self.origin_id_1])) | ||||
expected_metadata_v2 = [{ | expected_metadata_v2 = [{ | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata_v2, | 'metadata': metadata_v2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
'mappings': [], | 'mappings': [], | ||||
}] | }] | ||||
# metadata did change as the v2 was used to overwrite v1 | # metadata did change as the v2 was used to overwrite v1 | ||||
self.assertEqual(actual_metadata, expected_metadata_v2) | self.assertEqual(actual_metadata, expected_metadata_v2) | ||||
Show All 16 Lines | def test_origin_intrinsic_metadata_add__update_in_place_deadlock(self): | ||||
'version': 'v1.1.1', | 'version': 'v1.1.1', | ||||
'name': 'foo', | 'name': 'foo', | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
} | } | ||||
metadata_rev_v1 = { | metadata_rev_v1 = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': { | 'metadata': { | ||||
'version': None, | 'version': None, | ||||
'name': None, | 'name': None, | ||||
}, | }, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
data_v1 = [ | data_v1 = [ | ||||
{ | { | ||||
'origin_id': id_, | 'id': id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
data_v2 = [ | data_v2 = [ | ||||
{ | { | ||||
'origin_id': id_, | 'id': id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# Remove one item from each, so that both queries have to succeed for | # Remove one item from each, so that both queries have to succeed for | ||||
# all items to be in the DB. | # all items to be in the DB. | ||||
data_v2a = data_v2[1:] | data_v2a = data_v2[1:] | ||||
data_v2b = list(reversed(data_v2[0:-1])) | data_v2b = list(reversed(data_v2[0:-1])) | ||||
# given | # given | ||||
self.storage.revision_metadata_add([metadata_rev_v1]) | self.storage.revision_intrinsic_metadata_add([metadata_rev_v1]) | ||||
self.storage.origin_intrinsic_metadata_add(data_v1) | self.storage.origin_intrinsic_metadata_add(data_v1) | ||||
# when | # when | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | ||||
expected_data_v1 = [ | expected_data_v1 = [ | ||||
{ | { | ||||
'origin_id': id_, | 'id': id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data1, | **example_data1, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
# then | # then | ||||
Show All 15 Lines | def test_origin_intrinsic_metadata_add__update_in_place_deadlock(self): | ||||
t1.join() | t1.join() | ||||
t2.join() | t2.join() | ||||
actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | actual_data = list(self.storage.origin_intrinsic_metadata_get(ids)) | ||||
expected_data_v2 = [ | expected_data_v2 = [ | ||||
{ | { | ||||
'origin_id': id_, | 'id': id_, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
**example_data2, | **example_data2, | ||||
'tool': self.tools['swh-metadata-detector'], | 'tool': self.tools['swh-metadata-detector'], | ||||
} | } | ||||
for id_ in ids | for id_ in ids | ||||
] | ] | ||||
self.maxDiff = None | self.maxDiff = None | ||||
self.assertCountEqual(actual_data, expected_data_v2) | self.assertCountEqual(actual_data, expected_data_v2) | ||||
def test_origin_intrinsic_metadata_add__duplicate_twice(self): | def test_origin_intrinsic_metadata_add__duplicate_twice(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata = { | metadata = { | ||||
'developmentStatus': None, | 'developmentStatus': None, | ||||
'name': None, | 'name': None, | ||||
} | } | ||||
metadata_rev = { | metadata_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata, | 'metadata': metadata, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata_origin = { | metadata_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata, | 'metadata': metadata, | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'mappings': ['mapping1'], | 'mappings': ['mapping1'], | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_metadata_add([metadata_rev]) | self.storage.revision_intrinsic_metadata_add([metadata_rev]) | ||||
with self.assertRaises(ValueError): | with self.assertRaises(ValueError): | ||||
self.storage.origin_intrinsic_metadata_add([ | self.storage.origin_intrinsic_metadata_add([ | ||||
metadata_origin, metadata_origin]) | metadata_origin, metadata_origin]) | ||||
def test_origin_intrinsic_metadata_search_fulltext(self): | def test_origin_intrinsic_metadata_search_fulltext(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
metadata1 = { | metadata1 = { | ||||
'author': 'John Doe', | 'author': 'John Doe', | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'translated_metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'origin_id': self.origin_id_2, | 'id': self.origin_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = self.storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
[res['origin_id'] for res in search(['Doe'])], | [res['id'] for res in search(['Doe'])], | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_id_1, self.origin_id_2]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['John', 'Doe'])], | [res['id'] for res in search(['John', 'Doe'])], | ||||
[self.origin_id_1]) | [self.origin_id_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['John'])], | [res['id'] for res in search(['John'])], | ||||
[self.origin_id_1]) | [self.origin_id_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['John', 'Jane'])], | [res['id'] for res in search(['John', 'Jane'])], | ||||
[]) | []) | ||||
def test_origin_intrinsic_metadata_search_fulltext_rank(self): | def test_origin_intrinsic_metadata_search_fulltext_rank(self): | ||||
# given | # given | ||||
tool_id = self.tools['swh-metadata-detector']['id'] | tool_id = self.tools['swh-metadata-detector']['id'] | ||||
# The following authors have "Random Person" to add some more content | # The following authors have "Random Person" to add some more content | ||||
# to the JSON data, to work around normalization quirks when there | # to the JSON data, to work around normalization quirks when there | ||||
# are few words (rank/(1+ln(nb_words)) is very sensitive to nb_words | # are few words (rank/(1+ln(nb_words)) is very sensitive to nb_words | ||||
# for small values of nb_words). | # for small values of nb_words). | ||||
metadata1 = { | metadata1 = { | ||||
'author': [ | 'author': [ | ||||
'Random Person', | 'Random Person', | ||||
'John Doe', | 'John Doe', | ||||
'Jane Doe', | 'Jane Doe', | ||||
] | ] | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'translated_metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'author': [ | 'author': [ | ||||
'Random Person', | 'Random Person', | ||||
'Jane Doe', | 'Jane Doe', | ||||
] | ] | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'origin_id': self.origin_id_2, | 'id': self.origin_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': [], | 'mappings': [], | ||||
'indexer_configuration_id': tool_id, | 'indexer_configuration_id': tool_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
# when | # when | ||||
self.storage.revision_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
# then | # then | ||||
search = self.storage.origin_intrinsic_metadata_search_fulltext | search = self.storage.origin_intrinsic_metadata_search_fulltext | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['Doe'])], | [res['id'] for res in search(['Doe'])], | ||||
[self.origin_id_1, self.origin_id_2]) | [self.origin_id_1, self.origin_id_2]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['Doe'], limit=1)], | [res['id'] for res in search(['Doe'], limit=1)], | ||||
[self.origin_id_1]) | [self.origin_id_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['John'])], | [res['id'] for res in search(['John'])], | ||||
[self.origin_id_1]) | [self.origin_id_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['Jane'])], | [res['id'] for res in search(['Jane'])], | ||||
[self.origin_id_2, self.origin_id_1]) | [self.origin_id_2, self.origin_id_1]) | ||||
self.assertEqual( | self.assertEqual( | ||||
[res['origin_id'] for res in search(['John', 'Jane'])], | [res['id'] for res in search(['John', 'Jane'])], | ||||
[self.origin_id_1]) | [self.origin_id_1]) | ||||
def _fill_origin_intrinsic_metadata(self): | def _fill_origin_intrinsic_metadata(self): | ||||
tool1_id = self.tools['swh-metadata-detector']['id'] | tool1_id = self.tools['swh-metadata-detector']['id'] | ||||
tool2_id = self.tools['swh-metadata-detector2']['id'] | tool2_id = self.tools['swh-metadata-detector2']['id'] | ||||
metadata1 = { | metadata1 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'John Doe', | 'author': 'John Doe', | ||||
} | } | ||||
metadata1_rev = { | metadata1_rev = { | ||||
'id': self.revision_id_1, | 'id': self.revision_id_1, | ||||
'translated_metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
} | } | ||||
metadata1_origin = { | metadata1_origin = { | ||||
'origin_id': self.origin_id_1, | 'id': self.origin_id_1, | ||||
'metadata': metadata1, | 'metadata': metadata1, | ||||
'mappings': ['npm'], | 'mappings': ['npm'], | ||||
'indexer_configuration_id': tool1_id, | 'indexer_configuration_id': tool1_id, | ||||
'from_revision': self.revision_id_1, | 'from_revision': self.revision_id_1, | ||||
} | } | ||||
metadata2 = { | metadata2 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
} | } | ||||
metadata2_rev = { | metadata2_rev = { | ||||
'id': self.revision_id_2, | 'id': self.revision_id_2, | ||||
'translated_metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata2_origin = { | metadata2_origin = { | ||||
'origin_id': self.origin_id_2, | 'id': self.origin_id_2, | ||||
'metadata': metadata2, | 'metadata': metadata2, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
} | } | ||||
metadata3 = { | metadata3 = { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
} | } | ||||
metadata3_rev = { | metadata3_rev = { | ||||
'id': self.revision_id_3, | 'id': self.revision_id_3, | ||||
'translated_metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
} | } | ||||
metadata3_origin = { | metadata3_origin = { | ||||
'origin_id': self.origin_id_3, | 'id': self.origin_id_3, | ||||
'metadata': metadata3, | 'metadata': metadata3, | ||||
'mappings': ['pkg-info'], | 'mappings': ['pkg-info'], | ||||
'indexer_configuration_id': tool2_id, | 'indexer_configuration_id': tool2_id, | ||||
'from_revision': self.revision_id_3, | 'from_revision': self.revision_id_3, | ||||
} | } | ||||
self.storage.revision_metadata_add([metadata1_rev]) | self.storage.revision_intrinsic_metadata_add([metadata1_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | self.storage.origin_intrinsic_metadata_add([metadata1_origin]) | ||||
self.storage.revision_metadata_add([metadata2_rev]) | self.storage.revision_intrinsic_metadata_add([metadata2_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | self.storage.origin_intrinsic_metadata_add([metadata2_origin]) | ||||
self.storage.revision_metadata_add([metadata3_rev]) | self.storage.revision_intrinsic_metadata_add([metadata3_rev]) | ||||
self.storage.origin_intrinsic_metadata_add([metadata3_origin]) | self.storage.origin_intrinsic_metadata_add([metadata3_origin]) | ||||
def test_origin_intrinsic_metadata_search_by_producer(self): | def test_origin_intrinsic_metadata_search_by_producer(self): | ||||
self._fill_origin_intrinsic_metadata() | self._fill_origin_intrinsic_metadata() | ||||
tool1 = self.tools['swh-metadata-detector'] | tool1 = self.tools['swh-metadata-detector'] | ||||
tool2 = self.tools['swh-metadata-detector2'] | tool2 = self.tools['swh-metadata-detector2'] | ||||
endpoint = self.storage.origin_intrinsic_metadata_search_by_producer | endpoint = self.storage.origin_intrinsic_metadata_search_by_producer | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_search_by_producer(self): | ||||
endpoint(tool_ids=[tool2['id']], ids_only=True), | endpoint(tool_ids=[tool2['id']], ids_only=True), | ||||
[self.origin_id_2, self.origin_id_3]) | [self.origin_id_2, self.origin_id_3]) | ||||
self.assertCountEqual( | self.assertCountEqual( | ||||
endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True), | endpoint(tool_ids=[tool1['id'], tool2['id']], ids_only=True), | ||||
[self.origin_id_1, self.origin_id_2, self.origin_id_3]) | [self.origin_id_1, self.origin_id_2, self.origin_id_3]) | ||||
# test ids_only=False | # test ids_only=False | ||||
self.assertEqual(list(endpoint(mappings=['gemspec'])), [{ | self.assertEqual(list(endpoint(mappings=['gemspec'])), [{ | ||||
'origin_id': self.origin_id_2, | 'id': self.origin_id_2, | ||||
'metadata': { | 'metadata': { | ||||
'@context': 'foo', | '@context': 'foo', | ||||
'author': 'Jane Doe', | 'author': 'Jane Doe', | ||||
}, | }, | ||||
'mappings': ['npm', 'gemspec'], | 'mappings': ['npm', 'gemspec'], | ||||
'tool': tool2, | 'tool': tool2, | ||||
'from_revision': self.revision_id_2, | 'from_revision': self.revision_id_2, | ||||
}]) | }]) | ||||
▲ Show 20 Lines • Show All 356 Lines • Show Last 20 Lines |