diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py --- a/swh/indexer/tests/test_metadata.py +++ b/swh/indexer/tests/test_metadata.py @@ -1126,7 +1126,7 @@ metadata_indexer.idx_storage.content_metadata_add([{ 'indexer_configuration_id': tool['id'], - 'id': b'cde', + 'id': b'aab', 'metadata': YARN_PARSER_METADATA, }]) @@ -1161,20 +1161,16 @@ # Add a parent directory, that is the only directory at the root # of the revision rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') - subdir_id = metadata_indexer.storage._revisions[rev_id]['directory'] - metadata_indexer.storage._revisions[rev_id]['directory'] = b'123456' + rev = metadata_indexer.storage._revisions[rev_id] + subdir_id = rev.directory + rev.directory = b'123456' metadata_indexer.storage.directory_add([{ 'id': b'123456', 'entries': [{ - 'target': subdir_id, - 'type': 'dir', - 'length': None, 'name': b'foobar-1.0.0', - 'sha1': None, + 'type': 'dir', + 'target': subdir_id, 'perms': 16384, - 'sha1_git': None, - 'status': None, - 'sha256': None }], }]) @@ -1184,7 +1180,7 @@ metadata_indexer.idx_storage.content_metadata_add([{ 'indexer_configuration_id': tool['id'], - 'id': b'cde', + 'id': b'aab', 'metadata': YARN_PARSER_METADATA, }]) diff --git a/swh/indexer/tests/test_origin_metadata.py b/swh/indexer/tests/test_origin_metadata.py --- a/swh/indexer/tests/test_origin_metadata.py +++ b/swh/indexer/tests/test_origin_metadata.py @@ -10,13 +10,22 @@ from swh.indexer.metadata import OriginMetadataIndexer from .utils import YARN_PARSER_METADATA -from .test_metadata import REVISION_METADATA_CONFIG +from .test_metadata import REVISION_METADATA_CONFIG, TRANSLATOR_TOOL def test_origin_metadata_indexer( idx_storage, storage, obj_storage): - indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None + + 
indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) + indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = storage.origin_get({ @@ -38,6 +47,7 @@ results = list( indexer.idx_storage.revision_intrinsic_metadata_get([rev_id])) + for result in results: del result['tool'] assert results == [rev_metadata] @@ -52,8 +62,15 @@ def test_origin_metadata_indexer_duplicate_origin( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) - indexer.storage = storage - indexer.idx_storage = idx_storage + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None + + indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) indexer.run(["https://github.com/librariesio/yarn-parser"]) indexer.run(["https://github.com/librariesio/yarn-parser"]*2) @@ -73,16 +90,25 @@ def test_origin_metadata_indexer_missing_head( idx_storage, storage, obj_storage): + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None - storage.origin_add([{ + indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) + + indexer.storage.origin_add([{ 'type': 'git', 'url': 'https://example.com' }]) - indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://example.com"]) - origin = storage.origin_get({ + origin = indexer.storage.origin_get({ 'url': 'https://example.com'}) results = list(indexer.idx_storage.origin_intrinsic_metadata_get([ @@ -93,18 +119,27 @@ def test_origin_metadata_indexer_partial_missing_head( idx_storage, storage, obj_storage): 
- storage.origin_add([{ + indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None + + indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) + indexer.storage.origin_add([{ 'type': 'git', 'url': 'https://example.com' }]) - indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) indexer.run(["https://example.com", "https://github.com/librariesio/yarn-parser"]) - origin1 = storage.origin_get({ + origin1 = indexer.storage.origin_get({ 'url': 'https://example.com'}) - origin2 = storage.origin_get({ + origin2 = indexer.storage.origin_get({ 'url': 'https://github.com/librariesio/yarn-parser'}) rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') @@ -137,14 +172,21 @@ def test_origin_metadata_indexer_duplicate_revision( idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) - indexer.storage = storage - indexer.idx_storage = idx_storage + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None + + indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) indexer.run(["https://github.com/librariesio/yarn-parser", "https://github.com/librariesio/yarn-parser.git"]) - origin1 = storage.origin_get({ + origin1 = indexer.storage.origin_get({ 'url': 'https://github.com/librariesio/yarn-parser'}) - origin2 = storage.origin_get({ + origin2 = indexer.storage.origin_get({ 'url': 'https://github.com/librariesio/yarn-parser.git'}) assert origin1['id'] != origin2['id'] rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f') @@ -160,7 +202,6 @@ def test_origin_metadata_indexer_no_metadata_file( idx_storage, 
storage, obj_storage): - indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) with patch('swh.indexer.metadata_dictionary.npm.NpmMapping.filename', b'foo.json'): @@ -227,6 +268,15 @@ idx_storage, storage, obj_storage): indexer = OriginMetadataIndexer(config=REVISION_METADATA_CONFIG) + tool = indexer.idx_storage.indexer_configuration_get( + {'tool_'+k: v for (k, v) in TRANSLATOR_TOOL.items()}) + assert tool is not None + + indexer.idx_storage.content_metadata_add([{ + 'indexer_configuration_id': tool['id'], + 'id': b'aab', + 'metadata': YARN_PARSER_METADATA, + }]) indexer.run(["https://github.com/librariesio/yarn-parser"]) origin = storage.origin_get({ diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py --- a/swh/indexer/tests/utils.py +++ b/swh/indexer/tests/utils.py @@ -36,38 +36,24 @@ ORIGINS = [ { - 'lister': None, - 'project': None, 'type': 'git', 'url': 'https://github.com/SoftwareHeritage/swh-storage'}, { - 'lister': None, - 'project': None, 'type': 'ftp', 'url': 'rsync://ftp.gnu.org/gnu/3dldf'}, { - 'lister': None, - 'project': None, 'type': 'deposit', 'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'}, { - 'lister': None, - 'project': None, 'type': 'pypi', 'url': 'https://pypi.org/project/limnoria/'}, { - 'lister': None, - 'project': None, 'type': 'svn', 'url': 'http://0-512-md.googlecode.com/svn/'}, { - 'lister': None, - 'project': None, 'type': 'git', 'url': 'https://github.com/librariesio/yarn-parser'}, { - 'lister': None, - 'project': None, 'type': 'git', 'url': 'https://github.com/librariesio/yarn-parser.git'}, ] @@ -111,7 +97,7 @@ b'3DLDF-2.0.tar.gz': { 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G' b'\xd3\xd1m', - b'target_type': 'revision'} + 'target_type': 'revision'} }}, { 'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/', @@ -167,18 +153,26 @@ REVISIONS = [{ 'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), + 'message': 'Improve search functionality', 'author': { - 'id': 
26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, 'committer': { - 'id': 26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, + 'committer_date': { + 'negative_utc': None, + 'offset': 120, + 'timestamp': { + 'microseconds': 0, + 'seconds': 1380883849 + } + }, + 'type': 'git', 'synthetic': False, 'date': { 'negative_utc': False, @@ -193,36 +187,23 @@ DIRECTORY_ID = b'10' -DIRECTORY = [{ - 'sha1_git': b'abc', +DIRECTORY_ENTRIES = [{ 'name': b'index.js', - 'target': b'abc', - 'length': 897, - 'status': 'visible', 'type': 'file', + 'target': b'abc', 'perms': 33188, - 'sha1': b'bcd' }, { - 'sha1_git': b'aab', 'name': b'package.json', - 'target': b'aab', - 'length': 712, - 'status': 'visible', 'type': 'file', + 'target': b'aab', 'perms': 33188, - 'sha1': b'cde' }, { - 'target': b'11', - 'type': 'dir', - 'length': None, 'name': b'.github', - 'sha1': None, + 'type': 'dir', + 'target': b'11', 'perms': 16384, - 'sha1_git': None, - 'status': None, - 'sha256': None } ] @@ -344,7 +325,10 @@ 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """, 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', - '636465': b""" + # 626364 + hash_to_hex(b'bcd'): b'unimportant content for bcd', + # 636465 + hash_to_hex(b'cde'): b""" { "name": "yarn-parser", "version": "1.0.0", @@ -385,10 +369,10 @@ "test": "^0.6.0" } } + """ } - YARN_PARSER_METADATA = { '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', 'url': @@ -565,13 +549,11 @@ storage.origin_visit_update( origin_url, visit['visit'], status='full', snapshot=snap_id) storage.revision_add(REVISIONS) - storage.directory_add([{ - 'id': DIRECTORY_ID, - 'entries': DIRECTORY, - }]) + + contents = [] for (obj_id, content) in OBJ_STORAGE_DATA.items(): content_hashes = hashutil.MultiHash.from_data(content).digest() - storage.content_add([{ + contents.append({ 'data': content, 'length': len(content), 'status': 'visible', @@ -579,7 +561,31 @@ 'sha1_git': 
hash_to_bytes(obj_id), 'sha256': content_hashes['sha256'], 'blake2s256': content_hashes['blake2s256'] - }]) + }) + + # Directory entries must target existing contents in storage + for i, entry in enumerate(DIRECTORY_ENTRIES): + if entry['type'] == 'dir': + continue + _id = entry['target'] + + raw_content = 'raw content for entry %s' % i + # bytes + contents.append({ + 'data': raw_content.encode(), + 'length': len(raw_content), + 'status': 'visible', + 'sha1': _id, + 'sha1_git': _id, + 'sha256': _id, + 'blake2s256': _id, + }) + + storage.content_add(contents) + storage.directory_add([{ + 'id': DIRECTORY_ID, + 'entries': DIRECTORY_ENTRIES, + }]) class CommonContentIndexerTest(metaclass=abc.ABCMeta):