Test-fixture update for swh/indexer/tests (test_metadata.py and utils.py).

What the hunks below do:
  * test_metadata.py: read and overwrite the revision's directory through
    attribute access (rev.directory) instead of dict indexing, and reduce
    the added directory entry to name/type/target/perms.
  * utils.py, ORIGINS: drop the 'lister' and 'project' keys.
  * utils.py, b'3DLDF-2.0.tar.gz' entry: use the str key 'target_type'
    instead of the bytes key b'target_type'.
  * utils.py, REVISIONS: drop the author/committer 'id' and add 'message',
    'committer_date' and 'type'.
  * utils.py: rename DIRECTORY to DIRECTORY_ENTRIES and reduce each entry
    to name/type/target/perms.
  * utils.py: add two entries keyed by hash_to_hex(b'bcd') and
    hash_to_hex(b'cde') (presumably inside OBJ_STORAGE_DATA; confirm).
  * utils.py, storage filling: collect every content in one list, append
    one content per non-'dir' entry of DIRECTORY_ENTRIES, call
    storage.content_add once, and only then call storage.directory_add.

The per-entry contents are encoded to bytes and their 'length' is computed
from their own data, not from the `content` variable left over from the
preceding loop over OBJ_STORAGE_DATA.

NOTE(review): leading whitespace on every line was reconstructed from a
whitespace-collapsed copy of this patch. Check the context lines against
the target files, or apply with whitespace-insensitive matching.

NOTE(review): the "-345,6 +326,10" hunk keeps the line `'636465': b"""` as
context directly above the added `hash_to_hex(b'cde'): b"""` line. Confirm
that the resulting literal is what is intended before applying.

diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -1161,20 +1161,16 @@
         # Add a parent directory, that is the only directory at the root
         # of the revision
         rev_id = hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f')
-        subdir_id = metadata_indexer.storage._revisions[rev_id]['directory']
-        metadata_indexer.storage._revisions[rev_id]['directory'] = b'123456'
+        rev = metadata_indexer.storage._revisions[rev_id]
+        subdir_id = rev.directory
+        rev.directory = b'123456'
         metadata_indexer.storage.directory_add([{
             'id': b'123456',
             'entries': [{
-                'target': subdir_id,
-                'type': 'dir',
-                'length': None,
                 'name': b'foobar-1.0.0',
-                'sha1': None,
+                'type': 'dir',
+                'target': subdir_id,
                 'perms': 16384,
-                'sha1_git': None,
-                'status': None,
-                'sha256': None
             }],
         }])
 
diff --git a/swh/indexer/tests/utils.py b/swh/indexer/tests/utils.py
--- a/swh/indexer/tests/utils.py
+++ b/swh/indexer/tests/utils.py
@@ -36,38 +36,24 @@
 
 ORIGINS = [
     {
-        'lister': None,
-        'project': None,
         'type': 'git',
         'url': 'https://github.com/SoftwareHeritage/swh-storage'},
     {
-        'lister': None,
-        'project': None,
         'type': 'ftp',
         'url': 'rsync://ftp.gnu.org/gnu/3dldf'},
     {
-        'lister': None,
-        'project': None,
         'type': 'deposit',
         'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'},
     {
-        'lister': None,
-        'project': None,
         'type': 'pypi',
         'url': 'https://pypi.org/project/limnoria/'},
     {
-        'lister': None,
-        'project': None,
         'type': 'svn',
         'url': 'http://0-512-md.googlecode.com/svn/'},
     {
-        'lister': None,
-        'project': None,
         'type': 'git',
         'url': 'https://github.com/librariesio/yarn-parser'},
     {
-        'lister': None,
-        'project': None,
         'type': 'git',
         'url': 'https://github.com/librariesio/yarn-parser.git'},
 ]
@@ -111,7 +97,7 @@
             b'3DLDF-2.0.tar.gz': {
                 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G'
                           b'\xd3\xd1m',
-                b'target_type': 'revision'}
+                'target_type': 'revision'}
         }},
     {
         'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/',
@@ -167,18 +153,26 @@
 
 REVISIONS = [{
     'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'),
+    'message': 'Improve search functionality',
     'author': {
-        'id': 26,
         'name': b'Andrew Nesbitt',
         'fullname': b'Andrew Nesbitt ',
         'email': b'andrewnez@gmail.com'
     },
     'committer': {
-        'id': 26,
         'name': b'Andrew Nesbitt',
         'fullname': b'Andrew Nesbitt ',
         'email': b'andrewnez@gmail.com'
     },
+    'committer_date': {
+        'negative_utc': None,
+        'offset': 120,
+        'timestamp': {
+            'microseconds': 0,
+            'seconds': 1380883849
+        }
+    },
+    'type': 'git',
     'synthetic': False,
     'date': {
         'negative_utc': False,
@@ -193,36 +187,23 @@
 
 DIRECTORY_ID = b'10'
 
-DIRECTORY = [{
-    'sha1_git': b'abc',
+DIRECTORY_ENTRIES = [{
     'name': b'index.js',
-    'target': b'abc',
-    'length': 897,
-    'status': 'visible',
     'type': 'file',
+    'target': b'abc',
     'perms': 33188,
-    'sha1': b'bcd'
 },
 {
-    'sha1_git': b'aab',
     'name': b'package.json',
-    'target': b'aab',
-    'length': 712,
-    'status': 'visible',
     'type': 'file',
+    'target': b'aab',
     'perms': 33188,
-    'sha1': b'cde'
 },
 {
-    'target': b'11',
-    'type': 'dir',
-    'length': None,
     'name': b'.github',
-    'sha1': None,
+    'type': 'dir',
+    'target': b'11',
     'perms': 16384,
-    'sha1_git': None,
-    'status': None,
-    'sha256': None
 }
 ]
 
@@ -345,6 +326,10 @@
     """,
     'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'',
     '636465': b"""
+    # 626364
+    hash_to_hex(b'bcd'): b'unimportant content for bcd',
+    # 636465
+    hash_to_hex(b'cde'): b"""
     {
       "name": "yarn-parser",
       "version": "1.0.0",
@@ -385,10 +370,10 @@
         "test": "^0.6.0"
       }
     }
+    """
 }
 
 
-
 YARN_PARSER_METADATA = {
     '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
     'url':
@@ -565,13 +550,11 @@
         storage.origin_visit_update(
             origin_url, visit['visit'], status='full', snapshot=snap_id)
     storage.revision_add(REVISIONS)
-    storage.directory_add([{
-        'id': DIRECTORY_ID,
-        'entries': DIRECTORY,
-    }])
+
+    contents = []
     for (obj_id, content) in OBJ_STORAGE_DATA.items():
         content_hashes = hashutil.MultiHash.from_data(content).digest()
-        storage.content_add([{
+        contents.append({
             'data': content,
             'length': len(content),
             'status': 'visible',
@@ -579,8 +562,31 @@
             'sha1_git': hash_to_bytes(obj_id),
             'sha256': content_hashes['sha256'],
             'blake2s256': content_hashes['blake2s256']
-        }])
+        })
+
+    # Directory entries must target existing contents in storage
+    for i, entry in enumerate(DIRECTORY_ENTRIES):
+        if entry['type'] == 'dir':
+            continue
+        _id = entry['target']
+
+        # bytes, like the data of the contents collected above
+        raw_content = ('raw content for entry %s' % i).encode()
+        contents.append({
+            'data': raw_content,
+            'length': len(raw_content),
+            'status': 'visible',
+            'sha1': _id,
+            'sha1_git': _id,
+            'sha256': _id,
+            'blake2s256': _id,
+        })
+    storage.content_add(contents)
+    storage.directory_add([{
+        'id': DIRECTORY_ID,
+        'entries': DIRECTORY_ENTRIES,
+    }])
 
 
 class CommonContentIndexerTest(metaclass=abc.ABCMeta):
     legacy_get_format = False