Changeset View
Standalone View
swh/indexer/tests/utils.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import abc | import abc | ||||
import datetime | import datetime | ||||
import functools | import functools | ||||
import random | import random | ||||
Show All 21 Lines | INDEXER_CFG_KEY: { | ||||
'cls': 'memory', | 'cls': 'memory', | ||||
'args': { | 'args': { | ||||
}, | }, | ||||
}, | }, | ||||
} | } | ||||
ORIGINS = [ | ORIGINS = [ | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'git', | 'type': 'git', | ||||
'url': 'https://github.com/SoftwareHeritage/swh-storage'}, | 'url': 'https://github.com/SoftwareHeritage/swh-storage'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'ftp', | 'type': 'ftp', | ||||
'url': 'rsync://ftp.gnu.org/gnu/3dldf'}, | 'url': 'rsync://ftp.gnu.org/gnu/3dldf'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'deposit', | 'type': 'deposit', | ||||
'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'}, | 'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'pypi', | 'type': 'pypi', | ||||
'url': 'https://pypi.org/project/limnoria/'}, | 'url': 'https://pypi.org/project/limnoria/'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'svn', | 'type': 'svn', | ||||
'url': 'http://0-512-md.googlecode.com/svn/'}, | 'url': 'http://0-512-md.googlecode.com/svn/'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'git', | 'type': 'git', | ||||
'url': 'https://github.com/librariesio/yarn-parser'}, | 'url': 'https://github.com/librariesio/yarn-parser'}, | ||||
{ | { | ||||
'lister': None, | |||||
'project': None, | |||||
'type': 'git', | 'type': 'git', | ||||
'url': 'https://github.com/librariesio/yarn-parser.git'}, | 'url': 'https://github.com/librariesio/yarn-parser.git'}, | ||||
] | ] | ||||
SNAPSHOTS = [ | SNAPSHOTS = [ | ||||
{ | { | ||||
'origin': 'https://github.com/SoftwareHeritage/swh-storage', | 'origin': 'https://github.com/SoftwareHeritage/swh-storage', | ||||
'branches': { | 'branches': { | ||||
Show All 27 Lines | SNAPSHOTS = [ | ||||
'target_type': 'revision'}, | 'target_type': 'revision'}, | ||||
b'3DLDF-2.0.3.tar.gz': { | b'3DLDF-2.0.3.tar.gz': { | ||||
'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee' | 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee' | ||||
b'\xcc\x1a\xb4`\x8c\x8by', | b'\xcc\x1a\xb4`\x8c\x8by', | ||||
'target_type': 'revision'}, | 'target_type': 'revision'}, | ||||
b'3DLDF-2.0.tar.gz': { | b'3DLDF-2.0.tar.gz': { | ||||
'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G' | 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G' | ||||
b'\xd3\xd1m', | b'\xd3\xd1m', | ||||
b'target_type': 'revision'} | 'target_type': 'revision'} | ||||
}}, | }}, | ||||
{ | { | ||||
'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/', | 'origin': 'https://forge.softwareheritage.org/source/jesuisgpl/', | ||||
'branches': { | 'branches': { | ||||
b'master': { | b'master': { | ||||
'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{' | 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{' | ||||
b'\xa6\xe9\x99\xb1\x9e]q\xeb', | b'\xa6\xe9\x99\xb1\x9e]q\xeb', | ||||
'target_type': 'revision'} | 'target_type': 'revision'} | ||||
Show All 39 Lines | SNAPSHOTS = [ | ||||
'target': hash_to_bytes( | 'target': hash_to_bytes( | ||||
'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | '8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | ||||
'target_type': 'revision'}}}, | 'target_type': 'revision'}}}, | ||||
] | ] | ||||
REVISIONS = [{ | REVISIONS = [{ | ||||
'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | 'id': hash_to_bytes('8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f'), | ||||
'message': 'Improve search functionality', | |||||
'author': { | 'author': { | ||||
'id': 26, | |||||
'name': b'Andrew Nesbitt', | 'name': b'Andrew Nesbitt', | ||||
'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | 'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | ||||
'email': b'andrewnez@gmail.com' | 'email': b'andrewnez@gmail.com' | ||||
}, | }, | ||||
'committer': { | 'committer': { | ||||
'id': 26, | |||||
'name': b'Andrew Nesbitt', | 'name': b'Andrew Nesbitt', | ||||
'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | 'fullname': b'Andrew Nesbitt <andrewnez@gmail.com>', | ||||
'email': b'andrewnez@gmail.com' | 'email': b'andrewnez@gmail.com' | ||||
}, | }, | ||||
'committer_date': { | |||||
'negative_utc': None, | |||||
'offset': 120, | |||||
'timestamp': { | |||||
vlorentz: nitpick: use the same formatting as the author date | |||||
'microseconds': 0, | |||||
'seconds': 1380883849 | |||||
} | |||||
}, | |||||
'type': 'git', | |||||
'synthetic': False, | 'synthetic': False, | ||||
'date': { | 'date': { | ||||
'negative_utc': False, | 'negative_utc': False, | ||||
'timestamp': { | 'timestamp': { | ||||
'seconds': 1487596456, | 'seconds': 1487596456, | ||||
'microseconds': 0 | 'microseconds': 0 | ||||
}, | }, | ||||
'offset': 0 | 'offset': 0 | ||||
}, | }, | ||||
'directory': b'10' | 'directory': b'10' | ||||
}] | }] | ||||
DIRECTORY_ID = b'10' | DIRECTORY_ID = b'10' | ||||
DIRECTORY = [{ | DIRECTORY_ENTRIES = [{ | ||||
[Done] vlorentz: the var name should be `DIRECTORY_ENTRIES`, btw | |||||
'sha1_git': b'abc', | |||||
'name': b'index.js', | 'name': b'index.js', | ||||
'target': b'abc', | |||||
'length': 897, | |||||
'status': 'visible', | |||||
'type': 'file', | 'type': 'file', | ||||
'target': b'abc', | |||||
'perms': 33188, | 'perms': 33188, | ||||
'sha1': b'bcd' | |||||
}, | }, | ||||
{ | { | ||||
'sha1_git': b'aab', | |||||
'name': b'package.json', | 'name': b'package.json', | ||||
'target': b'aab', | |||||
'length': 712, | |||||
'status': 'visible', | |||||
'type': 'file', | 'type': 'file', | ||||
'target': b'cde', | |||||
'perms': 33188, | 'perms': 33188, | ||||
'sha1': b'cde' | |||||
}, | }, | ||||
[Not Done] vlorentz: because of this, `directory_ls` used to return `{'target': b'aab', .., 'sha1': b'cde'}` (`b'aab'` being a sha1_git). | |||||
{ | { | ||||
'target': b'11', | |||||
'type': 'dir', | |||||
'length': None, | |||||
'name': b'.github', | 'name': b'.github', | ||||
'sha1': None, | 'type': 'dir', | ||||
'target': b'11', | |||||
'perms': 16384, | 'perms': 16384, | ||||
'sha1_git': None, | |||||
'status': None, | |||||
'sha256': None | |||||
} | } | ||||
] | ] | ||||
SHA1_TO_LICENSES = { | SHA1_TO_LICENSES = { | ||||
'01c9379dfc33803963d07c1ccc748d3fe4c96bb5': ['GPL'], | '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': ['GPL'], | ||||
'02fb2c89e14f7fab46701478c83779c7beb7b069': ['Apache2.0'], | '02fb2c89e14f7fab46701478c83779c7beb7b069': ['Apache2.0'], | ||||
'103bc087db1d26afc3a0283f38663d081e9b01e6': ['MIT'], | '103bc087db1d26afc3a0283f38663d081e9b01e6': ['MIT'], | ||||
'688a5ef812c53907562fe379d4b3851e69c7cb15': ['AGPL'], | '688a5ef812c53907562fe379d4b3851e69c7cb15': ['AGPL'], | ||||
▲ Show 20 Lines • Show All 105 Lines • ▼ Show 20 Lines | 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" | ||||
}, | }, | ||||
"license": "Artistic-2.0" | "license": "Artistic-2.0" | ||||
} | } | ||||
""", | """, | ||||
'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" | 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" | ||||
""", | """, | ||||
'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', | 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', | ||||
'636465': b""" | # 626364 | ||||
hash_to_hex(b'bcd'): b'unimportant content for bcd', | |||||
# 636465 | |||||
hash_to_hex(b'cde'): b""" | |||||
{ | { | ||||
"name": "yarn-parser", | "name": "yarn-parser", | ||||
"version": "1.0.0", | "version": "1.0.0", | ||||
"description": "Tiny web service for parsing yarn.lock files", | "description": "Tiny web service for parsing yarn.lock files", | ||||
"main": "index.js", | "main": "index.js", | ||||
"scripts": { | "scripts": { | ||||
"start": "node index.js", | "start": "node index.js", | ||||
"test": "mocha" | "test": "mocha" | ||||
Show All 24 Lines | hash_to_hex(b'cde'): b""" | ||||
}, | }, | ||||
"devDependencies": { | "devDependencies": { | ||||
"chai": "^4.1.2", | "chai": "^4.1.2", | ||||
"mocha": "^5.2.0", | "mocha": "^5.2.0", | ||||
"request": "^2.87.0", | "request": "^2.87.0", | ||||
"test": "^0.6.0" | "test": "^0.6.0" | ||||
} | } | ||||
} | } | ||||
""" | """ | ||||
} | } | ||||
YARN_PARSER_METADATA = { | YARN_PARSER_METADATA = { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'url': | 'url': | ||||
'https://github.com/librariesio/yarn-parser#readme', | 'https://github.com/librariesio/yarn-parser#readme', | ||||
'codeRepository': | 'codeRepository': | ||||
'git+git+https://github.com/librariesio/yarn-parser.git', | 'git+git+https://github.com/librariesio/yarn-parser.git', | ||||
'author': [{ | 'author': [{ | ||||
'type': 'Person', | 'type': 'Person', | ||||
▲ Show 20 Lines • Show All 160 Lines • ▼ Show 20 Lines | for snap in SNAPSHOTS: | ||||
bytes([random.randint(0, 255) for _ in range(32)]) | bytes([random.randint(0, 255) for _ in range(32)]) | ||||
storage.snapshot_add([{ | storage.snapshot_add([{ | ||||
'id': snap_id, | 'id': snap_id, | ||||
'branches': snap['branches'] | 'branches': snap['branches'] | ||||
}]) | }]) | ||||
storage.origin_visit_update( | storage.origin_visit_update( | ||||
origin_url, visit['visit'], status='full', snapshot=snap_id) | origin_url, visit['visit'], status='full', snapshot=snap_id) | ||||
storage.revision_add(REVISIONS) | storage.revision_add(REVISIONS) | ||||
storage.directory_add([{ | |||||
'id': DIRECTORY_ID, | contents = [] | ||||
'entries': DIRECTORY, | |||||
}]) | |||||
for (obj_id, content) in OBJ_STORAGE_DATA.items(): | for (obj_id, content) in OBJ_STORAGE_DATA.items(): | ||||
content_hashes = hashutil.MultiHash.from_data(content).digest() | content_hashes = hashutil.MultiHash.from_data(content).digest() | ||||
storage.content_add([{ | contents.append({ | ||||
'data': content, | 'data': content, | ||||
'length': len(content), | 'length': len(content), | ||||
'status': 'visible', | 'status': 'visible', | ||||
'sha1': hash_to_bytes(obj_id), | 'sha1': hash_to_bytes(obj_id), | ||||
'sha1_git': hash_to_bytes(obj_id), | 'sha1_git': hash_to_bytes(obj_id), | ||||
[Not Done] vlorentz: Still not happy about this, but let's fix it later. | |||||
[Done] ardumont: Neither am I, but yeah, not right now ;) | |||||
'sha256': content_hashes['sha256'], | 'sha256': content_hashes['sha256'], | ||||
'blake2s256': content_hashes['blake2s256'] | 'blake2s256': content_hashes['blake2s256'] | ||||
}) | |||||
storage.content_add(contents) | |||||
storage.directory_add([{ | |||||
'id': DIRECTORY_ID, | |||||
'entries': DIRECTORY_ENTRIES, | |||||
}]) | }]) | ||||
class CommonContentIndexerTest(metaclass=abc.ABCMeta): | class CommonContentIndexerTest(metaclass=abc.ABCMeta): | ||||
legacy_get_format = False | legacy_get_format = False | ||||
"""True if and only if the tested indexer uses the legacy format. | """True if and only if the tested indexer uses the legacy format. | ||||
see: https://forge.softwareheritage.org/T1433 | see: https://forge.softwareheritage.org/T1433 | ||||
""" | """ | ||||
def get_indexer_results(self, ids): | def get_indexer_results(self, ids): | ||||
"""Override this for indexers that don't have a mock storage.""" | """Override this for indexers that don't have a mock storage.""" | ||||
return self.indexer.idx_storage.state | return self.indexer.idx_storage.state | ||||
[Not Done] vlorentz: But this new code adds `{'target': b'aab', .., 'sha1': b'aab'}`, so the sha1 changed. You should not populate `storage.content_add` from `DIRECTORY_ENTRIES`. Add a new constant (with the same data that was deleted) instead. | |||||
[Done] ardumont:
> But, this new code adds `{'target': b'aab', .., 'sha1': b'aab'}`; so the sha1 changed.
Yes, I hear you. But I remember I first tried to add constants to the objstorage (`b'aab'`, `b'abc'`), and it failed the same way. | |||||
[Done] ardumont: The actual fix is to target `b'cde'` for that particular entry...
(then I don't have to fix any… | |||||
[Not Done] vlorentz: No. The actual fix is to use `storage.content_add([{'target': b'aab', .., 'sha1': b'cde'}])` | |||||
[Done] ardumont: Well, I mean the real fix would be to untangle this; this is not maintainable as is. It's not… As a first approximation though, just changing the fewest cogs possible (only what the introduction of validation requires) to have the tests green as before is the way. And
Even though correct, that does not add up to clarity... | |||||
def assert_legacy_results_ok(self, sha1s, expected_results=None): | def assert_legacy_results_ok(self, sha1s, expected_results=None): | ||||
# XXX old format, remove this when all endpoints are | # XXX old format, remove this when all endpoints are | ||||
# updated to the new one | # updated to the new one | ||||
# see: https://forge.softwareheritage.org/T1433 | # see: https://forge.softwareheritage.org/T1433 | ||||
sha1s = [sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) | sha1s = [sha1 if isinstance(sha1, bytes) else hash_to_bytes(sha1) | ||||
for sha1 in sha1s] | for sha1 in sha1s] | ||||
actual_results = list(self.get_indexer_results(sha1s)) | actual_results = list(self.get_indexer_results(sha1s)) | ||||
▲ Show 20 Lines • Show All 161 Lines • Show Last 20 Lines |
nitpick: use the same formatting as the author date