Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/test_loader_verifier.py
- This file was moved from swh/loader/mercurial/loader_verifier.py.
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import click | |||||
import code | import code | ||||
import datetime | import datetime | ||||
import hglib | import hglib | ||||
import logging | import logging | ||||
import os | import os | ||||
import random | import random | ||||
import sys | import sys | ||||
import time | import time | ||||
import urllib.parse | |||||
from binascii import hexlify, unhexlify | from binascii import hexlify, unhexlify | ||||
from swh.model.hashutil import MultiHash | from swh.model.hashutil import MultiHash | ||||
from ..converters import PRIMARY_ALGO as ALGO | |||||
from ..objects import SimpleTree | |||||
from swh.loader.core.tests import BaseLoaderTest | |||||
from .common import HgLoaderMemoryStorage | |||||
from .loader import HgBundle20Loader | |||||
from .converters import PRIMARY_ALGO as ALGO | |||||
from .objects import SimpleTree | |||||
class HgLoaderValidater: | |||||
"""Loader validater | |||||
""" | |||||
def __init__(self, loader): | |||||
self.loader = loader | |||||
class HgLoaderValidater(HgBundle20Loader): | |||||
def generate_all_blobs(self, validate=True, frequency=1): | def generate_all_blobs(self, validate=True, frequency=1): | ||||
logging.debug('GENERATING BLOBS') | logging.debug('GENERATING BLOBS') | ||||
i = 0 | i = 0 | ||||
start = time.time() | start = time.time() | ||||
u = set() | u = set() | ||||
for blob, node_info in self.br.yield_all_blobs(): | for blob, node_info in self.loader.br.yield_all_blobs(): | ||||
filename = node_info[0] | filename = node_info[0] | ||||
header = node_info[2] | header = node_info[2] | ||||
i += 1 | i += 1 | ||||
hashes = MultiHash.from_data(blob, hash_names=set([ALGO])).digest() | hashes = MultiHash.from_data(blob, hash_names=set([ALGO])).digest() | ||||
bhash = hashes[ALGO] | bhash = hashes[ALGO] | ||||
self.file_node_to_hash[header['node']] = bhash | self.file_node_to_hash[header['node']] = bhash | ||||
Show All 38 Lines | def generate_all_trees(self, validate=True, frequency=1): | ||||
c = 0 | c = 0 | ||||
n = 0 | n = 0 | ||||
u = set() | u = set() | ||||
start = time.time() | start = time.time() | ||||
validated = 0 | validated = 0 | ||||
for header, tree, new_dirs in self.load_directories(): | for header, tree, new_dirs in self.loader.load_directories(): | ||||
if validate and (c >= validated) and (random.random() < frequency): | if validate and (c >= validated) and (random.random() < frequency): | ||||
self.validate_tree(tree, header, c) | self.validate_tree(tree, header, c) | ||||
for d in new_dirs: | for d in new_dirs: | ||||
u.add(d['id']) | u.add(d['id']) | ||||
c += 1 | c += 1 | ||||
n += len(new_dirs) | n += len(new_dirs) | ||||
Show All 38 Lines | def validate_tree(self, tree, header, i): | ||||
keys = [k['name'] for k in a['entries']] | keys = [k['name'] for k in a['entries']] | ||||
return b''.join(sorted(keys)) | return b''.join(sorted(keys)) | ||||
tree_dirs = [d for d in tree.yield_swh_directories()] | tree_dirs = [d for d in tree.yield_swh_directories()] | ||||
base_dirs = [d for d in base_tree.yield_swh_directories()] | base_dirs = [d for d in base_tree.yield_swh_directories()] | ||||
tree_dirs.sort(key=so1) | tree_dirs.sort(key=so1) | ||||
base_dirs.sort(key=so1) | base_dirs.sort(key=so1) | ||||
# for i in range(len(tree_dirs)): | |||||
# if tree_dirs[i] != base_dirs[i]: | |||||
# logging.debug(i) | |||||
# code.interact(local=dict(globals(), **locals())) | |||||
logging.debug('Program will quit after your next Ctrl-D') | logging.debug('Program will quit after your next Ctrl-D') | ||||
code.interact(local=dict(globals(), **locals())) | code.interact(local=dict(globals(), **locals())) | ||||
quit() | quit() | ||||
else: | else: | ||||
logging.debug('v') | logging.debug('v') | ||||
def generate_all_commits(self, validate=True, frequency=1): | def generate_all_commits(self, validate=True, frequency=1): | ||||
i = 0 | i = 0 | ||||
start = time.time() | start = time.time() | ||||
for rev in self.get_revisions(): | for rev in self.get_revisions(): | ||||
logging.debug('.', end='') | logging.debug('.', end='') | ||||
i += 1 | i += 1 | ||||
if i % 20 == 0: | if i % 20 == 0: | ||||
sys.stdout.flush() | sys.stdout.flush() | ||||
logging.debug('') | logging.debug('') | ||||
logging.debug('\nFOUND: %s COMMITS' % i) | logging.debug('\nFOUND: %s COMMITS' % i) | ||||
logging.debug('ELAPSED: %s' % (time.time()-start)) | logging.debug('ELAPSED: %s' % (time.time()-start)) | ||||
def runtest(self, hgdir, validate_blobs=False, validate_trees=False, | def runtest(self, hgdir, validate_blobs=False, validate_trees=False, | ||||
frequency=1.0, test_iterative=False): | frequency=1.0): | ||||
anlambert: This comment is strange. | |||||
Done Inline Actions Typo! ardumont: Typo! | |||||
"""HgLoaderValidater().runtest('/home/avi/SWH/mozilla-unified') | """loader = HgLoaderMemoryStorage(0 | ||||
HgLoaderValidater(loader).runtest('/home/avi/SWH/mozilla-unified') | |||||
""" | """ | ||||
self.origin_id = 'test' | self.origin_id = 'test' | ||||
dt = datetime.datetime.now(tz=datetime.timezone.utc) | dt = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
if test_iterative: | |||||
dt = dt - datetime.timedelta(10) | |||||
hgrepo = None | hgrepo = None | ||||
if (hgdir.lower().startswith('http:') | if (hgdir.lower().startswith('http:') | ||||
or hgdir.lower().startswith('https:')): | or hgdir.lower().startswith('https:')): | ||||
hgrepo, hgdir = hgdir, hgrepo | hgrepo, hgdir = hgdir, hgrepo | ||||
self.hgdir = hgdir | self.hgdir = hgdir | ||||
try: | try: | ||||
logging.debug('preparing') | logging.debug('preparing') | ||||
self.prepare(origin_url=hgrepo, visit_date=dt, directory=hgdir) | self.loader.prepare( | ||||
origin_url=hgrepo, visit_date=dt, directory=hgdir) | |||||
self.file_node_to_hash = {} | self.file_node_to_hash = {} | ||||
# self.generate_all_blobs(validate=validate_blobs, | |||||
# frequency=frequency) | |||||
# self.generate_all_trees(validate=validate_trees, frequency=frequency) | |||||
# self.generate_all_commits() | |||||
logging.debug('getting contents') | logging.debug('getting contents') | ||||
cs = 0 | cs = 0 | ||||
for c in self.get_contents(): | for c in self.loader.get_contents(): | ||||
cs += 1 | cs += 1 | ||||
pass | pass | ||||
logging.debug('getting directories') | logging.debug('getting directories') | ||||
ds = 0 | ds = 0 | ||||
for d in self.get_directories(): | for d in self.loader.get_directories(): | ||||
ds += 1 | ds += 1 | ||||
pass | pass | ||||
revs = 0 | revs = 0 | ||||
logging.debug('getting revisions') | logging.debug('getting revisions') | ||||
for rev in self.get_revisions(): | for rev in self.loader.get_revisions(): | ||||
revs += 1 | revs += 1 | ||||
pass | pass | ||||
logging.debug('getting releases') | logging.debug('getting releases') | ||||
rels = 0 | rels = 0 | ||||
for rel in self.get_releases(): | for rel in self.loader.get_releases(): | ||||
rels += 1 | rels += 1 | ||||
logging.debug(rel) | logging.debug(rel) | ||||
self.visit = 'foo' | self.visit = 'foo' | ||||
snps = 0 | |||||
logging.debug('getting snapshot') | logging.debug('getting snapshot') | ||||
o = self.get_snapshot() | o = self.loader.get_snapshot() | ||||
Not Done Inline Actions Shouldn't we query the storage instead? vlorentz: Shouldn't we query the storage instead? | |||||
Done Inline Actions I do not think so. Then again, I only ported the existing code. [1] IIRC, when I took over the mercurial loader, that did not work properly with the storage, for example. ardumont: I do not think so.
As we do not call the loader's `load` function, for historical reasons i… | |||||
logging.debug('Snapshot: %s' % o) | logging.debug('Snapshot: %s' % o) | ||||
if o: | |||||
snps += 1 | |||||
finally: | finally: | ||||
self.cleanup() | self.loader.cleanup() | ||||
logging.info('final count: cs %s ds %s revs %s rels %s' % ( | |||||
cs, ds, revs, rels)) | |||||
@click.command() | return cs, ds, revs, rels, snps | ||||
@click.option('--verbose', is_flag=True, default=False) | |||||
@click.option('--validate-frequency', default=0.001, type=click.FLOAT) | |||||
@click.option('--test-iterative', default=False, type=click.BOOL) | |||||
@click.argument('repository-url', required=1) | |||||
def main(verbose, validate_frequency, test_iterative, repository_url): | |||||
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) | |||||
while repository_url[-1] == '/': | |||||
repository_url = repository_url[:-1] | |||||
HgLoaderValidater().runtest( | |||||
repository_url, | |||||
validate_blobs=True, validate_trees=True, | |||||
frequency=validate_frequency, | |||||
test_iterative=test_iterative) | |||||
if __name__ == '__main__': | class BaseLoaderVerifierTest(BaseLoaderTest): | ||||
main() | def setUp(self, archive_name='the-sandbox.tgz', filename='the-sandbox'): | ||||
super().setUp(archive_name=archive_name, filename=filename, | |||||
prefix_tmp_folder_name='swh.loader.mercurial.', | |||||
start_path=os.path.dirname(__file__)) | |||||
loader = HgLoaderMemoryStorage() | |||||
self.validator = HgLoaderValidater(loader) | |||||
class LoaderVerifierTest1(BaseLoaderVerifierTest): | |||||
def test_data(self): | |||||
repo_path = urllib.parse.urlparse(self.repo_url).path | |||||
cs, ds, revs, rels, snps = self.validator.runtest( | |||||
repo_path, | |||||
validate_blobs=True, | |||||
validate_trees=True, | |||||
frequency=0.001) | |||||
self.assertEqual(cs, 2) | |||||
self.assertEqual(ds, 3) | |||||
self.assertEqual(revs, 58) | |||||
self.assertEqual(rels, 0) | |||||
self.assertEqual(snps, 1) | |||||
class LoaderVerifierTest2(BaseLoaderVerifierTest): | |||||
def setUp(self, archive_name='hello.tgz', filename='hello'): | |||||
super().setUp(archive_name=archive_name, filename=filename) | |||||
def test_data(self): | |||||
repo_path = urllib.parse.urlparse(self.repo_url).path | |||||
cs, ds, revs, rels, snps = self.validator.runtest( | |||||
repo_path, | |||||
validate_blobs=True, | |||||
validate_trees=True, | |||||
frequency=0.001) | |||||
self.assertEqual(cs, 3) | |||||
self.assertEqual(ds, 3) | |||||
self.assertEqual(rels, 1) | |||||
self.assertEqual(revs, 3) | |||||
self.assertEqual(snps, 1) |
This comment is strange.