Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/gnu/tests/test_lister.py
# Copyright (C) 2019 the Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | |||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import logging | ||||
import re | import re | ||||
from os.path import join, dirname | from swh.lister.gnu.tests import get_response_cb | ||||
from urllib.parse import urlparse | |||||
from swh.lister.gnu.lister import ( | |||||
find_tarballs, filter_directories, file_extension_check | |||||
) | |||||
logger = logging.getLogger(__name__) | |||||
DATADIR = join(dirname(__file__), 'data') | |||||
def get_response_cb(request, context): | |||||
"""Mock the http request to send back the on-disk file handler | |||||
""" | |||||
url = urlparse(request.url) | |||||
dirname = url.hostname | |||||
filename = url.path[1:].replace('/', '_') | |||||
return open(join(DATADIR, dirname, filename), 'rb') | |||||
def test_lister_no_page_check_results(swh_listers, requests_mock): | def test_lister_no_page_check_results(swh_listers, requests_mock): | ||||
lister = swh_listers['gnu'] | lister = swh_listers['gnu'] | ||||
requests_mock.get(re.compile('https://ftp.gnu.org'), body=get_response_cb) | requests_mock.get(re.compile('https://ftp.gnu.org'), body=get_response_cb) | ||||
lister.run() | lister.run() | ||||
r = lister.scheduler.search_tasks(task_type='load-tar') | r = lister.scheduler.search_tasks(task_type='load-tar') | ||||
assert len(r) == 382 | |||||
# r | assert len(r) == 383 | ||||
vlorentz: What is this comment? | |||||
ardumont (marked as done): It is the raw output of the search_tasks call, kept as an example. | |||||
# ('type', 'load-tar'), | |||||
# ('arguments', | |||||
# {'args': ['libmatheval', 'https://ftp.gnu.org/gnu/libmatheval/'], | |||||
# 'kwargs': {'tarballs': [{'archive': 'https://ftp.gnu.org/gnu/libmatheval/libmatheval-1.0.0.tar.gz', # noqa | |||||
# 'length': 362222, | |||||
# 'time': 1068754394}, | |||||
# ... | |||||
# ] | |||||
# ('next_run', | |||||
# datetime.datetime(2019, 10, 5, 12, 23, 28, 889293, | |||||
# tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=120, name=None))), | |||||
# ('current_interval', datetime.timedelta(days=64)), | |||||
# ('status', 'next_run_not_scheduled'), | |||||
# ('policy', 'oneshot'), | |||||
# ('retries_left', 3), | |||||
# ('priority', None)]) | |||||
for row in r: | for row in r: | ||||
assert row['type'] == 'load-tar' | assert row['type'] == 'load-tar' | ||||
# arguments check | # arguments check | ||||
args = row['arguments']['args'] | args = row['arguments']['args'] | ||||
assert len(args) == 2 | assert len(args) == 1 | ||||
package = args[0] | |||||
assert package != '' | |||||
url = args[1] | |||||
url = args[0] | |||||
assert url.startswith('https://ftp.gnu.org') | assert url.startswith('https://ftp.gnu.org') | ||||
assert package in url | |||||
url_suffix = url.split('https://ftp.gnu.org')[1] | |||||
assert 'gnu' in url_suffix or 'old-gnu' in url_suffix | |||||
# kwargs | # kwargs | ||||
kwargs = row['arguments']['kwargs'] | kwargs = row['arguments']['kwargs'] | ||||
assert list(kwargs.keys()) == ['tarballs'] | assert list(kwargs.keys()) == ['tarballs'] | ||||
tarballs = kwargs['tarballs'] | tarballs = kwargs['tarballs'] | ||||
# check the tarball's structure | # check the tarball's structure | ||||
tarball = tarballs[0] | tarball = tarballs[0] | ||||
assert set(tarball.keys()) == set(['archive', 'length', 'time']) | assert set(tarball.keys()) == set(['archive', 'length', 'time']) | ||||
assert row['policy'] == 'oneshot' | assert row['policy'] == 'oneshot' | ||||
def test_filter_directories(): | |||||
f = open('swh/lister/gnu/tests/api_response.json') | |||||
api_response = json.load(f) | |||||
cleared_api_response = filter_directories(api_response) | |||||
for directory in cleared_api_response: | |||||
if directory['name'] not in ('gnu', 'old-gnu'): | |||||
assert False | |||||
def test_find_tarballs_small_sample(): | |||||
expected_tarballs = [ | |||||
{ | |||||
'archive': '/root/artanis/artanis-0.2.1.tar.bz2', | |||||
'time': 1495205979, | |||||
'length': 424081, | |||||
}, | |||||
{ | |||||
'archive': '/root/xboard/winboard/winboard-4_0_0-src.zip', # noqa | |||||
'time': 898422900, | |||||
'length': 1514448 | |||||
}, | |||||
{ | |||||
'archive': '/root/xboard/xboard-3.6.2.tar.gz', # noqa | |||||
'time': 869814000, | |||||
'length': 450164, | |||||
}, | |||||
{ | |||||
'archive': '/root/xboard/xboard-4.0.0.tar.gz', # noqa | |||||
'time': 898422900, | |||||
'length': 514951, | |||||
}, | |||||
] | |||||
file_structure = json.load(open('swh/lister/gnu/tests/tree.min.json')) | |||||
actual_tarballs = find_tarballs(file_structure, '/root/') | |||||
assert actual_tarballs == expected_tarballs | |||||
def test_find_tarballs(): | |||||
file_structure = json.load(open('swh/lister/gnu/tests/tree.json')) | |||||
actual_tarballs = find_tarballs(file_structure, '/root/') | |||||
assert len(actual_tarballs) == 42 + 3 # tar + zip | |||||
def test_file_extension_check(): | |||||
assert file_extension_check('abc.xy.zip') | |||||
assert file_extension_check('cvb.zip') | |||||
assert file_extension_check('abc.tar.bz2') | |||||
assert file_extension_check('abc') is False |
 vlorentz: What is this comment?