diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ - `swh.lister.debian` - `swh.lister.github` - `swh.lister.gitlab` +- `swh.lister.gnu` - `swh.lister.pypi` - `swh.lister.npm` - `swh.lister.phabricator` @@ -177,6 +178,18 @@ incremental_phabricator_lister(forge_url='https://forge.softwareheritage.org', api_token='XXXX') ``` +## lister-gnu + +Once configured, you can execute a GNU lister using the following instructions in a `python3` script: + +```lang=python +import logging +from swh.lister.gnu.tasks import gnu_lister + +logging.basicConfig(level=logging.DEBUG) +gnu_lister() +``` + Licensing --------- diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', - 'npm', 'phabricator'] + 'npm', 'phabricator', 'gnu'] @click.group(name='lister', context_settings=CONTEXT_SETTINGS) @@ -115,6 +115,11 @@ api_token='', override_config=override_conf) + elif lister == 'gnu': + from .gnu.models import ModelBase + from .gnu.lister import GNULister + _lister = GNULister(override_config=override_conf) + else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py --- a/swh/lister/core/tests/conftest.py +++ b/swh/lister/core/tests/conftest.py @@ -9,6 +9,7 @@ 'swh.lister.debian.tasks', 'swh.lister.github.tasks', 'swh.lister.gitlab.tasks', + 'swh.lister.gnu.tasks', 'swh.lister.npm.tasks', 'swh.lister.pypi.tasks', 'swh.lister.phabricator.tasks', diff --git a/swh/lister/gnu/__init__.py b/swh/lister/gnu/__init__.py new file mode 100644 diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/lister.py @@ -0,0 +1,223 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any 
later version +# See top-level LICENSE file for more information + +import random +import gzip +import json +import requests +from pathlib import Path +from collections import defaultdict + +from .models import GNUModel + +from swh.scheduler import utils +from swh.lister.core.simple_lister import SimpleLister + + +class GNULister(SimpleLister): + MODEL = GNUModel + LISTER_NAME = 'gnu' + TREE_URL = 'https://ftp.gnu.org/tree.json.gz' + BASE_URL = 'https://ftp.gnu.org' + instance = 'gnu' + tarballs = defaultdict(dict) # Maps each project name to the + # list of tarballs of that package to ingest from the gnu mirror + + def task_dict(self, origin_type, origin_url, **kwargs): + """ + Return task format dict + + This is overridden from the lister_base as more information is + needed for the ingestion task creation. + """ + return utils.create_task_dict( + 'load-%s' % origin_type, 'recurring', kwargs.get('name'), + origin_url, tarballs=self.tarballs[kwargs.get('name')]) + + def get_file(self): + ''' + Download and unzip tree.json.gz file and return its content + parsed from JSON + + Returns + File content in dictionary format + ''' + response = requests.get(self.TREE_URL, + allow_redirects=True) + uncompressed_content = gzip.decompress(response.content) + return json.loads(uncompressed_content.decode('utf-8')) + + def safely_issue_request(self, identifier): + ''' + Make network request to download the file which + has file structure of the GNU website. + + Args: + identifier: resource identifier + Returns: + Server response + ''' + return self.get_file() + + def list_packages(self, response): + """ + List the actual gnu origins with their names, url and the list + of all the tarballs for a package from the response. + + Args: + response : File structure of the website + in dictionary format + + Returns: + A list of all the packages with their names, url of their root + directory and the tarballs present for the particular package. 
+ [ + {'name': '3dldf', 'url': 'https://ftp.gnu.org/gnu/3dldf/', + 'tarballs': + [ + {'archive': + 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', + 'date': '1071002600'}, + {'archive': + 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', + 'date': '1071078759'}} + ] + }, + {'name': '8sync', 'url': 'https://ftp.gnu.org/gnu/8sync/', + 'tarballs': + [ + {'archive': + 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', + 'date': '1461357336'}, + {'archive': + 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz', + 'date': '1480991830'} + ] + ] + """ + response = filter_directories(response) + packages = [] + for directory in response: + content = directory['contents'] + for repo in content: + if repo['type'] == 'directory': + package_url = '%s/%s/%s/' % (self.BASE_URL, + directory['name'], + repo['name']) + package_tarballs = find_tarballs( + repo['contents'], package_url) + if package_tarballs != []: + repo_details = { + 'name': repo['name'], + 'url': package_url, + 'time_modified': repo['time'], + } + self.tarballs[repo['name']] = package_tarballs + packages.append(repo_details) + random.shuffle(packages) + return packages + + def get_model_from_repo(self, repo): + """Transform from repository representation to model + + """ + return { + 'uid': repo['name'], + 'name': repo['name'], + 'full_name': repo['name'], + 'html_url': repo['url'], + 'origin_url': repo['url'], + 'time_last_updated': repo['time_modified'], + 'origin_type': 'gnu', + 'description': None, + } + + def transport_response_simplified(self, response): + """Transform response to list for model manipulation + + """ + return [self.get_model_from_repo(repo) for repo in response] + + +def find_tarballs(package_file_structure, url): + ''' + Recursively lists all the tarball present in the folder and + subfolders for a particular package url. 
+ + Args + package_file_structure : File structure of the package root directory + url : URL of the corresponding package + + Returns + List of all the tarball urls and their time of last update + example- + For a package called 3dldf + + [ + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', + 'date': '1071002600'}, + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', + 'date': '1071078759'}, + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz', + 'date': '1074278633'}, + ... + ] + ''' + tarballs = [] + for single_file in package_file_structure: + file_type = single_file['type'] + file_name = single_file['name'] + if file_type == 'file': + if file_extension_check(file_name): + tarballs .append({ + "archive": url + file_name, + "date": single_file['time'] + }) + # It will recursively check for tarballs in all sub-folders + elif file_type == 'directory': + tarballs_in_dir = find_tarballs( + single_file['contents'], + url + file_name + '/') + tarballs.extend(tarballs_in_dir) + + return tarballs + + +def filter_directories(response): + ''' + Keep only gnu and old-gnu folders from JSON + ''' + final_response = [] + file_system = response[0]['contents'] + for directory in file_system: + if directory['name'] in ('gnu', 'old-gnu'): + final_response.append(directory) + return final_response + + +def file_extension_check(file_name): + ''' + Check for the extension of the file, if the file is of zip format or + .tar.x format, where x could be anything, then return True. + + Args: + file_name : name of the file for which the extension needs to + be checked. 
+ + Returns: + True or False + + example + file_extension_check('abc.zip') will return True + file_extension_check('abc.tar.gz') will return True + file_extension_check('abc.tar.gz.sig') will return False + + ''' + file_suffixes = Path(file_name).suffixes + if len(file_suffixes) == 1 and file_suffixes[-1] == '.zip': + return True + elif len(file_suffixes) > 1: + if file_suffixes[-1] == '.zip' or file_suffixes[-2] == '.tar': + return True + return False diff --git a/swh/lister/gnu/models.py b/swh/lister/gnu/models.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/models.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from sqlalchemy import Column, String, Integer + +from ..core.models import ModelBase + + +class GNUModel(ModelBase): + """a GNU repository representation + + """ + __tablename__ = 'gnu_repo' + + uid = Column(String, primary_key=True) + time_last_updated = Column(Integer) diff --git a/swh/lister/gnu/tasks.py b/swh/lister/gnu/tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tasks.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.scheduler.celery_backend.config import app + +from .lister import GNULister + + +@app.task(name=__name__ + '.GNUListerTask') +def gnu_lister(**lister_args): + GNULister(**lister_args).run() + + +@app.task(name=__name__ + '.ping') +def ping(): + return 'OK' diff --git a/swh/lister/gnu/tests/__init__.py b/swh/lister/gnu/tests/__init__.py new file mode 100644 diff --git a/swh/lister/gnu/tests/api_response.json b/swh/lister/gnu/tests/api_response.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/api_response.json @@ -0,0 +1,37 @@ +[{"type":"directory","name": ".","contents":[ + 
{"type":"file","name":".footer.shtml","size":444,"time":"1359994299"}, + {"type":"file","name":"find.txt.gz","size":261428,"time":"1557684608"}, + {"type":"directory","name":"gnu","size":12288,"time":"1556742017","contents":[]}, + {"type":"directory","name":"gnu+linux-distros","size":4096,"time":"1299783002","contents":[ + {"type":"directory","name":"ututo-e","size":4096,"time":"1487780066","contents":[ + {"type":"file","name":"README","size":48,"time":"1487780066"}, + {"type":"file","name":"index.html","size":158,"time":"1487780054"} + ]} + ]}, + {"type":"file","name":"ls-lrRt.txt.gz","size":480054,"time":"1557684607"}, + {"type":"directory","name":"mirrors","size":4096,"time":"1114010630","contents":[ + {"type":"directory","name":"dynebolic","size":4096,"time":"1317827602","contents":[ + {"type":"file","name":"MOVED_TO_mirror.fsf.org_dynebolic","size":0,"time":"1317826935"}, + {"type":"file","name":"index.html","size":107,"time":"1317827601"} + ]} + ]}, + {"type":"link","name":"non-gnu","target":"gnu/non-gnu","size":11,"time":"1082055542","contents":[]}, + {"type":"directory","name":"old-gnu","size":4096,"time":"1548360019","contents":[]}, + {"type":"link","name":"pub","target":".","size":1,"time":"1060090003","contents":[]}, + {"type":"directory","name":"savannah","size":4096,"time":"1194544006","contents":[ + {"type":"file","name":"README","size":473,"time":"1143758028"} + ]}, + {"type":"directory","name":"third-party","size":4096,"time":"1059825710","contents":[ + {"type":"file","name":"README","size":374,"time":"983824071"} + ]}, + {"type":"directory","name":"tmp","size":4096,"time":"1239072509","contents":[ + ]}, + {"type":"file","name":"tree.json.gz","size":0,"time":"1557684608"}, + {"type":"directory","name":"video","size":4096,"time":"1367963189","contents":[ + {"type":"file","name":".bash_history","size":27,"time":"1307027604"}, + {"type":"file","name":"stallmanupv.ogg.sig","size":536,"time":"1299776853"} + ]}, + 
{"type":"file","name":"welcome.msg","size":2830,"time":"1545163301"} +]}, +{"type":"report","directories":2743,"files":63983} +] diff --git a/swh/lister/gnu/tests/conftest.py b/swh/lister/gnu/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/conftest.py @@ -0,0 +1 @@ +from swh.lister.core.tests.conftest import * # noqa diff --git a/swh/lister/gnu/tests/file_structure.json b/swh/lister/gnu/tests/file_structure.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/file_structure.json @@ -0,0 +1,73 @@ +[ + {"type":"directory","name":"artanis","size":4096,"time":"1546205705","contents":[ + {"type":"file","name":"artanis-0.2.1.tar.bz2","size":424081,"time":"1495205979"}, + {"type":"file","name":"artanis-0.2.1.tar.bz2.sig","size":833,"time":"1495205982"}, + {"type":"file","name":"artanis-0.2.1.tar.gz","size":506599,"time":"1495205967"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz","size":504759,"time":"1494994222"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":833,"time":"1494994224"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2","size":439269,"time":"1520284021"}, + {"type":"file","name":"artanis-0.2.3.tar.gz","size":526293,"time":"1520284007"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2","size":426626,"time":"1521742071"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2.sig","size":833,"time":"1521742074"}, + {"type":"file","name":"artanis-0.2.4.tar.gz","size":508420,"time":"1521742057"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2","size":440350,"time":"1525717261"}, + {"type":"file","name":"artanis-0.2.5.tar.gz","size":518316,"time":"1525717246"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2","size":448329,"time":"1546205569"}, + {"type":"file","name":"artanis-0.3.1.tar.gz","size":535098,"time":"1546205555"}, + {"type":"file","name":"artanis-0.3.tar.bz2","size":452609,"time":"1546205025"}, + {"type":"file","name":"artanis-0.3.tar.bz2.sig","size":833,"time":"1546205027"}, + 
{"type":"file","name":"artanis-0.3.tar.gz","size":550938,"time":"1546205012"}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2","target":"artanis-0.2.12-f39e-dirty.tar.bz2","size":33,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz","target":"artanis-0.2.12-f39e-dirty.tar.gz","size":32,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2","target":"artanis-0.3.1.tar.bz2","size":21,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz","target":"artanis-0.3.1.tar.gz","size":20,"time":"1546205703","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz.sig","target":"artanis-0.3.1.tar.gz.sig","size":24,"time":"1546205703","contents":[]} + ]}, + {"type":"directory","name":"xboard","size":4096,"time":"1254860068","contents":[ + {"type":"directory","name":"winboard","size":4096,"time":"1181795103","contents":[ + {"type":"file","name":"README","size":107,"time":"1070058107"}, + {"type":"file","name":"README.sig","size":65,"time":"1070058115"}, + {"type":"file","name":"winboard-4_0_0-src.zip","size":1514448,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_0.exe","size":1652037,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_2-src.zip","size":1482621,"time":"920018269"}, + {"type":"file","name":"winboard-4_0_3-src.zip","size":1499275,"time":"936750503"}, + {"type":"file","name":"winboard-4_0_4-src.tar.gz","size":1753506,"time":"944290190"}, + {"type":"file","name":"winboard-4_0_5-src.tar.gz","size":1752189,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_6-src.tar.gz","size":1761396,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.README","size":1592,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_7-src.tar.gz","size":1764000,"time":"952313061"}, + {"type":"file","name":"winboard-4_1_0-src.tar.gz","size":1902251,"time":"969299378"}, + 
{"type":"file","name":"winboard-4_2_0beta-src.tar.gz","size":2000471,"time":"977027031"}, + {"type":"file","name":"winboard-4_2_0beta.README","size":3048,"time":"977033442"}, + {"type":"file","name":"winboard-4_2_0beta.exe","size":2292716,"time":"977027033"}, + {"type":"file","name":"winboard-4_2_1-src.tar.gz","size":2090945,"time":"981323331"}, + {"type":"file","name":"winboard-4_2_2-src.tar.gz","size":2025689,"time":"981570576"}, + {"type":"file","name":"winboard-4_2_3-src.tar.gz","size":2001746,"time":"982656672"}, + {"type":"file","name":"winboard-4_2_4-src.tar.gz","size":2388388,"time":"1007952574"}, + {"type":"file","name":"winboard-4_2_5-src.tar.gz","size":1962754,"time":"1008502483"}, + {"type":"file","name":"winboard-4_2_6-src.tar.gz","size":1982333,"time":"1012641285"}, + {"type":"file","name":"winboard-4_2_7.exe.sig","size":65,"time":"1070057687"}, + {"type":"file","name":"winboard-4_2_7b.exe","size":6213290,"time":"1181794790"} + ]}, + {"type":"file","name":"xboard-3.6.2.tar.gz","size":450164,"time":"869814000"}, + {"type":"file","name":"xboard-4.0.0.tar.gz","size":514951,"time":"898422900"}, + {"type":"file","name":"xboard-4.0.2.tar.gz","size":564856,"time":"920018202"}, + {"type":"file","name":"xboard-4.0.3.tar.gz","size":577351,"time":"936750512"}, + {"type":"file","name":"xboard-4.0.4.tar.gz","size":575421,"time":"944290148"}, + {"type":"file","name":"xboard-4.0.5.tar.gz","size":576300,"time":"944599461"}, + {"type":"file","name":"xboard-4.0.6.README","size":1592,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.6.tar.gz","size":579076,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.7.README","size":1721,"time":"952313082"}, + {"type":"file","name":"xboard-4.0.7.tar.gz","size":578350,"time":"952313085"}, + {"type":"file","name":"xboard-4.1.0.tar.gz","size":1069507,"time":"969299287"}, + {"type":"file","name":"xboard-4.2.0beta.README","size":3048,"time":"977027107"}, + 
{"type":"file","name":"xboard-4.2.0beta.tar.gz","size":1093901,"time":"977027108"}, + {"type":"file","name":"xboard-4.2.1.tar.gz","size":1097200,"time":"981323501"}, + {"type":"file","name":"xboard-4.2.2.tar.gz","size":1097682,"time":"981562809"}, + {"type":"file","name":"xboard-4.2.3.tar.gz","size":1100059,"time":"982657006"}, + {"type":"file","name":"xboard-4.2.4.tar.gz","size":1034728,"time":"1007952745"}, + {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"}, + {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"} + ]} +] \ No newline at end of file diff --git a/swh/lister/gnu/tests/find_tarballs_output.json b/swh/lister/gnu/tests/find_tarballs_output.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/find_tarballs_output.json @@ -0,0 +1,182 @@ +[ + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.bz2", + "date": "1495205979" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.gz", + "date": "1495205967" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.12-f39e-dirty.tar.gz", + "date": "1494994222" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.bz2", + "date": "1520284021" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.gz", + "date": "1520284007" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.bz2", + "date": "1521742071" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.gz", + "date": "1521742057" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.bz2", + "date": "1525717261" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.gz", + "date": "1525717246" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.bz2", + "date": "1546205569" + }, + { + "archive": 
"https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.gz", + "date": "1546205555" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.bz2", + "date": "1546205025" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.gz", + "date": "1546205012" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_0-src.zip", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_2-src.zip", + "date": "920018269" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_3-src.zip", + "date": "936750503" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_4-src.tar.gz", + "date": "944290190" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_5-src.tar.gz", + "date": "944600462" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_6-src.tar.gz", + "date": "952156231" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_7-src.tar.gz", + "date": "952313061" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_1_0-src.tar.gz", + "date": "969299378" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_0beta-src.tar.gz", + "date": "977027031" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_1-src.tar.gz", + "date": "981323331" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_2-src.tar.gz", + "date": "981570576" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_3-src.tar.gz", + "date": "982656672" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_4-src.tar.gz", + "date": "1007952574" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_5-src.tar.gz", + "date": "1008502483" + }, + { + "archive": 
"https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_6-src.tar.gz", + "date": "1012641285" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-3.6.2.tar.gz", + "date": "869814000" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.0.tar.gz", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.2.tar.gz", + "date": "920018202" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.3.tar.gz", + "date": "936750512" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.4.tar.gz", + "date": "944290148" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.5.tar.gz", + "date": "944599461" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.6.tar.gz", + "date": "952156235" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.7.tar.gz", + "date": "952313085" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.1.0.tar.gz", + "date": "969299287" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.0beta.tar.gz", + "date": "977027108" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.1.tar.gz", + "date": "981323501" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.2.tar.gz", + "date": "981562809" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.3.tar.gz", + "date": "982657006" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.4.tar.gz", + "date": "1007952745" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.5.tar.gz", + "date": "1008466945" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.6.tar.gz", + "date": "1012641715" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.7.tar.gz", + "date": "1070057764" + } + ] \ No newline at end of file diff --git a/swh/lister/gnu/tests/test_lister.py b/swh/lister/gnu/tests/test_lister.py new file mode 100644 --- /dev/null +++ 
b/swh/lister/gnu/tests/test_lister.py @@ -0,0 +1,40 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json + +from swh.lister.gnu.lister import find_tarballs, filter_directories +from swh.lister.gnu.lister import file_extension_check + + +def test_filter_directories(): + f = open('swh/lister/gnu/tests/api_response.json') + api_response = json.load(f) + cleared_api_response = filter_directories(api_response) + for directory in cleared_api_response: + if directory['name'] not in ('gnu', 'old-gnu'): + assert False + + +def test_find_tarballs(): + f = open('swh/lister/gnu/tests/find_tarballs_output.json') + expected_list_of_all_tarballs = json.load(f) + + f = open('swh/lister/gnu/tests/file_structure.json') + file_structure = json.load(f) + list_of_all_tarballs = [] + list_of_all_tarballs.extend( + find_tarballs(file_structure[0]['contents'], + "https://ftp.gnu.org/gnu/artanis/")) + list_of_all_tarballs.extend( + find_tarballs(file_structure[1]['contents'], + "https://ftp.gnu.org/old-gnu/xboard/")) + assert list_of_all_tarballs == expected_list_of_all_tarballs + + +def test_file_extension_check(): + assert file_extension_check('abc.xy.zip') + assert file_extension_check('cvb.zip') + assert file_extension_check('abc.tar.bz2') + assert file_extension_check('abc') is False diff --git a/swh/lister/gnu/tests/test_tasks.py b/swh/lister/gnu/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/test_tasks.py @@ -0,0 +1,27 @@ +from unittest.mock import patch + + +def test_ping(swh_app, celery_session_worker): + res = swh_app.send_task( + 'swh.lister.gnu.tasks.ping') + assert res + res.wait() + assert res.successful() + assert res.result == 'OK' + + +@patch('swh.lister.gnu.tasks.GNULister') +def test_lister(lister, swh_app, celery_session_worker): + # setup the mocked GNULister + lister.return_value = lister + 
lister.run.return_value = None + + res = swh_app.send_task( + 'swh.lister.gnu.tasks.GNUListerTask') + assert res + res.wait() + assert res.successful() + + lister.assert_called_once_with() + lister.db_last_index.assert_not_called() + lister.run.assert_called_once_with()