diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ - `swh.lister.debian` - `swh.lister.github` - `swh.lister.gitlab` +- `swh.lister.gnu` - `swh.lister.pypi` - `swh.lister.npm` - `swh.lister.phabricator` @@ -177,6 +178,18 @@ incremental_phabricator_lister(forge_url='https://forge.softwareheritage.org', api_token='XXXX') ``` +## lister-gnu + +Once configured, you can execute a GNU lister using the following instructions in a `python3` script: + +```lang=python +import logging +from swh.lister.gnu.tasks import gnu_lister + +logging.basicConfig(level=logging.DEBUG) +gnu_lister() +``` + Licensing --------- diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -12,7 +12,7 @@ logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', - 'npm', 'phabricator'] + 'npm', 'phabricator', 'gnu'] @click.group(name='lister', context_settings=CONTEXT_SETTINGS) @@ -115,6 +115,11 @@ api_token='', override_config=override_conf) + elif lister == 'gnu': + from .gnu.models import ModelBase + from .gnu.lister import GNULister + _lister = GNULister(override_config=override_conf) + else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py --- a/swh/lister/core/tests/conftest.py +++ b/swh/lister/core/tests/conftest.py @@ -9,6 +9,7 @@ 'swh.lister.debian.tasks', 'swh.lister.github.tasks', 'swh.lister.gitlab.tasks', + 'swh.lister.gnu.tasks', 'swh.lister.npm.tasks', 'swh.lister.pypi.tasks', 'swh.lister.phabricator.tasks', diff --git a/swh/lister/gnu/__init__.py b/swh/lister/gnu/__init__.py new file mode 100644 diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/lister.py @@ -0,0 +1,223 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any 
later version +# See top-level LICENSE file for more information + +import random +import gzip +import json +import requests +from pathlib import Path +from collections import defaultdict + +from .models import GNUModel + +from swh.scheduler import utils +from swh.lister.core.simple_lister import SimpleLister + + +class GNULister(SimpleLister): + MODEL = GNUModel + LISTER_NAME = 'gnu' + TREE_URL = 'https://ftp.gnu.org/tree.json.gz' + BASE_URL = 'https://ftp.gnu.org' + instance = 'gnu' + tarballs = defaultdict(dict) # Dict of key with project name value the + # associated is list of tarballs of package to ingest from the gnu mirror + + def task_dict(self, origin_type, origin_url, **kwargs): + """ + Return task format dict + + This is overridden from the lister_base as more information is + needed for the ingestion task creation. + """ + return utils.create_task_dict( + 'load-%s' % origin_type, 'recurring', kwargs.get('name'), + origin_url, tarballs=self.tarballs[kwargs.get('name')]) + + def get_file(self): + ''' + Download and unzip tree.json.gz file and returns its content + in JSON format + + Returns + File content in dictionary format + ''' + response = requests.get(self.TREE_URL, + allow_redirects=True) + uncompressed_content = gzip.decompress(response.content) + return json.loads(uncompressed_content.decode('utf-8')) + + def safely_issue_request(self, identifier): + ''' + Make network request to download the file which + has file structure of the GNU website. + + Args: + identifier: resource identifier + Returns: + Server response + ''' + return self.get_file() + + def list_packages(self, response): + """ + List the actual gnu origins with their names,url and the list + of all the tarball for a package from the response. + + Args: + response : File structure of the website + in dictionary format + + Returns: + A list of all the packages with their names, url of their root + directory and the tarballs present for the particular package. 
+ [ + {'name': '3dldf', 'url': 'https://ftp.gnu.org/gnu/3dldf/', + 'tarballs': + [ + {'archive': + 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', + 'date': '1071002600'}, + {'archive': + 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', + 'date': '1071078759'}} + ] + }, + {'name': '8sync', 'url': 'https://ftp.gnu.org/gnu/8sync/', + 'tarballs': + [ + {'archive': + 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', + 'date': '1461357336'}, + {'archive': + 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz', + 'date': '1480991830'} + ] + ] + """ + response = filter_directories(response) + packages = [] + for directory in response: + content = directory['contents'] + for repo in content: + if repo['type'] == 'directory': + package_url = '%s/%s/%s/' % (self.BASE_URL, + directory['name'], + repo['name']) + package_tarballs = find_tarballs( + repo['contents'], package_url) + if package_tarballs != []: + repo_details = { + 'name': repo['name'], + 'url': package_url, + 'time_modified': repo['time'], + } + self.tarballs[repo['name']] = package_tarballs + packages.append(repo_details) + random.shuffle(packages) + return packages + + def get_model_from_repo(self, repo): + """Transform from repository representation to model + + """ + return { + 'uid': repo['name'], + 'name': repo['name'], + 'full_name': repo['name'], + 'html_url': repo['url'], + 'origin_url': repo['url'], + 'time_last_updated': repo['time_modified'], + 'origin_type': 'gnu', + 'description': None, + } + + def transport_response_simplified(self, response): + """Transform response to list for model manipulation + + """ + return [self.get_model_from_repo(repo) for repo in response] + + +def find_tarballs(package_file_structure, url): + ''' + Recursively lists all the tarball present in the folder and + subfolders for a particular package url. 
+ + Args + package_file_structure : File structure of the package root directory + url : URL of the corresponding package + + Returns + List of all the tarball urls and the last their time of update + example- + For a package called 3dldf + + [ + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', + 'date': '1071002600'} + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', + 'date': '1071078759'} + {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz', + 'date': '1074278633'} + ... + ] + ''' + tarballs = [] + for single_file in package_file_structure: + file_type = single_file['type'] + file_name = single_file['name'] + if file_type == 'file': + if file_extension_check(file_name): + tarballs .append({ + "archive": url + file_name, + "date": single_file['time'] + }) + # It will recursively check for tarballs in all sub-folders + elif file_type == 'directory': + tarballs_in_dir = find_tarballs( + single_file['contents'], + url + file_name + '/') + tarballs.extend(tarballs_in_dir) + + return tarballs + + +def filter_directories(response): + ''' + Keep only gnu and old-gnu folders from JSON + ''' + final_response = [] + file_system = response[0]['contents'] + for directory in file_system: + if directory['name'] in ('gnu', 'old-gnu'): + final_response.append(directory) + return final_response + + +def file_extension_check(file_name): + ''' + Check for the extension of the file, if the file is of zip format of + .tar.x format, where x could be anything, then returns true. + + Args: + file_name : name of the file for which the extensions is needs to + be checked. 
+ + Returns: + True or False + + example + file_extension_check('abc.zip') will return True + file_extension_check('abc.tar.gz') will return True + file_extension_check('abc.tar.gz.sig') will return False + + ''' + file_suffixes = Path(file_name).suffixes + if len(file_suffixes) == 1 and file_suffixes[-1] == '.zip': + return True + elif len(file_suffixes) > 1: + if file_suffixes[-1] == '.zip' or file_suffixes[-2] == '.tar': + return True + return False diff --git a/swh/lister/gnu/models.py b/swh/lister/gnu/models.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/models.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from sqlalchemy import Column, String, Integer + +from ..core.models import ModelBase + + +class GNUModel(ModelBase): + """a GNU repository representation + + """ + __tablename__ = 'gnu_repo' + + uid = Column(String, primary_key=True) + time_last_updated = Column(Integer) diff --git a/swh/lister/gnu/tasks.py b/swh/lister/gnu/tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tasks.py @@ -0,0 +1,17 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.scheduler.celery_backend.config import app + +from .lister import GNULister + + +@app.task(name=__name__ + '.GNUListerTask') +def gnu_lister(**lister_args): + GNULister(**lister_args).run() + + +@app.task(name=__name__ + '.ping') +def ping(): + return 'OK' diff --git a/swh/lister/gnu/tests/__init__.py b/swh/lister/gnu/tests/__init__.py new file mode 100644 diff --git a/swh/lister/gnu/tests/api_response.json b/swh/lister/gnu/tests/api_response.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/api_response.json @@ -0,0 +1,141 @@ +[{"type":"directory","name": ".","contents":[ + 
{"type":"file","name":".footer.shtml","size":444,"time":"1359994299"}, + {"type":"file","name":".gnu-gnu-gnu.png","size":12413,"time":"1314632619"}, + {"type":"file","name":".header.shtml","size":1833,"time":"1546469072"}, + {"type":"file","name":".header.shtml~","size":1364,"time":"1454341750"}, + {"type":"file","name":".htaccess","size":334,"time":"1314639683"}, + {"type":"file","name":".message","size":1125,"time":"1507930451"}, + {"type":"link","name":"CRYPTO.README","target":".message","size":8,"time":"1093018000","contents":[]}, + {"type":"file","name":"MISSING-FILES","size":17864,"time":"1066928263"}, + {"type":"file","name":"MISSING-FILES.README","size":4178,"time":"1060815936"}, + {"type":"file","name":"README","size":2925,"time":"1554408947"}, + {"type":"file","name":"before-2003-08-01.md5sums.asc","size":405121,"time":"1066928156"}, + {"type":"file","name":"find.txt.gz","size":261428,"time":"1557684608"}, + {"type":"directory","name":"gnu","size":12288,"time":"1556742017","contents":[]}, + {"type":"directory","name":"gnu+linux-distros","size":4096,"time":"1299783002","contents":[ + {"type":"directory","name":"ututo-e","size":4096,"time":"1487780066","contents":[ + {"type":"file","name":"README","size":48,"time":"1487780066"}, + {"type":"file","name":"index.html","size":158,"time":"1487780054"} + ]} + ]}, + {"type":"file","name":"ls-lrRt.txt.gz","size":480054,"time":"1557684607"}, + {"type":"directory","name":"mirrors","size":4096,"time":"1114010630","contents":[ + {"type":"directory","name":"dynebolic","size":4096,"time":"1317827602","contents":[ + {"type":"file","name":"MOVED_TO_mirror.fsf.org_dynebolic","size":0,"time":"1317826935"}, + {"type":"file","name":"README.txt","size":41,"time":"1317827081"}, + {"type":"file","name":"index.html","size":107,"time":"1317827601"} + ]} + ]}, + {"type":"link","name":"non-gnu","target":"gnu/non-gnu","size":11,"time":"1082055542","contents":[]}, + 
{"type":"directory","name":"old-gnu","size":4096,"time":"1548360019","contents":[]}, + {"type":"link","name":"pub","target":".","size":1,"time":"1060090003","contents":[]}, + {"type":"directory","name":"savannah","size":4096,"time":"1194544006","contents":[ + {"type":"file","name":"README","size":473,"time":"1143758028"} + ]}, + {"type":"directory","name":"third-party","size":4096,"time":"1059825710","contents":[ + {"type":"file","name":"README","size":374,"time":"983824071"} + ]}, + {"type":"directory","name":"tmp","size":4096,"time":"1239072509","contents":[ + ]}, + {"type":"file","name":"tree.json.gz","size":0,"time":"1557684608"}, + {"type":"directory","name":"video","size":4096,"time":"1367963189","contents":[ + {"type":"file","name":".bash_history","size":27,"time":"1307027604"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-360p.webm","size":138044199,"time":"1284892010"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-480p.webm","size":365119650,"time":"1284895035"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-720p.webm","size":820524785,"time":"1284903666"}, + {"type":"file","name":"Autobuild_Status_Update.ogv","size":95034503,"time":"1281781973"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg","size":23901186,"time":"1299776852"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg.sig","size":536,"time":"1299776852"}, + {"type":"file","name":"FSF_event.ogv","size":52056292,"time":"1251497852"}, + {"type":"file","name":"FSF_event_small.ogv","size":5237196,"time":"1251840561"}, + {"type":"file","name":"GNU-Hurd_-_Its_About_Freedom,_Or_Why_you_should_care.ogv","size":177589989,"time":"1281779528"}, + {"type":"file","name":"GNU_Bazaar_-_a_distributed_version_control_system_for_free_software_communities.ogv","size":122114984,"time":"1281783734"}, + {"type":"file","name":"GNU_Psycosynth.ogv","size":69931061,"time":"1281784291"}, + {"type":"file","name":"GNU_in_the_cloud.ogv","size":84068213,"time":"1281784003"}, + 
{"type":"file","name":"GNUnet_-_Transports_and_Transport_Selection.ogv","size":334973582,"time":"1281801792"}, + {"type":"file","name":"GNUnet_Distributed_Data_Storage_-_DHT_and_Distance_Vector_Transport.ogv","size":630257300,"time":"1281791152"}, + {"type":"file","name":"Introduction_to_LilyPond.ogv","size":135277165,"time":"1281787990"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-100kbit_vorbis.ogg","size":4481112,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-12kbit_speex.spx","size":511060,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit.ogv","size":34370182,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit_fr.ogv","size":35983054,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit.ogv","size":8126685,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit_fr.ogv","size":8978906,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit.ogv","size":22443268,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_en.ogv","size":23467445,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_fr.ogv","size":23649669,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-vlq_192px_56kbit.ogv","size":2530887,"time":"1220091690"}, + {"type":"file","name":"TIME","size":11,"time":"1557682561"}, + {"type":"file","name":"The_GCC_Compile_Farm.ogv","size":102712932,"time":"1281788256"}, + {"type":"file","name":"The_GNU_Record_Utilities.ogv","size":156667664,"time":"1281788873"}, + {"type":"file","name":"The_GNUnet_Peer-to-Peer_Framework.ogv","size":863516287,"time":"1281792840"}, + {"type":"file","name":"The_new_CPP_Standard_and_library_Cpp0x.ogv","size":406746577,"time":"1281790439"}, + 
{"type":"file","name":"dyne.org_hackers_network.ogv","size":154795708,"time":"1281785482"}, + {"type":"file","name":"fry720.jpg","size":141588,"time":"1219677812"}, + {"type":"file","name":"fsf-2009-hillaryrettig.ogv","size":17557909,"time":"1262115081"}, + {"type":"file","name":"fsf-2009-jeremyallison.ogv","size":20719197,"time":"1256665061"}, + {"type":"file","name":"fsf-2009-larrylessig.ogv","size":10571717,"time":"1261667503"}, + {"type":"file","name":"fsf-2009-maryloujepsen.ogv","size":4198432,"time":"1256222925"}, + {"type":"file","name":"fsf-2009-peterbrown-final.ogv","size":14283895,"time":"1259856669"}, + {"type":"file","name":"fsf-2009-robsavoye.ogv","size":10262195,"time":"1256596109"}, + {"type":"file","name":"fsf-2009-savoye.ogv","size":13594470,"time":"1256596105"}, + {"type":"file","name":"gNewSense.ogv","size":89891356,"time":"1281783196"}, + {"type":"file","name":"gnulib_-_Contributing_reusable_code.ogv","size":120341572,"time":"1281779839"}, + {"type":"file","name":"lp2010-eben-moglen.ogv","size":63138569,"time":"1280508649"}, + {"type":"file","name":"lp2010-fri-command-line.ogg","size":102351845,"time":"1280515516"}, + {"type":"file","name":"lp2010-fri-gimp-inkscape.ogg","size":73786977,"time":"1280515751"}, + {"type":"file","name":"lp2010-fri-inkscape-gimp.ogv","size":163618762,"time":"1279906253"}, + {"type":"file","name":"lp2010-fri-intro-to-command-line.ogv","size":166684617,"time":"1279913790"}, + {"type":"file","name":"lp2010-fri-intro.ogg","size":8830316,"time":"1280515539"}, + {"type":"file","name":"lp2010-fri-speaking-workshop.ogg","size":66985650,"time":"1280515967"}, + {"type":"file","name":"lp2010-fri-welcome.ogv","size":18634141,"time":"1279914475"}, + {"type":"file","name":"lp2010-fri-workshop-speaking.ogv","size":104913218,"time":"1279920003"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogg","size":36063678,"time":"1280516115"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogv","size":63138569,"time":"1280155656"}, + 
{"type":"file","name":"lp2010-sat-film-discussion.ogg","size":6212226,"time":"1280516133"}, + {"type":"file","name":"lp2010-sat-gilmore-fixed.ogv","size":315091609,"time":"1281103644"}, + {"type":"file","name":"lp2010-sat-gilmore.ogv","size":278528,"time":"1281106393"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogg","size":26624876,"time":"1280516218"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogv","size":54603867,"time":"1280158479"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogg","size":25279954,"time":"1280516298"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogv","size":34687879,"time":"1280160636"}, + {"type":"file","name":"lp2010-sat-intro.ogg","size":2257992,"time":"1280516304"}, + {"type":"file","name":"lp2010-sat-john-gilmore.ogg","size":39422949,"time":"1280516426"}, + {"type":"file","name":"lp2010-sat-karen-sandler.ogg","size":37913634,"time":"1280516542"}, + {"type":"file","name":"lp2010-sat-licensing-hoedown.ogv","size":81030741,"time":"1280163330"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogg","size":41024302,"time":"1280516669"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogv","size":138070626,"time":"1280167204"}, + {"type":"file","name":"lp2010-sat-sandler-sflc.ogv","size":48057099,"time":"1280170655"}, + {"type":"file","name":"lp2010-sat-sugar-labs.ogv","size":63820951,"time":"1280173681"}, + {"type":"file","name":"lp2010-sat-symbian.ogv","size":71807650,"time":"1280176185"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-1.ogg","size":51329142,"time":"1280517315"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-2.ogg","size":29574674,"time":"1280517409"}, + {"type":"file","name":"lp2010-sun-free-network-services.ogv","size":294504464,"time":"1280180824"}, + {"type":"file","name":"lp2010-sun-free-software-mentoring.ogg","size":11780923,"time":"1280517447"}, + {"type":"file","name":"lp2010-sun-fsf-campaigns-team-update.ogv","size":166676786,"time":"1280183913"}, + 
{"type":"file","name":"lp2010-sun-gnash.ogv","size":129681345,"time":"1280185455"}, + {"type":"file","name":"lp2010-sun-gnu-telephony.ogv","size":205191264,"time":"1280188317"}, + {"type":"file","name":"lp2010-sun-jeff-jaffe.ogv","size":98181482,"time":"1280189804"}, + {"type":"file","name":"lp2010-sun-libredwg.ogv","size":138981040,"time":"1280191724"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogg","size":47004371,"time":"1280517591"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogv","size":105266782,"time":"1280196010"}, + {"type":"file","name":"lp2010-sun-luis-villa.ogv","size":182437632,"time":"1280200006"}, + {"type":"file","name":"lp2010-sun-mentoring.ogv","size":47095807,"time":"1280201658"}, + {"type":"file","name":"lp2010-sun-non-coding-roles.ogv","size":109852516,"time":"1280206251"}, + {"type":"file","name":"lp2010-sun-noncoding-workshop.ogg","size":23507355,"time":"1280517664"}, + {"type":"file","name":"lp2010-sun-play-ogg.ogv","size":306319862,"time":"1280210339"}, + {"type":"file","name":"lp2010-sun-recruiting-retaining-women.ogv","size":31282146,"time":"1280212310"}, + {"type":"file","name":"lp2010-sun-recruiting-women.ogg","size":37264232,"time":"1280517788"}, + {"type":"file","name":"lp2010-sun-wrapup.ogv","size":109607244,"time":"1280215323"}, + {"type":"file","name":"mediagoblin_campaign_pitch-small.webm","size":11948337,"time":"1349289671"}, + {"type":"file","name":"mediagoblin_campaign_pitch.webm","size":27197918,"time":"1349289722"}, + {"type":"file","name":"rms-education-es.webm","size":64071545,"time":"1304351680"}, + {"type":"file","name":"rms-education.es.ogv","size":21726974,"time":"1304351770"}, + {"type":"file","name":"sf-large.ogv","size":140102085,"time":"1220111045"}, + {"type":"file","name":"stallmanupv.ogg","size":18683460,"time":"1299776853"}, + {"type":"file","name":"stallmanupv.ogg.sig","size":536,"time":"1299776853"} + ]}, + {"type":"file","name":"welcome.msg","size":2830,"time":"1545163301"} +]}, 
+{"type":"report","directories":2743,"files":63983} +] diff --git a/swh/lister/gnu/tests/conftest.py b/swh/lister/gnu/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/conftest.py @@ -0,0 +1 @@ +from swh.lister.core.tests.conftest import * # noqa diff --git a/swh/lister/gnu/tests/file_structure.json b/swh/lister/gnu/tests/file_structure.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/file_structure.json @@ -0,0 +1,130 @@ +[ + {"type":"directory","name":"artanis","size":4096,"time":"1546205705","contents":[ + {"type":"file","name":"artanis-0.2.1.tar.bz2","size":424081,"time":"1495205979"}, + {"type":"file","name":"artanis-0.2.1.tar.bz2.sig","size":833,"time":"1495205982"}, + {"type":"file","name":"artanis-0.2.1.tar.gz","size":506599,"time":"1495205967"}, + {"type":"file","name":"artanis-0.2.1.tar.gz.sig","size":833,"time":"1495205970"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2","size":421984,"time":"1494994239"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":833,"time":"1494994240"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz","size":504759,"time":"1494994222"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":833,"time":"1494994224"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2","size":439269,"time":"1520284021"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2.sig","size":833,"time":"1520284023"}, + {"type":"file","name":"artanis-0.2.3.tar.gz","size":526293,"time":"1520284007"}, + {"type":"file","name":"artanis-0.2.3.tar.gz.sig","size":833,"time":"1520284009"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2","size":426626,"time":"1521742071"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2.sig","size":833,"time":"1521742074"}, + {"type":"file","name":"artanis-0.2.4.tar.gz","size":508420,"time":"1521742057"}, + {"type":"file","name":"artanis-0.2.4.tar.gz.sig","size":833,"time":"1521742060"}, + 
{"type":"file","name":"artanis-0.2.5.tar.bz2","size":440350,"time":"1525717261"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2.sig","size":833,"time":"1525717263"}, + {"type":"file","name":"artanis-0.2.5.tar.gz","size":518316,"time":"1525717246"}, + {"type":"file","name":"artanis-0.2.5.tar.gz.sig","size":833,"time":"1525717249"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2","size":448329,"time":"1546205569"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2.sig","size":833,"time":"1546205571"}, + {"type":"file","name":"artanis-0.3.1.tar.gz","size":535098,"time":"1546205555"}, + {"type":"file","name":"artanis-0.3.1.tar.gz.sig","size":833,"time":"1546205558"}, + {"type":"file","name":"artanis-0.3.tar.bz2","size":452609,"time":"1546205025"}, + {"type":"file","name":"artanis-0.3.tar.bz2.sig","size":833,"time":"1546205027"}, + {"type":"file","name":"artanis-0.3.tar.gz","size":550938,"time":"1546205012"}, + {"type":"file","name":"artanis-0.3.tar.gz.sig","size":833,"time":"1546205015"}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2","target":"artanis-0.2.12-f39e-dirty.tar.bz2","size":33,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2.sig","target":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":37,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz","target":"artanis-0.2.12-f39e-dirty.tar.gz","size":32,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz.sig","target":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":36,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2","target":"artanis-0.3.1.tar.bz2","size":21,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2.sig","target":"artanis-0.3.1.tar.bz2.sig","size":25,"time":"1546205705","contents":[]}, + 
{"type":"link","name":"artanis-latest.tar.gz","target":"artanis-0.3.1.tar.gz","size":20,"time":"1546205703","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz.sig","target":"artanis-0.3.1.tar.gz.sig","size":24,"time":"1546205703","contents":[]} + ]}, + {"type":"directory","name":"xboard","size":4096,"time":"1254860068","contents":[ + {"type":"directory","name":"winboard","size":4096,"time":"1181795103","contents":[ + {"type":"file","name":"README","size":107,"time":"1070058107"}, + {"type":"file","name":"README.sig","size":65,"time":"1070058115"}, + {"type":"file","name":"winboard-4_0_0-src.zip","size":1514448,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_0.README","size":4152,"time":"898422960"}, + {"type":"file","name":"winboard-4_0_0.exe","size":1652037,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_2-src.zip","size":1482621,"time":"920018269"}, + {"type":"file","name":"winboard-4_0_2.README","size":3617,"time":"920018270"}, + {"type":"file","name":"winboard-4_0_2.exe","size":1716772,"time":"920018294"}, + {"type":"file","name":"winboard-4_0_3-src.zip","size":1499275,"time":"936750503"}, + {"type":"file","name":"winboard-4_0_3.README","size":2218,"time":"940361675"}, + {"type":"file","name":"winboard-4_0_3.exe","size":1725023,"time":"936750506"}, + {"type":"file","name":"winboard-4_0_4-src.tar.gz","size":1753506,"time":"944290190"}, + {"type":"file","name":"winboard-4_0_4.README","size":5393,"time":"944290195"}, + {"type":"file","name":"winboard-4_0_4.exe","size":2195155,"time":"944290204"}, + {"type":"file","name":"winboard-4_0_5-src.tar.gz","size":1752189,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.README","size":2348,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.exe","size":2195822,"time":"944600468"}, + {"type":"file","name":"winboard-4_0_6-src.tar.gz","size":1761396,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.README","size":1592,"time":"952156231"}, + 
{"type":"file","name":"winboard-4_0_6.exe","size":2026273,"time":"952156230"}, + {"type":"file","name":"winboard-4_0_7-src.tar.gz","size":1764000,"time":"952313061"}, + {"type":"file","name":"winboard-4_0_7.README","size":1721,"time":"952313072"}, + {"type":"file","name":"winboard-4_0_7.exe","size":2202166,"time":"952313082"}, + {"type":"file","name":"winboard-4_1_0-src.tar.gz","size":1902251,"time":"969299378"}, + {"type":"file","name":"winboard-4_1_0.exe","size":2126431,"time":"969299361"}, + {"type":"file","name":"winboard-4_2_0beta-src.tar.gz","size":2000471,"time":"977027031"}, + {"type":"file","name":"winboard-4_2_0beta.README","size":3048,"time":"977033442"}, + {"type":"file","name":"winboard-4_2_0beta.exe","size":2292716,"time":"977027033"}, + {"type":"file","name":"winboard-4_2_1-src.tar.gz","size":2090945,"time":"981323331"}, + {"type":"file","name":"winboard-4_2_1.README","size":1336,"time":"981323332"}, + {"type":"file","name":"winboard-4_2_1.exe","size":2298010,"time":"981323012"}, + {"type":"file","name":"winboard-4_2_2-src.tar.gz","size":2025689,"time":"981570576"}, + {"type":"file","name":"winboard-4_2_2.README","size":2705,"time":"981562643"}, + {"type":"file","name":"winboard-4_2_2.exe","size":2298407,"time":"981570908"}, + {"type":"file","name":"winboard-4_2_3-src.tar.gz","size":2001746,"time":"982656672"}, + {"type":"file","name":"winboard-4_2_3.README","size":3014,"time":"982656842"}, + {"type":"file","name":"winboard-4_2_3.exe","size":2299250,"time":"982656841"}, + {"type":"file","name":"winboard-4_2_4-src.tar.gz","size":2388388,"time":"1007952574"}, + {"type":"file","name":"winboard-4_2_4.README","size":1863,"time":"1007952575"}, + {"type":"file","name":"winboard-4_2_4.exe","size":10020579,"time":"1007952203"}, + {"type":"file","name":"winboard-4_2_5-src.tar.gz","size":1962754,"time":"1008502483"}, + {"type":"file","name":"winboard-4_2_5.README","size":2069,"time":"1008466769"}, + 
{"type":"file","name":"winboard-4_2_5.exe","size":2489300,"time":"1008502215"}, + {"type":"file","name":"winboard-4_2_6-src.tar.gz","size":1982333,"time":"1012641285"}, + {"type":"file","name":"winboard-4_2_6.README","size":1765,"time":"1012640603"}, + {"type":"file","name":"winboard-4_2_6.exe","size":2490333,"time":"1012641027"}, + {"type":"file","name":"winboard-4_2_7-only.exe","size":1729532,"time":"1070149476"}, + {"type":"file","name":"winboard-4_2_7-only.exe.sig","size":65,"time":"1070149498"}, + {"type":"file","name":"winboard-4_2_7.exe","size":5629711,"time":"1070057661"}, + {"type":"file","name":"winboard-4_2_7.exe.sig","size":65,"time":"1070057687"}, + {"type":"file","name":"winboard-4_2_7a.exe","size":5629711,"time":"1070082423"}, + {"type":"file","name":"winboard-4_2_7a.exe.sig","size":65,"time":"1070082566"}, + {"type":"file","name":"winboard-4_2_7b.exe","size":6213290,"time":"1181794790"}, + {"type":"file","name":"winboard-4_2_7b.exe.sig","size":65,"time":"1181794954"} + ]}, + {"type":"file","name":"xboard-3.6.2.tar.gz","size":450164,"time":"869814000"}, + {"type":"file","name":"xboard-4.0.0.README","size":4152,"time":"920018090"}, + {"type":"file","name":"xboard-4.0.0.tar.gz","size":514951,"time":"898422900"}, + {"type":"file","name":"xboard-4.0.2.README","size":3617,"time":"920018199"}, + {"type":"file","name":"xboard-4.0.2.tar.gz","size":564856,"time":"920018202"}, + {"type":"file","name":"xboard-4.0.3.README","size":2218,"time":"936750507"}, + {"type":"file","name":"xboard-4.0.3.tar.gz","size":577351,"time":"936750512"}, + {"type":"file","name":"xboard-4.0.4.README","size":5393,"time":"944290145"}, + {"type":"file","name":"xboard-4.0.4.tar.gz","size":575421,"time":"944290148"}, + {"type":"file","name":"xboard-4.0.5.README","size":2348,"time":"944599460"}, + {"type":"file","name":"xboard-4.0.5.tar.gz","size":576300,"time":"944599461"}, + {"type":"file","name":"xboard-4.0.6.README","size":1592,"time":"952156235"}, + 
{"type":"file","name":"xboard-4.0.6.tar.gz","size":579076,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.7.README","size":1721,"time":"952313082"}, + {"type":"file","name":"xboard-4.0.7.tar.gz","size":578350,"time":"952313085"}, + {"type":"file","name":"xboard-4.1.0.tar.gz","size":1069507,"time":"969299287"}, + {"type":"file","name":"xboard-4.2.0beta.README","size":3048,"time":"977027107"}, + {"type":"file","name":"xboard-4.2.0beta.tar.gz","size":1093901,"time":"977027108"}, + {"type":"file","name":"xboard-4.2.1.README","size":1336,"time":"981323502"}, + {"type":"file","name":"xboard-4.2.1.tar.gz","size":1097200,"time":"981323501"}, + {"type":"file","name":"xboard-4.2.2.README","size":2705,"time":"981562810"}, + {"type":"file","name":"xboard-4.2.2.tar.gz","size":1097682,"time":"981562809"}, + {"type":"file","name":"xboard-4.2.3.README","size":3014,"time":"982657007"}, + {"type":"file","name":"xboard-4.2.3.tar.gz","size":1100059,"time":"982657006"}, + {"type":"file","name":"xboard-4.2.4.README","size":1863,"time":"1007952746"}, + {"type":"file","name":"xboard-4.2.4.tar.gz","size":1034728,"time":"1007952745"}, + {"type":"file","name":"xboard-4.2.5.README","size":2069,"time":"1008466946"}, + {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"}, + {"type":"file","name":"xboard-4.2.6.README","size":1765,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"}, + {"type":"file","name":"xboard-4.2.7.tar.gz.sig","size":65,"time":"1070057702"} + ]} +] \ No newline at end of file diff --git a/swh/lister/gnu/tests/find_tarballs_output.json b/swh/lister/gnu/tests/find_tarballs_output.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/find_tarballs_output.json @@ -0,0 +1,186 @@ +[ + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.bz2", + "date": "1495205979" + }, + { + "archive": 
"https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.gz", + "date": "1495205967" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.12-f39e-dirty.tar.bz2", + "date": "1494994239" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.12-f39e-dirty.tar.gz", + "date": "1494994222" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.bz2", + "date": "1520284021" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.gz", + "date": "1520284007" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.bz2", + "date": "1521742071" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.gz", + "date": "1521742057" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.bz2", + "date": "1525717261" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.gz", + "date": "1525717246" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.bz2", + "date": "1546205569" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.gz", + "date": "1546205555" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.bz2", + "date": "1546205025" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.gz", + "date": "1546205012" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_0-src.zip", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_2-src.zip", + "date": "920018269" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_3-src.zip", + "date": "936750503" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_4-src.tar.gz", + "date": "944290190" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_5-src.tar.gz", + "date": "944600462" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_6-src.tar.gz", + "date": 
"952156231" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_7-src.tar.gz", + "date": "952313061" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_1_0-src.tar.gz", + "date": "969299378" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_0beta-src.tar.gz", + "date": "977027031" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_1-src.tar.gz", + "date": "981323331" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_2-src.tar.gz", + "date": "981570576" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_3-src.tar.gz", + "date": "982656672" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_4-src.tar.gz", + "date": "1007952574" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_5-src.tar.gz", + "date": "1008502483" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_6-src.tar.gz", + "date": "1012641285" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-3.6.2.tar.gz", + "date": "869814000" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.0.tar.gz", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.2.tar.gz", + "date": "920018202" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.3.tar.gz", + "date": "936750512" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.4.tar.gz", + "date": "944290148" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.5.tar.gz", + "date": "944599461" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.6.tar.gz", + "date": "952156235" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.7.tar.gz", + "date": "952313085" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.1.0.tar.gz", + "date": "969299287" + }, + { 
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.0beta.tar.gz",
+        "date": "977027108"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.1.tar.gz",
+        "date": "981323501"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.2.tar.gz",
+        "date": "981562809"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.3.tar.gz",
+        "date": "982657006"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.4.tar.gz",
+        "date": "1007952745"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.5.tar.gz",
+        "date": "1008466945"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.6.tar.gz",
+        "date": "1012641715"
+    },
+    {
+        "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.7.tar.gz",
+        "date": "1070057764"
+    }
+]
diff --git a/swh/lister/gnu/tests/test_lister.py b/swh/lister/gnu/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/tests/test_lister.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import os
+
+from swh.lister.gnu.lister import find_tarballs, filter_directories
+from swh.lister.gnu.lister import file_extension_check
+
+# Fixtures sit next to this module; resolving them from __file__ keeps the
+# tests independent of the directory pytest is started from.
+FIXTURES_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def _load_fixture(name):
+    """Return the parsed content of the JSON fixture file ``name``."""
+    # The context manager closes the file handle even if parsing fails.
+    with open(os.path.join(FIXTURES_DIR, name)) as f:
+        return json.load(f)
+
+
+def test_filter_directories():
+    """filter_directories() only lets 'gnu' and 'old-gnu' entries through."""
+    api_response = _load_fixture('api_response.json')
+    cleared_api_response = filter_directories(api_response)
+    for directory in cleared_api_response:
+        assert directory['name'] in ('gnu', 'old-gnu')
+
+
+def test_find_tarballs():
+    """find_tarballs() output for both listings equals the fixture."""
+    expected_list_of_all_tarballs = _load_fixture('find_tarballs_output.json')
+    file_structure = _load_fixture('file_structure.json')
+
+    list_of_all_tarballs = []
+    list_of_all_tarballs.extend(
+        find_tarballs(file_structure[0]['contents'],
+                      "https://ftp.gnu.org/gnu/artanis/"))
+    list_of_all_tarballs.extend(
+        find_tarballs(file_structure[1]['contents'],
+                      "https://ftp.gnu.org/old-gnu/xboard/"))
+    assert list_of_all_tarballs == expected_list_of_all_tarballs
+
+
+def test_file_extension_check():
+    """file_extension_check() is truthy for archives, False for 'abc'."""
+    assert file_extension_check('abc.xy.zip')
+    assert file_extension_check('cvb.zip')
+    assert file_extension_check('abc.tar.bz2')
+    assert file_extension_check('abc') is False
diff --git a/swh/lister/gnu/tests/test_tasks.py b/swh/lister/gnu/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/tests/test_tasks.py
@@ -0,0 +1,30 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    """The 'swh.lister.gnu.tasks.ping' task completes and returns 'OK'."""
+    res = swh_app.send_task(
+        'swh.lister.gnu.tasks.ping')
+    assert res
+    res.wait()
+    assert res.successful()
+    assert res.result == 'OK'
+
+
+@patch('swh.lister.gnu.tasks.GNULister')
+def test_lister(lister, swh_app, celery_session_worker):
+    """GNUListerTask builds a GNULister with no arguments and runs it once."""
+    # setup the mocked GNULister: instantiating the patched class hands back
+    # the mock itself, so calls on the instance are recorded on ``lister``
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    res = swh_app.send_task(
+        'swh.lister.gnu.tasks.GNUListerTask')
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.assert_called_once_with()
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()