diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ - `swh.lister.pypi` - `swh.lister.npm` - `swh.lister.phabricator` +- `swh.lister.gnu` Dependencies ------------ @@ -177,6 +178,18 @@ incremental_phabricator_lister(forge_url='https://forge.softwareheritage.org', api_token='XXXX') ``` +## lister-gnu + +Once configured, you can execute a GNU lister using the following instructions in a `python3` script: + +```lang=python +import logging +from swh.lister.gnu.tasks import gnu_lister + +logging.basicConfig(level=logging.DEBUG) +gnu_lister() +``` + Licensing --------- diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -54,6 +54,8 @@ entry_points=''' [console_scripts] swh-lister=swh.lister.cli:cli + [swh.cli.subcommands] + lister=swh.lister.cli:lister ''', classifiers=[ "Programming Language :: Python :: 3", diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,6 +17,7 @@ PATH_TEMPLATE = '/repositories?after=%s' MODEL = BitBucketModel LISTER_NAME = 'bitbucket' + instance = 'bitbucket' def get_model_from_repo(self, repo): return { diff --git a/swh/lister/cli.py b/swh/lister/cli.py --- a/swh/lister/cli.py +++ b/swh/lister/cli.py @@ -6,14 +6,23 @@ import logging import click +from swh.core.cli import CONTEXT_SETTINGS + logger = logging.getLogger(__name__) SUPPORTED_LISTERS = ['github', 'gitlab', 'bitbucket', 'debian', 'pypi', - 'npm', 'phabricator'] + 'npm', 'phabricator', 'gnu'] + +@click.group(name='lister', context_settings=CONTEXT_SETTINGS) +@click.pass_context +def lister(ctx): + '''Software Heritage Lister tools.''' + pass -@click.command() + 
+@lister.command(name='db-init', context_settings=CONTEXT_SETTINGS) @click.option( '--db-url', '-d', default='postgres:///lister-gitlab.com', help='SQLAlchemy DB URL; see ' @@ -22,8 +31,9 @@ type=click.Choice(SUPPORTED_LISTERS + ['all'])) @click.option('--drop-tables', '-D', is_flag=True, default=False, help='Drop tables before creating the database schema') -def cli(db_url, listers, drop_tables): - """Initialize db model according to lister. +@click.pass_context +def cli(ctx, db_url, listers, drop_tables): + """Initialize the database model for given listers. """ override_conf = { @@ -105,6 +115,11 @@ api_token='', override_config=override_conf) + elif lister == 'gnu': + from .gnu.models import ModelBase + from .gnu.lister import GNULister + _lister = GNULister(override_config=override_conf) + else: raise ValueError( 'Invalid lister %s: only supported listers are %s' % diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -239,12 +239,9 @@ @property def ADDITIONAL_CONFIG(self): # noqa: N802 return { - 'credentials': - ('list[dict]', []), - 'cache_responses': - ('bool', False), - 'cache_dir': - ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), + 'credentials': ('dict', {}), + 'cache_responses': ('bool', False), + 'cache_dir': ('str', '~/.cache/swh/lister/%s' % self.LISTER_NAME), } INITIAL_BACKOFF = 10 @@ -414,7 +411,7 @@ Returns: the same information in a different form """ - _type = 'origin-update-%s' % origin_type + _type = 'load-%s' % origin_type _policy = 'recurring' return utils.create_task_dict(_type, _policy, origin_url) diff --git a/swh/lister/core/lister_transports.py b/swh/lister/core/lister_transports.py --- a/swh/lister/core/lister_transports.py +++ b/swh/lister/core/lister_transports.py @@ -62,13 +62,40 @@ """Get the full parameters passed to requests given the transport_request identifier. + This uses credentials if any are provided. 
The 'credentials' + configuration is expected to be a dict of multiple levels. The first + level is the lister's name, the second is the lister's instance name. + + For example: + + credentials: + github: # github lister + github: # has only one instance (so far) + - username: some + password: somekey + - username: one + password: onekey + - ... + gitlab: # gitlab lister + riseup: # has many instances + - username: someone + password: ... + - ... + gitlab: + - username: someone + password: ... + - ... + + MAY BE OVERRIDDEN if something more complex than the request headers is needed. """ params = {} params['headers'] = self.request_headers() or {} - creds = self.config['credentials'] + all_creds = self.config['credentials'] + lister_creds = all_creds.get(self.LISTER_NAME, {}) + creds = lister_creds.get(self.instance, {}) auth = random.choice(creds) if creds else None if auth: params['auth'] = (auth['username'], auth['password']) diff --git a/swh/lister/core/tests/conftest.py b/swh/lister/core/tests/conftest.py --- a/swh/lister/core/tests/conftest.py +++ b/swh/lister/core/tests/conftest.py @@ -12,4 +12,5 @@ 'swh.lister.npm.tasks', 'swh.lister.pypi.tasks', 'swh.lister.phabricator.tasks', + 'swh.lister.gnu.tasks' ] diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -32,6 +32,7 @@ MODEL = Package PATH_TEMPLATE = None LISTER_NAME = 'debian' + instance = 'debian' def __init__(self, override_config=None): SWHListerHttpTransport.__init__(self, api_baseurl="bogus") diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py --- a/swh/lister/github/lister.py +++ b/swh/lister/github/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 the Software Heritage developers +# Copyright (C) 2017-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ MODEL = 
GitHubModel API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)') LISTER_NAME = 'github' + instance = 'github' # There is only 1 instance of such lister def get_model_from_repo(self, repo): return { diff --git a/swh/lister/gitlab/lister.py b/swh/lister/gitlab/lister.py --- a/swh/lister/gitlab/lister.py +++ b/swh/lister/gitlab/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -28,19 +28,6 @@ self.PATH_TEMPLATE = '%s&per_page=%s' % ( self.PATH_TEMPLATE, per_page) - @property - def ADDITIONAL_CONFIG(self): - """Override additional config as the 'credentials' structure change - between the ancestor classes and this class. - - cf. request_params method below - - """ - default_config = super().ADDITIONAL_CONFIG - # 'credentials' is a dict of (instance, {username, password}) dict - default_config['credentials'] = ('dict', {}) - return default_config - def request_params(self, identifier): """Get the full parameters passed to requests given the transport_request identifier. 
diff --git a/swh/lister/gnu/__init__.py b/swh/lister/gnu/__init__.py
new file mode 100644
diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/lister.py
@@ -0,0 +1,209 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import random
+import gzip
+import json
+import requests
+
+from .models import GNUModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class GNULister(SimpleLister):
+    MODEL = GNUModel
+    LISTER_NAME = 'gnu'
+    TREE_URL = 'https://ftp.gnu.org/tree.json.gz'
+    BASE_URL = 'https://ftp.gnu.org'
+
+    def task_dict(self, origin_type, origin_url, **kwargs):
+        """
+        Return task format dict
+
+        This is overridden from the lister_base as more information is
+        needed for the ingestion task creation.
+        """
+        return utils.create_task_dict(
+            'load-%s' % origin_type, 'recurring', kwargs.get('name'),
+            origin_url, list_of_tarballs=kwargs.get('list_of_tarballs'))
+
+    def get_file(self):
+        '''
+        Download and unzip the tree.json.gz file and return its content
+        parsed from JSON.
+
+        Returns
+            File content as parsed JSON
+        '''
+        response = requests.get(self.TREE_URL,
+                                allow_redirects=True)
+        uncompressed_content = gzip.decompress(response.content)
+        return json.loads(uncompressed_content.decode('utf-8'))
+
+    def safely_issue_request(self, identifier):
+        '''
+        Make a network request to download the file which
+        describes the file structure of the GNU website.
+
+        Args:
+            identifier: resource identifier (unused)
+        Returns:
+            parsed content of the downloaded file
+        '''
+        return self.get_file()
+
+    def list_packages(self, response):
+        """
+        List the actual gnu origins with their names and
+        time last updated from the response.
+
+        Args:
+            response : File structure of the website
+            in JSON format
+
+        Returns:
+            a list of all the packages with their names, url of their root
+            directory, time of last modification and the tarballs present
+            for the particular package.
+            [
+                {'name': '3dldf', 'url': 'https://ftp.gnu.org/gnu/3dldf/',
+                 'time_modified': 1071078759,
+                 'list_of_tarballs':
+                [
+                    {'archive':
+                     'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
+                     'date': '1071002600'},
+                    {'archive':
+                     'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
+                     'date': '1071078759'}
+                ]
+                },
+                {'name': '8sync', 'url': 'https://ftp.gnu.org/gnu/8sync/',
+                 'time_modified': 1480991830,
+                 'list_of_tarballs':
+                [
+                    {'archive':
+                     'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
+                     'date': '1461357336'},
+                    {'archive':
+                     'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz',
+                     'date': '1480991830'}
+                ]
+                }
+            ]
+        """
+        response = filter_directories(response)
+        packages = []
+        for directory in response:
+            content = directory['contents']
+            for repo in content:
+                if repo['type'] == 'directory':
+                    package_url = '%s/%s/%s/' % (self.BASE_URL,
+                                                 directory['name'],
+                                                 repo['name'])
+                    list_of_tarballs = find_tarballs(
+                        repo['contents'], package_url)
+                    if list_of_tarballs != []:
+                        repo_details = {
+                            'name': repo['name'],
+                            'url': package_url,
+                            'time_modified': int(repo['time']),
+                            'list_of_tarballs': list_of_tarballs
+                        }
+                        packages.append(repo_details)
+        random.shuffle(packages)
+        return packages
+
+    def get_model_from_repo(self, repo):
+        """Transform from repository representation to model
+
+        """
+        return {
+            'uid': repo['name'],
+            'name': repo['name'],
+            'full_name': repo['name'],
+            'html_url': repo['url'],
+            'origin_url': repo['url'],
+            'time_last_updated': repo['time_modified'],
+            'origin_type': 'gnu',
+            'description': None,
+        }
+
+    def transport_response_simplified(self, response):
+        """Transform response to list for model manipulation
+
+        """
+        return [self.get_model_from_repo(repo) for repo in response]
+
+    def transport_request(self):
+        pass
+
+    def transport_response_to_string(self):
+        pass
+
+    def transport_quota_check(self):
+        pass
+
+
+def find_tarballs(package_file_structure, url):
+    '''
+    Recursively lists all the tarballs present in the folder and
+    subfolders for a particular package url.
+
+    Args
+        package_file_structure : File structure of the package root directory
+        url : URL of the corresponding package
+
+    Returns
+        List of all the tarball urls and their time of last update
+        example-
+        For a package called 3dldf
+
+        [
+            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
+             'date': '1071002600'},
+            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
+             'date': '1071078759'},
+            {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz',
+             'date': '1074278633'},
+            ...
+        ]
+    '''
+    list_of_tarballs = []
+    for single_file in package_file_structure:
+        file_type = single_file['type']
+        file_name = single_file['name']
+        if file_type == 'file':
+            # Accept *.zip, *.tar and compressed tarballs (*.tar.gz,
+            # *.tar.bz2, ...) but not their signatures (*.tar.gz.sig).
+            extensions = file_name.split('.')[1:]
+            if (extensions[-1:] in (['tar'], ['zip']) or
+                    extensions[-2:-1] == ['tar']):
+                list_of_tarballs.append({
+                    "archive": url + file_name,
+                    "date": single_file['time']
+                })
+        # It will recursively check for tarballs in all sub-folders
+        elif file_type == 'directory':
+            list_of_tarballs_in_dir = find_tarballs(
+                single_file['contents'],
+                url + file_name + '/')
+            list_of_tarballs.extend(list_of_tarballs_in_dir)
+
+    return list_of_tarballs
+
+
+def filter_directories(response):
+    '''
+    Removes unnecessary directories from JSON response and
+    keep only gnu/ and old-gnu/
+    '''
+    final_response = []
+    file_system = response[0]['contents']
+    for directory in file_system:
+        if directory['name'] in ('gnu', 'old-gnu'):
+            final_response.append(directory)
+    return final_response
diff --git a/swh/lister/gnu/models.py b/swh/lister/gnu/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/models.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String, Integer
+
+from ..core.models import ModelBase
+
+
+class GNUModel(ModelBase):
+    """a GNU repository representation
+
+    """
+    __tablename__ = 'gnu_repo'
+
+    uid = Column(String, primary_key=True)
+    time_last_updated = Column(Integer)
diff --git a/swh/lister/gnu/tasks.py b/swh/lister/gnu/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/tasks.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import GNULister
+
+
+@app.task(name=__name__ + '.GNUListerTask')
+def gnu_lister(**lister_args):
+    GNULister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+    return 'OK'
diff --git a/swh/lister/gnu/tests/__init__.py b/swh/lister/gnu/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/gnu/tests/api_response.json b/swh/lister/gnu/tests/api_response.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/gnu/tests/api_response.json
@@ -0,0 +1,141 @@
+[{"type":"directory","name": ".","contents":[
+    {"type":"file","name":".footer.shtml","size":444,"time":"1359994299"},
+    {"type":"file","name":".gnu-gnu-gnu.png","size":12413,"time":"1314632619"},
+    {"type":"file","name":".header.shtml","size":1833,"time":"1546469072"},
+    {"type":"file","name":".header.shtml~","size":1364,"time":"1454341750"},
+    {"type":"file","name":".htaccess","size":334,"time":"1314639683"},
+    {"type":"file","name":".message","size":1125,"time":"1507930451"},
+    {"type":"link","name":"CRYPTO.README","target":".message","size":8,"time":"1093018000","contents":[]},
+    {"type":"file","name":"MISSING-FILES","size":17864,"time":"1066928263"},
+    {"type":"file","name":"MISSING-FILES.README","size":4178,"time":"1060815936"},
+    {"type":"file","name":"README","size":2925,"time":"1554408947"},
+
{"type":"file","name":"before-2003-08-01.md5sums.asc","size":405121,"time":"1066928156"}, + {"type":"file","name":"find.txt.gz","size":261428,"time":"1557684608"}, + {"type":"directory","name":"gnu","size":12288,"time":"1556742017","contents":[]}, + {"type":"directory","name":"gnu+linux-distros","size":4096,"time":"1299783002","contents":[ + {"type":"directory","name":"ututo-e","size":4096,"time":"1487780066","contents":[ + {"type":"file","name":"README","size":48,"time":"1487780066"}, + {"type":"file","name":"index.html","size":158,"time":"1487780054"} + ]} + ]}, + {"type":"file","name":"ls-lrRt.txt.gz","size":480054,"time":"1557684607"}, + {"type":"directory","name":"mirrors","size":4096,"time":"1114010630","contents":[ + {"type":"directory","name":"dynebolic","size":4096,"time":"1317827602","contents":[ + {"type":"file","name":"MOVED_TO_mirror.fsf.org_dynebolic","size":0,"time":"1317826935"}, + {"type":"file","name":"README.txt","size":41,"time":"1317827081"}, + {"type":"file","name":"index.html","size":107,"time":"1317827601"} + ]} + ]}, + {"type":"link","name":"non-gnu","target":"gnu/non-gnu","size":11,"time":"1082055542","contents":[]}, + {"type":"directory","name":"old-gnu","size":4096,"time":"1548360019","contents":[]}, + {"type":"link","name":"pub","target":".","size":1,"time":"1060090003","contents":[]}, + {"type":"directory","name":"savannah","size":4096,"time":"1194544006","contents":[ + {"type":"file","name":"README","size":473,"time":"1143758028"} + ]}, + {"type":"directory","name":"third-party","size":4096,"time":"1059825710","contents":[ + {"type":"file","name":"README","size":374,"time":"983824071"} + ]}, + {"type":"directory","name":"tmp","size":4096,"time":"1239072509","contents":[ + ]}, + {"type":"file","name":"tree.json.gz","size":0,"time":"1557684608"}, + {"type":"directory","name":"video","size":4096,"time":"1367963189","contents":[ + {"type":"file","name":".bash_history","size":27,"time":"1307027604"}, + 
{"type":"file","name":"A_Digital_Media_Primer_For_Geeks-360p.webm","size":138044199,"time":"1284892010"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-480p.webm","size":365119650,"time":"1284895035"}, + {"type":"file","name":"A_Digital_Media_Primer_For_Geeks-720p.webm","size":820524785,"time":"1284903666"}, + {"type":"file","name":"Autobuild_Status_Update.ogv","size":95034503,"time":"1281781973"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg","size":23901186,"time":"1299776852"}, + {"type":"file","name":"FOSDEM2006-GPL.ogg.sig","size":536,"time":"1299776852"}, + {"type":"file","name":"FSF_event.ogv","size":52056292,"time":"1251497852"}, + {"type":"file","name":"FSF_event_small.ogv","size":5237196,"time":"1251840561"}, + {"type":"file","name":"GNU-Hurd_-_Its_About_Freedom,_Or_Why_you_should_care.ogv","size":177589989,"time":"1281779528"}, + {"type":"file","name":"GNU_Bazaar_-_a_distributed_version_control_system_for_free_software_communities.ogv","size":122114984,"time":"1281783734"}, + {"type":"file","name":"GNU_Psycosynth.ogv","size":69931061,"time":"1281784291"}, + {"type":"file","name":"GNU_in_the_cloud.ogv","size":84068213,"time":"1281784003"}, + {"type":"file","name":"GNUnet_-_Transports_and_Transport_Selection.ogv","size":334973582,"time":"1281801792"}, + {"type":"file","name":"GNUnet_Distributed_Data_Storage_-_DHT_and_Distance_Vector_Transport.ogv","size":630257300,"time":"1281791152"}, + {"type":"file","name":"Introduction_to_LilyPond.ogv","size":135277165,"time":"1281787990"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-100kbit_vorbis.ogg","size":4481112,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-12kbit_speex.spx","size":511060,"time":"1220091688"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit.ogv","size":34370182,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-hq_600px_780kbit_fr.ogv","size":35983054,"time":"1220091689"}, + 
{"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit.ogv","size":8126685,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-lq_300px_190kbit_fr.ogv","size":8978906,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit.ogv","size":22443268,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_en.ogv","size":23467445,"time":"1220091689"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-nq_600px_425kbit_fr.ogv","size":23649669,"time":"1220091690"}, + {"type":"file","name":"Stephen_Fry-Happy_Birthday_GNU-vlq_192px_56kbit.ogv","size":2530887,"time":"1220091690"}, + {"type":"file","name":"TIME","size":11,"time":"1557682561"}, + {"type":"file","name":"The_GCC_Compile_Farm.ogv","size":102712932,"time":"1281788256"}, + {"type":"file","name":"The_GNU_Record_Utilities.ogv","size":156667664,"time":"1281788873"}, + {"type":"file","name":"The_GNUnet_Peer-to-Peer_Framework.ogv","size":863516287,"time":"1281792840"}, + {"type":"file","name":"The_new_CPP_Standard_and_library_Cpp0x.ogv","size":406746577,"time":"1281790439"}, + {"type":"file","name":"dyne.org_hackers_network.ogv","size":154795708,"time":"1281785482"}, + {"type":"file","name":"fry720.jpg","size":141588,"time":"1219677812"}, + {"type":"file","name":"fsf-2009-hillaryrettig.ogv","size":17557909,"time":"1262115081"}, + {"type":"file","name":"fsf-2009-jeremyallison.ogv","size":20719197,"time":"1256665061"}, + {"type":"file","name":"fsf-2009-larrylessig.ogv","size":10571717,"time":"1261667503"}, + {"type":"file","name":"fsf-2009-maryloujepsen.ogv","size":4198432,"time":"1256222925"}, + {"type":"file","name":"fsf-2009-peterbrown-final.ogv","size":14283895,"time":"1259856669"}, + {"type":"file","name":"fsf-2009-robsavoye.ogv","size":10262195,"time":"1256596109"}, + {"type":"file","name":"fsf-2009-savoye.ogv","size":13594470,"time":"1256596105"}, + 
{"type":"file","name":"gNewSense.ogv","size":89891356,"time":"1281783196"}, + {"type":"file","name":"gnulib_-_Contributing_reusable_code.ogv","size":120341572,"time":"1281779839"}, + {"type":"file","name":"lp2010-eben-moglen.ogv","size":63138569,"time":"1280508649"}, + {"type":"file","name":"lp2010-fri-command-line.ogg","size":102351845,"time":"1280515516"}, + {"type":"file","name":"lp2010-fri-gimp-inkscape.ogg","size":73786977,"time":"1280515751"}, + {"type":"file","name":"lp2010-fri-inkscape-gimp.ogv","size":163618762,"time":"1279906253"}, + {"type":"file","name":"lp2010-fri-intro-to-command-line.ogv","size":166684617,"time":"1279913790"}, + {"type":"file","name":"lp2010-fri-intro.ogg","size":8830316,"time":"1280515539"}, + {"type":"file","name":"lp2010-fri-speaking-workshop.ogg","size":66985650,"time":"1280515967"}, + {"type":"file","name":"lp2010-fri-welcome.ogv","size":18634141,"time":"1279914475"}, + {"type":"file","name":"lp2010-fri-workshop-speaking.ogv","size":104913218,"time":"1279920003"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogg","size":36063678,"time":"1280516115"}, + {"type":"file","name":"lp2010-sat-eben-moglen.ogv","size":63138569,"time":"1280155656"}, + {"type":"file","name":"lp2010-sat-film-discussion.ogg","size":6212226,"time":"1280516133"}, + {"type":"file","name":"lp2010-sat-gilmore-fixed.ogv","size":315091609,"time":"1281103644"}, + {"type":"file","name":"lp2010-sat-gilmore.ogv","size":278528,"time":"1281106393"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogg","size":26624876,"time":"1280516218"}, + {"type":"file","name":"lp2010-sat-gnome-shell.ogv","size":54603867,"time":"1280158479"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogg","size":25279954,"time":"1280516298"}, + {"type":"file","name":"lp2010-sat-gnu-generation.ogv","size":34687879,"time":"1280160636"}, + {"type":"file","name":"lp2010-sat-intro.ogg","size":2257992,"time":"1280516304"}, + 
{"type":"file","name":"lp2010-sat-john-gilmore.ogg","size":39422949,"time":"1280516426"}, + {"type":"file","name":"lp2010-sat-karen-sandler.ogg","size":37913634,"time":"1280516542"}, + {"type":"file","name":"lp2010-sat-licensing-hoedown.ogv","size":81030741,"time":"1280163330"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogg","size":41024302,"time":"1280516669"}, + {"type":"file","name":"lp2010-sat-rms+awards.ogv","size":138070626,"time":"1280167204"}, + {"type":"file","name":"lp2010-sat-sandler-sflc.ogv","size":48057099,"time":"1280170655"}, + {"type":"file","name":"lp2010-sat-sugar-labs.ogv","size":63820951,"time":"1280173681"}, + {"type":"file","name":"lp2010-sat-symbian.ogv","size":71807650,"time":"1280176185"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-1.ogg","size":51329142,"time":"1280517315"}, + {"type":"file","name":"lp2010-sun-deb-nicholson-2.ogg","size":29574674,"time":"1280517409"}, + {"type":"file","name":"lp2010-sun-free-network-services.ogv","size":294504464,"time":"1280180824"}, + {"type":"file","name":"lp2010-sun-free-software-mentoring.ogg","size":11780923,"time":"1280517447"}, + {"type":"file","name":"lp2010-sun-fsf-campaigns-team-update.ogv","size":166676786,"time":"1280183913"}, + {"type":"file","name":"lp2010-sun-gnash.ogv","size":129681345,"time":"1280185455"}, + {"type":"file","name":"lp2010-sun-gnu-telephony.ogv","size":205191264,"time":"1280188317"}, + {"type":"file","name":"lp2010-sun-jeff-jaffe.ogv","size":98181482,"time":"1280189804"}, + {"type":"file","name":"lp2010-sun-libredwg.ogv","size":138981040,"time":"1280191724"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogg","size":47004371,"time":"1280517591"}, + {"type":"file","name":"lp2010-sun-lightning-talks.ogv","size":105266782,"time":"1280196010"}, + {"type":"file","name":"lp2010-sun-luis-villa.ogv","size":182437632,"time":"1280200006"}, + {"type":"file","name":"lp2010-sun-mentoring.ogv","size":47095807,"time":"1280201658"}, + 
{"type":"file","name":"lp2010-sun-non-coding-roles.ogv","size":109852516,"time":"1280206251"}, + {"type":"file","name":"lp2010-sun-noncoding-workshop.ogg","size":23507355,"time":"1280517664"}, + {"type":"file","name":"lp2010-sun-play-ogg.ogv","size":306319862,"time":"1280210339"}, + {"type":"file","name":"lp2010-sun-recruiting-retaining-women.ogv","size":31282146,"time":"1280212310"}, + {"type":"file","name":"lp2010-sun-recruiting-women.ogg","size":37264232,"time":"1280517788"}, + {"type":"file","name":"lp2010-sun-wrapup.ogv","size":109607244,"time":"1280215323"}, + {"type":"file","name":"mediagoblin_campaign_pitch-small.webm","size":11948337,"time":"1349289671"}, + {"type":"file","name":"mediagoblin_campaign_pitch.webm","size":27197918,"time":"1349289722"}, + {"type":"file","name":"rms-education-es.webm","size":64071545,"time":"1304351680"}, + {"type":"file","name":"rms-education.es.ogv","size":21726974,"time":"1304351770"}, + {"type":"file","name":"sf-large.ogv","size":140102085,"time":"1220111045"}, + {"type":"file","name":"stallmanupv.ogg","size":18683460,"time":"1299776853"}, + {"type":"file","name":"stallmanupv.ogg.sig","size":536,"time":"1299776853"} + ]}, + {"type":"file","name":"welcome.msg","size":2830,"time":"1545163301"} +]}, +{"type":"report","directories":2743,"files":63983} +] diff --git a/swh/lister/gnu/tests/conftest.py b/swh/lister/gnu/tests/conftest.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/conftest.py @@ -0,0 +1 @@ +from swh.lister.core.tests.conftest import * # noqa diff --git a/swh/lister/gnu/tests/file_structure.json b/swh/lister/gnu/tests/file_structure.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/file_structure.json @@ -0,0 +1,130 @@ +[ + {"type":"directory","name":"artanis","size":4096,"time":"1546205705","contents":[ + {"type":"file","name":"artanis-0.2.1.tar.bz2","size":424081,"time":"1495205979"}, + {"type":"file","name":"artanis-0.2.1.tar.bz2.sig","size":833,"time":"1495205982"}, + 
{"type":"file","name":"artanis-0.2.1.tar.gz","size":506599,"time":"1495205967"}, + {"type":"file","name":"artanis-0.2.1.tar.gz.sig","size":833,"time":"1495205970"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2","size":421984,"time":"1494994239"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":833,"time":"1494994240"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz","size":504759,"time":"1494994222"}, + {"type":"file","name":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":833,"time":"1494994224"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2","size":439269,"time":"1520284021"}, + {"type":"file","name":"artanis-0.2.3.tar.bz2.sig","size":833,"time":"1520284023"}, + {"type":"file","name":"artanis-0.2.3.tar.gz","size":526293,"time":"1520284007"}, + {"type":"file","name":"artanis-0.2.3.tar.gz.sig","size":833,"time":"1520284009"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2","size":426626,"time":"1521742071"}, + {"type":"file","name":"artanis-0.2.4.tar.bz2.sig","size":833,"time":"1521742074"}, + {"type":"file","name":"artanis-0.2.4.tar.gz","size":508420,"time":"1521742057"}, + {"type":"file","name":"artanis-0.2.4.tar.gz.sig","size":833,"time":"1521742060"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2","size":440350,"time":"1525717261"}, + {"type":"file","name":"artanis-0.2.5.tar.bz2.sig","size":833,"time":"1525717263"}, + {"type":"file","name":"artanis-0.2.5.tar.gz","size":518316,"time":"1525717246"}, + {"type":"file","name":"artanis-0.2.5.tar.gz.sig","size":833,"time":"1525717249"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2","size":448329,"time":"1546205569"}, + {"type":"file","name":"artanis-0.3.1.tar.bz2.sig","size":833,"time":"1546205571"}, + {"type":"file","name":"artanis-0.3.1.tar.gz","size":535098,"time":"1546205555"}, + {"type":"file","name":"artanis-0.3.1.tar.gz.sig","size":833,"time":"1546205558"}, + {"type":"file","name":"artanis-0.3.tar.bz2","size":452609,"time":"1546205025"}, + 
{"type":"file","name":"artanis-0.3.tar.bz2.sig","size":833,"time":"1546205027"}, + {"type":"file","name":"artanis-0.3.tar.gz","size":550938,"time":"1546205012"}, + {"type":"file","name":"artanis-0.3.tar.gz.sig","size":833,"time":"1546205015"}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2","target":"artanis-0.2.12-f39e-dirty.tar.bz2","size":33,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.bz2.sig","target":"artanis-0.2.12-f39e-dirty.tar.bz2.sig","size":37,"time":"1494994512","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz","target":"artanis-0.2.12-f39e-dirty.tar.gz","size":32,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.12-f39e-dirty.tar.gz.sig","target":"artanis-0.2.12-f39e-dirty.tar.gz.sig","size":36,"time":"1494994519","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2","target":"artanis-0.3.1.tar.bz2","size":21,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.bz2.sig","target":"artanis-0.3.1.tar.bz2.sig","size":25,"time":"1546205705","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz","target":"artanis-0.3.1.tar.gz","size":20,"time":"1546205703","contents":[]}, + {"type":"link","name":"artanis-latest.tar.gz.sig","target":"artanis-0.3.1.tar.gz.sig","size":24,"time":"1546205703","contents":[]} + ]}, + {"type":"directory","name":"xboard","size":4096,"time":"1254860068","contents":[ + {"type":"directory","name":"winboard","size":4096,"time":"1181795103","contents":[ + {"type":"file","name":"README","size":107,"time":"1070058107"}, + {"type":"file","name":"README.sig","size":65,"time":"1070058115"}, + {"type":"file","name":"winboard-4_0_0-src.zip","size":1514448,"time":"898422900"}, + {"type":"file","name":"winboard-4_0_0.README","size":4152,"time":"898422960"}, + {"type":"file","name":"winboard-4_0_0.exe","size":1652037,"time":"898422900"}, + 
{"type":"file","name":"winboard-4_0_2-src.zip","size":1482621,"time":"920018269"}, + {"type":"file","name":"winboard-4_0_2.README","size":3617,"time":"920018270"}, + {"type":"file","name":"winboard-4_0_2.exe","size":1716772,"time":"920018294"}, + {"type":"file","name":"winboard-4_0_3-src.zip","size":1499275,"time":"936750503"}, + {"type":"file","name":"winboard-4_0_3.README","size":2218,"time":"940361675"}, + {"type":"file","name":"winboard-4_0_3.exe","size":1725023,"time":"936750506"}, + {"type":"file","name":"winboard-4_0_4-src.tar.gz","size":1753506,"time":"944290190"}, + {"type":"file","name":"winboard-4_0_4.README","size":5393,"time":"944290195"}, + {"type":"file","name":"winboard-4_0_4.exe","size":2195155,"time":"944290204"}, + {"type":"file","name":"winboard-4_0_5-src.tar.gz","size":1752189,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.README","size":2348,"time":"944600462"}, + {"type":"file","name":"winboard-4_0_5.exe","size":2195822,"time":"944600468"}, + {"type":"file","name":"winboard-4_0_6-src.tar.gz","size":1761396,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.README","size":1592,"time":"952156231"}, + {"type":"file","name":"winboard-4_0_6.exe","size":2026273,"time":"952156230"}, + {"type":"file","name":"winboard-4_0_7-src.tar.gz","size":1764000,"time":"952313061"}, + {"type":"file","name":"winboard-4_0_7.README","size":1721,"time":"952313072"}, + {"type":"file","name":"winboard-4_0_7.exe","size":2202166,"time":"952313082"}, + {"type":"file","name":"winboard-4_1_0-src.tar.gz","size":1902251,"time":"969299378"}, + {"type":"file","name":"winboard-4_1_0.exe","size":2126431,"time":"969299361"}, + {"type":"file","name":"winboard-4_2_0beta-src.tar.gz","size":2000471,"time":"977027031"}, + {"type":"file","name":"winboard-4_2_0beta.README","size":3048,"time":"977033442"}, + {"type":"file","name":"winboard-4_2_0beta.exe","size":2292716,"time":"977027033"}, + 
{"type":"file","name":"winboard-4_2_1-src.tar.gz","size":2090945,"time":"981323331"}, + {"type":"file","name":"winboard-4_2_1.README","size":1336,"time":"981323332"}, + {"type":"file","name":"winboard-4_2_1.exe","size":2298010,"time":"981323012"}, + {"type":"file","name":"winboard-4_2_2-src.tar.gz","size":2025689,"time":"981570576"}, + {"type":"file","name":"winboard-4_2_2.README","size":2705,"time":"981562643"}, + {"type":"file","name":"winboard-4_2_2.exe","size":2298407,"time":"981570908"}, + {"type":"file","name":"winboard-4_2_3-src.tar.gz","size":2001746,"time":"982656672"}, + {"type":"file","name":"winboard-4_2_3.README","size":3014,"time":"982656842"}, + {"type":"file","name":"winboard-4_2_3.exe","size":2299250,"time":"982656841"}, + {"type":"file","name":"winboard-4_2_4-src.tar.gz","size":2388388,"time":"1007952574"}, + {"type":"file","name":"winboard-4_2_4.README","size":1863,"time":"1007952575"}, + {"type":"file","name":"winboard-4_2_4.exe","size":10020579,"time":"1007952203"}, + {"type":"file","name":"winboard-4_2_5-src.tar.gz","size":1962754,"time":"1008502483"}, + {"type":"file","name":"winboard-4_2_5.README","size":2069,"time":"1008466769"}, + {"type":"file","name":"winboard-4_2_5.exe","size":2489300,"time":"1008502215"}, + {"type":"file","name":"winboard-4_2_6-src.tar.gz","size":1982333,"time":"1012641285"}, + {"type":"file","name":"winboard-4_2_6.README","size":1765,"time":"1012640603"}, + {"type":"file","name":"winboard-4_2_6.exe","size":2490333,"time":"1012641027"}, + {"type":"file","name":"winboard-4_2_7-only.exe","size":1729532,"time":"1070149476"}, + {"type":"file","name":"winboard-4_2_7-only.exe.sig","size":65,"time":"1070149498"}, + {"type":"file","name":"winboard-4_2_7.exe","size":5629711,"time":"1070057661"}, + {"type":"file","name":"winboard-4_2_7.exe.sig","size":65,"time":"1070057687"}, + {"type":"file","name":"winboard-4_2_7a.exe","size":5629711,"time":"1070082423"}, + 
{"type":"file","name":"winboard-4_2_7a.exe.sig","size":65,"time":"1070082566"}, + {"type":"file","name":"winboard-4_2_7b.exe","size":6213290,"time":"1181794790"}, + {"type":"file","name":"winboard-4_2_7b.exe.sig","size":65,"time":"1181794954"} + ]}, + {"type":"file","name":"xboard-3.6.2.tar.gz","size":450164,"time":"869814000"}, + {"type":"file","name":"xboard-4.0.0.README","size":4152,"time":"920018090"}, + {"type":"file","name":"xboard-4.0.0.tar.gz","size":514951,"time":"898422900"}, + {"type":"file","name":"xboard-4.0.2.README","size":3617,"time":"920018199"}, + {"type":"file","name":"xboard-4.0.2.tar.gz","size":564856,"time":"920018202"}, + {"type":"file","name":"xboard-4.0.3.README","size":2218,"time":"936750507"}, + {"type":"file","name":"xboard-4.0.3.tar.gz","size":577351,"time":"936750512"}, + {"type":"file","name":"xboard-4.0.4.README","size":5393,"time":"944290145"}, + {"type":"file","name":"xboard-4.0.4.tar.gz","size":575421,"time":"944290148"}, + {"type":"file","name":"xboard-4.0.5.README","size":2348,"time":"944599460"}, + {"type":"file","name":"xboard-4.0.5.tar.gz","size":576300,"time":"944599461"}, + {"type":"file","name":"xboard-4.0.6.README","size":1592,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.6.tar.gz","size":579076,"time":"952156235"}, + {"type":"file","name":"xboard-4.0.7.README","size":1721,"time":"952313082"}, + {"type":"file","name":"xboard-4.0.7.tar.gz","size":578350,"time":"952313085"}, + {"type":"file","name":"xboard-4.1.0.tar.gz","size":1069507,"time":"969299287"}, + {"type":"file","name":"xboard-4.2.0beta.README","size":3048,"time":"977027107"}, + {"type":"file","name":"xboard-4.2.0beta.tar.gz","size":1093901,"time":"977027108"}, + {"type":"file","name":"xboard-4.2.1.README","size":1336,"time":"981323502"}, + {"type":"file","name":"xboard-4.2.1.tar.gz","size":1097200,"time":"981323501"}, + {"type":"file","name":"xboard-4.2.2.README","size":2705,"time":"981562810"}, + 
{"type":"file","name":"xboard-4.2.2.tar.gz","size":1097682,"time":"981562809"}, + {"type":"file","name":"xboard-4.2.3.README","size":3014,"time":"982657007"}, + {"type":"file","name":"xboard-4.2.3.tar.gz","size":1100059,"time":"982657006"}, + {"type":"file","name":"xboard-4.2.4.README","size":1863,"time":"1007952746"}, + {"type":"file","name":"xboard-4.2.4.tar.gz","size":1034728,"time":"1007952745"}, + {"type":"file","name":"xboard-4.2.5.README","size":2069,"time":"1008466946"}, + {"type":"file","name":"xboard-4.2.5.tar.gz","size":1055502,"time":"1008466945"}, + {"type":"file","name":"xboard-4.2.6.README","size":1765,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.6.tar.gz","size":1057625,"time":"1012641715"}, + {"type":"file","name":"xboard-4.2.7.tar.gz","size":1318110,"time":"1070057764"}, + {"type":"file","name":"xboard-4.2.7.tar.gz.sig","size":65,"time":"1070057702"} + ]} +] \ No newline at end of file diff --git a/swh/lister/gnu/tests/find_tarballs_output.json b/swh/lister/gnu/tests/find_tarballs_output.json new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/find_tarballs_output.json @@ -0,0 +1,158 @@ +[ + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.1.tar.gz", + "date": "1495205967" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.12-f39e-dirty.tar.gz", + "date": "1494994222" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.3.tar.gz", + "date": "1520284007" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.4.tar.gz", + "date": "1521742057" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.2.5.tar.gz", + "date": "1525717246" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.1.tar.gz", + "date": "1546205555" + }, + { + "archive": "https://ftp.gnu.org/gnu/artanis/artanis-0.3.tar.gz", + "date": "1546205012" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_0-src.zip", + "date": "898422900" + }, + { + "archive": 
"https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_2-src.zip", + "date": "920018269" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_3-src.zip", + "date": "936750503" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_4-src.tar.gz", + "date": "944290190" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_5-src.tar.gz", + "date": "944600462" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_6-src.tar.gz", + "date": "952156231" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_0_7-src.tar.gz", + "date": "952313061" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_1_0-src.tar.gz", + "date": "969299378" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_0beta-src.tar.gz", + "date": "977027031" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_1-src.tar.gz", + "date": "981323331" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_2-src.tar.gz", + "date": "981570576" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_3-src.tar.gz", + "date": "982656672" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_4-src.tar.gz", + "date": "1007952574" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_5-src.tar.gz", + "date": "1008502483" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/winboard/winboard-4_2_6-src.tar.gz", + "date": "1012641285" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-3.6.2.tar.gz", + "date": "869814000" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.0.tar.gz", + "date": "898422900" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.2.tar.gz", + "date": "920018202" + }, + { + "archive": 
"https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.3.tar.gz", + "date": "936750512" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.4.tar.gz", + "date": "944290148" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.5.tar.gz", + "date": "944599461" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.6.tar.gz", + "date": "952156235" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.0.7.tar.gz", + "date": "952313085" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.1.0.tar.gz", + "date": "969299287" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.0beta.tar.gz", + "date": "977027108" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.1.tar.gz", + "date": "981323501" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.2.tar.gz", + "date": "981562809" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.3.tar.gz", + "date": "982657006" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.4.tar.gz", + "date": "1007952745" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.5.tar.gz", + "date": "1008466945" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.6.tar.gz", + "date": "1012641715" + }, + { + "archive": "https://ftp.gnu.org/old-gnu/xboard/xboard-4.2.7.tar.gz", + "date": "1070057764" + } +] \ No newline at end of file diff --git a/swh/lister/gnu/tests/test_lister.py b/swh/lister/gnu/tests/test_lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/test_lister.py @@ -0,0 +1,33 @@ +# Copyright (C) 2019 the Software Heritage developers +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json + +from swh.lister.gnu.lister import find_tarballs, filter_directories + + +def test_filter_directories(): + f = open('swh/lister/gnu/tests/api_response.json') + api_response = 
json.load(f) + cleared_api_response = filter_directories(api_response) + for directory in cleared_api_response: + if directory['name'] not in ('gnu', 'old-gnu'): + assert False + + +def test_find_tarballs(): + f = open('swh/lister/gnu/tests/find_tarballs_output.json') + expected_list_of_all_tarballs = json.load(f) + + f = open('swh/lister/gnu/tests/file_structure.json') + file_structure = json.load(f) + list_of_all_tarballs = [] + list_of_all_tarballs.extend( + find_tarballs(file_structure[0]['contents'], + "https://ftp.gnu.org/gnu/artanis/")) + list_of_all_tarballs.extend( + find_tarballs(file_structure[1]['contents'], + "https://ftp.gnu.org/old-gnu/xboard/")) + + assert list_of_all_tarballs == expected_list_of_all_tarballs diff --git a/swh/lister/gnu/tests/test_tasks.py b/swh/lister/gnu/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/gnu/tests/test_tasks.py @@ -0,0 +1,27 @@ +from unittest.mock import patch + + +def test_ping(swh_app, celery_session_worker): + res = swh_app.send_task( + 'swh.lister.gnu.tasks.ping') + assert res + res.wait() + assert res.successful() + assert res.result == 'OK' + + +@patch('swh.lister.gnu.tasks.GNULister') +def test_lister(lister, swh_app, celery_session_worker): + # setup the mocked GNULister + lister.return_value = lister + lister.run.return_value = None + + res = swh_app.send_task( + 'swh.lister.gnu.tasks.GNUListerTask') + assert res + res.wait() + assert res.successful() + + lister.assert_called_once_with() + lister.db_last_index.assert_not_called() + lister.run.assert_called_once_with() diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,7 @@ """ MODEL = NpmModel 
LISTER_NAME = 'npm' + instance = 'npm' def __init__(self, api_baseurl='https://replicate.npmjs.com', per_page=1000, override_config=None): diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -2,6 +2,7 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import urllib.parse from swh.lister.core.indexing_lister import SWHIndexingHttpLister from swh.lister.phabricator.models import PhabricatorModel @@ -14,13 +15,17 @@ MODEL = PhabricatorModel LISTER_NAME = 'phabricator' - def __init__(self, forge_url, api_token, override_config=None): + def __init__(self, forge_url, api_token, instance=None, + override_config=None): if forge_url.endswith("/"): forge_url = forge_url[:-1] self.forge_url = forge_url api_endpoint = ('api/diffusion.repository.' 'search?api.token=%s') % api_token api_baseurl = '%s/%s' % (forge_url, api_endpoint) + if not instance: + instance = urllib.parse.urlparse(forge_url).hostname + self.instance = instance super().__init__(api_baseurl=api_baseurl, override_config=override_config) diff --git a/swh/lister/phabricator/tasks.py b/swh/lister/phabricator/tasks.py --- a/swh/lister/phabricator/tasks.py +++ b/swh/lister/phabricator/tasks.py @@ -6,9 +6,10 @@ from swh.lister.phabricator.lister import PhabricatorLister -def new_lister( - forge_url='https://forge.softwareheritage.org', api_token='', **kw): - return PhabricatorLister(forge_url=forge_url, api_token=api_token, **kw) +def new_lister(forge_url='https://forge.softwareheritage.org', api_token='', + instance='swh', **kw): + return PhabricatorLister(forge_url=forge_url, api_token=api_token, + instance=instance, **kw) @app.task(name=__name__ + '.IncrementalPhabricatorLister') diff --git a/swh/lister/phabricator/tests/test_lister.py b/swh/lister/phabricator/tests/test_lister.py --- a/swh/lister/phabricator/tests/test_lister.py +++ 
b/swh/lister/phabricator/tests/test_lister.py @@ -24,9 +24,11 @@ def get_fl(self, override_config=None): """(Override) Retrieve an instance of fake lister (fl). + """ if override_config or self.fl is None: - self.fl = self.Lister(forge_url='https://fakeurl', api_token='a-1', + self.fl = self.Lister(forge_url='https://fakeurl', instance='fake', + api_token='a-1', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 diff --git a/swh/lister/phabricator/tests/test_tasks.py b/swh/lister/phabricator/tests/test_tasks.py --- a/swh/lister/phabricator/tests/test_tasks.py +++ b/swh/lister/phabricator/tests/test_tasks.py @@ -24,6 +24,7 @@ assert res.successful() lister.assert_called_once_with( - api_token='', forge_url='https://forge.softwareheritage.org') + api_token='', forge_url='https://forge.softwareheritage.org', + instance='swh') lister.db_last_index.assert_called_once_with() lister.run.assert_called_once_with(min_bound=42) diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py --- a/swh/lister/pypi/lister.py +++ b/swh/lister/pypi/lister.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 the Software Heritage developers +# Copyright (C) 2018-2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -16,6 +16,7 @@ MODEL = PyPIModel LISTER_NAME = 'pypi' PAGE = 'https://pypi.org/simple/' + instance = 'pypi' # As of today only the main pypi.org is used def __init__(self, override_config=None): ListerOnePageApiTransport .__init__(self) @@ -28,7 +29,7 @@ needed for the ingestion task creation. """ - _type = 'origin-update-%s' % origin_type + _type = 'load-%s' % origin_type _policy = 'recurring' project_name = kwargs.get('name') project_metadata_url = kwargs.get('html_url') diff --git a/tox.ini b/tox.ini --- a/tox.ini +++ b/tox.ini @@ -3,6 +3,7 @@ [testenv:py3] deps = + swh.core[http] >= 0.0.61 .[testing] pytest-cov commands =