diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -65,6 +65,7 @@
         lister.packagist=swh.lister.packagist:register
         lister.phabricator=swh.lister.phabricator:register
         lister.pypi=swh.lister.pypi:register
+        lister.json=swh.lister.json:register
     ''',
     classifiers=[
         "Programming Language :: Python :: 3",
diff --git a/swh/lister/json/__init__.py b/swh/lister/json/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def register():
+    """Describe the JSON lister plugin.
+
+    Returns:
+        dict holding the model classes, the lister class and the names
+        of the task modules of this lister.
+    """
+    from .models import JSONModel
+    from .lister import JSONLister
+
+    return {'models': [JSONModel],
+            'lister': JSONLister,
+            'task_modules': ['%s.tasks' % __name__],
+            }
diff --git a/swh/lister/json/lister.py b/swh/lister/json/lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/lister.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import requests
+
+from .models import JSONModel
+
+from swh.scheduler import utils
+from swh.lister.core.simple_lister import SimpleLister
+
+
+class JSONLister(SimpleLister):
+    """Lister of the "url" sources published in a JSON manifest.
+
+    The manifest is downloaded from ``self.url``; ``task_dict`` describes
+    the 'load-tar' task of one source.
+    """
+    MODEL = JSONModel
+    LISTER_NAME = 'json'
+    instance = 'json'
+    DEFAULT_URL = "https://sources.nixos.org/sources.json"
+
+    def __init__(self, override_config=None, url=None):
+        """
+        Args:
+            override_config: passed as is to the parent constructor
+            url: location of the JSON manifest (DEFAULT_URL when omitted)
+        """
+        self.url = self.DEFAULT_URL if url is None else url
+        super().__init__(override_config=override_config)
+
+    def task_dict(self, origin_type, origin_url, **kwargs):
+        """
+        Return task format dict
+
+        This is overridden from the lister_base as more information is
+        needed for the ingestion task creation.
+
+        Args:
+            origin_type: not used, the task type is always 'load-tar'
+            origin_url: URL of the archive to load
+            kwargs: 'policy' (default: 'oneshot') and 'name' are read
+        """
+        return utils.create_task_dict(
+            'load-tar', kwargs.get('policy', 'oneshot'),
+            kwargs.get('name'),
+            origin_url, tarballs=[{'archive': origin_url, 'date': 0}])
+
+    def safely_issue_request(self, identifier):
+        '''
+        Make network request to download the JSON file.
+
+        Args:
+            identifier: resource identifier (unused)
+        Returns:
+            The decoded JSON document
+        Raises:
+            requests.exceptions.HTTPError: on a 4xx or 5xx response
+        '''
+        response = requests.get(self.url,
+                                allow_redirects=True)
+        # Fail on an HTTP error instead of trying to parse an error page
+        response.raise_for_status()
+        # TODO: support gzip content as well
+        return json.loads(response.content.decode('utf-8'))
+
+    def list_packages(self, response):
+        """List packages from the response
+
+        Only the entries of response["sources"] whose type is "url" are kept.
+        """
+        return [r for r in response["sources"] if r["type"] == "url"]
+
+    def get_model_from_repo(self, source):
+        """Transform from source representation to model
+
+        The URL of the source fills every identifying field of the model.
+        """
+        return {
+            # We could use the content hash if it is provided
+            'uid': source['url'],
+            'name': source['url'],
+            'full_name': source['url'],
+            'html_url': source['url'],
+            'origin_url': source['url'],
+            'origin_type': 'tar',
+        }
diff --git a/swh/lister/json/models.py b/swh/lister/json/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/models.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from sqlalchemy import Column, String, Integer
+
+from ..core.models import ModelBase
+
+
+class JSONModel(ModelBase):
+    """a JSON packages list representation
+
+    """
+    __tablename__ = 'json'
+
+    uid = Column(String, primary_key=True)
diff --git a/swh/lister/json/tasks.py b/swh/lister/json/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.celery_backend.config import app
+
+from .lister import JSONLister
+
+
+@app.task(name=__name__ + '.JSONListerTask')
+def json_lister(**lister_args):
+    """Run a JSONLister built from the given keyword arguments."""
+    JSONLister(**lister_args).run()
+
+
+@app.task(name=__name__ + '.ping')
+def ping():
+    """Return 'OK'."""
+    return 'OK'
diff --git a/swh/lister/json/tests/__init__.py b/swh/lister/json/tests/__init__.py
new file mode 100644
diff --git a/swh/lister/json/tests/conftest.py b/swh/lister/json/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/tests/conftest.py
@@ -0,0 +1,22 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from swh.lister.core.tests.conftest import *  # noqa
+
+
+@pytest.fixture
+def lister_json(swh_listers):
+    lister = swh_listers['json']
+
+    # Replace the lister configuration.
+    # NOTE(review): JSONLister downloads from ``lister.url`` and does not
+    # read config['url'] -- confirm this override is still needed.
+    lister.config = {
+        'url': "https://nixos.org/sources.json"
+    }
+
+    return lister
diff --git a/swh/lister/json/tests/data/sources.nixos.org/sources.json b/swh/lister/json/tests/data/sources.nixos.org/sources.json
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/tests/data/sources.nixos.org/sources.json
@@ -0,0 +1,309 @@
+{
+  "sources": [
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org//hello/hello-2.10.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://github.com/curl/curl/commit/5fc28510a4664f4.patch"
+    },
+    {
+      "type": "url",
+      "url": "https://curl.haxx.se/download/curl-7.64.0.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/gzip/gzip-1.10.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "http://ftp.uni-kl.de/pub/linux/suse/people/sbrabec/bzip2/tarballs/bzip2-1.0.6.0.1.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://tukaani.org/xz/xz-5.2.4.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-001"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-002"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-003"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-004"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-005"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-006"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-007"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-008"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-009"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-010"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-011"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-012"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-013"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-014"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-015"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-016"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-017"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-018"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-019"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-020"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-021"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-022"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4-patches/bash44-023"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bash/bash-4.4.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/binutils/binutils-2.31.1.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/coreutils/coreutils-8.30.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/diffutils/diffutils-3.7.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/findutils/findutils-4.6.0.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/gawk/gawk-4.2.1.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/make/make-4.2.1.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/sed/sed-4.7.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/tar/tar-1.31.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/grep/grep-3.3.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://git.savannah.gnu.org/cgit/patch.git/patch/?id=b5a91a01e5d0897facdd0f49d64b76b0f02b43e1"
+    },
+    {
+      "type": "url",
+      "url": "https://git.savannah.gnu.org/cgit/patch.git/patch/?id=123eaff0d5d1aebe128295959435b9ca5909c26d"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/patch/patch-2.7.6.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://nixos.org/releases/patchelf/patchelf-0.9/patchelf-0.9.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/ed/ed-1.15.tar.lz"
+    },
+    {
+      "type": "url",
+      "url": "https://git.savannah.nongnu.org/cgit/attr.git/patch/?id=14adc898a36948267bfe5c63b399996879e94c98"
+    },
+    {
+      "type": "url",
+      "url": "http://mirror.easyname.at/nongnu/attr/attr-2.4.48.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "http://mirror.easyname.at/nongnu/acl/acl-2.2.53.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://www.zlib.net/fossils/zlib-1.2.11.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftp.pcre.org/pub/pcre/pcre-8.42.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://salsa.debian.org/glibc-team/glibc/raw/49767c9f7de4828220b691b29de0baf60d8a54ec/debian/patches/localedata/locale-C.diff"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/glibc/glibc-2.27.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "http://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.19.16.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://bigsearcher.com/mirrors/gcc/releases/gcc-7.4.0/gcc-7.4.0.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "http://tarballs.nixos.org/stdenv-linux/x86_64/4907fc9e8d0d82b28b3c56e3a478a2882f1d700f/bootstrap-tools.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://pkgconfig.freedesktop.org/releases/pkg-config-0.29.2.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://rt.perl.org/Public/Ticket/Attachment/1502646/807252/0001-Fix-missing-build-dependency-for-pods.patch"
+    },
+    {
+      "type": "url",
+      "url": "https://cpan.metacpan.org/src/5.0/perl-5.28.1.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://www.openssl.org/source/openssl-1.0.2t.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://github.com/nghttp2/nghttp2/releases/download/v1.36.0/nghttp2-1.36.0.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "http://web.mit.edu/kerberos/dist/krb5/1.15/krb5-1.15.2.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://www.libssh2.org/download/libssh2-1.9.0.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/bison/bison-3.3.2.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/gmp/gmp-6.1.2.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "http://mirror.easyname.at/nongnu/lzip/lzip-1.21.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/gettext/gettext-0.19.8.1.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/mpfr/mpfr-4.0.2.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/mpc/mpc-1.1.0.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://fossies.org/linux/misc/old/libelf-0.8.13.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "http://isl.gforge.inria.fr/isl-0.17.1.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/texinfo/texinfo-6.5.tar.xz"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/which/which-2.21.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "http://dist.schmorp.de/libev/Attic/libev-4.25.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://c-ares.haxx.se/download/c-ares-1.15.0.tar.gz"
+    },
+    {
+      "type": "url",
+      "url": "https://salsa.debian.org/debian/keyutils/raw/4cecffcb8e2a2aa4ef41777ed40e4e4bcfb2e5bf/debian/patches/Make-build-reproducible.patch"
+    },
+    {
+      "type": "url",
+      "url": "https://people.redhat.com/dhowells/keyutils/keyutils-1.6.tar.bz2"
+    },
+    {
+      "type": "url",
+      "url": "https://ftpmirror.gnu.org/m4/m4-1.4.18.tar.bz2"
+    }
+  ],
+  "version": 1
+}
diff --git a/swh/lister/json/tests/test_lister.py b/swh/lister/json/tests/test_lister.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/tests/test_lister.py
@@ -0,0 +1,24 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_lister_no_page_check_results(lister_json, requests_mock_datadir):
+    lister = lister_json
+    lister.run()
+
+    r = lister.scheduler.search_tasks(task_type='load-tar')
+    # One task per "url" entry of the sources.json fixture
+    assert len(r) == 76
+
+    for row in r:
+        assert row['type'] == 'load-tar'
+        # arguments check
+        args = row['arguments']['args']
+        assert len(args) == 2
+
+        # kwargs
+        kwargs = row['arguments']['kwargs']
+        assert list(kwargs.keys()) == ['tarballs']
+
+        assert row['policy'] == 'oneshot'
diff --git a/swh/lister/json/tests/test_tasks.py b/swh/lister/json/tests/test_tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/json/tests/test_tasks.py
@@ -0,0 +1,27 @@
+from unittest.mock import patch
+
+
+def test_ping(swh_app, celery_session_worker):
+    res = swh_app.send_task(
+        'swh.lister.json.tasks.ping')
+    assert res
+    res.wait()
+    assert res.successful()
+    assert res.result == 'OK'
+
+
+@patch('swh.lister.json.tasks.JSONLister')
+def test_lister(lister, swh_app, celery_session_worker):
+    # setup the mocked JSONLister
+    lister.return_value = lister
+    lister.run.return_value = None
+
+    res = swh_app.send_task(
+        'swh.lister.json.tasks.JSONListerTask')
+    assert res
+    res.wait()
+    assert res.successful()
+
+    lister.assert_called_once_with()
+    lister.db_last_index.assert_not_called()
+    lister.run.assert_called_once_with()