diff --git a/swh/lister/arch/tests/test_lister.py b/swh/lister/arch/tests/test_lister.py index abe8e81..daa8712 100644 --- a/swh/lister/arch/tests/test_lister.py +++ b/swh/lister/arch/tests/test_lister.py @@ -1,1400 +1,1394 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.lister.arch.lister import ArchLister expected_origins = [ { "url": "https://archlinux.org/packages/core/x86_64/dialog", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190211-1", "length": 180000, "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190724-1", "length": 180000, "filename": "dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190728-1", "length": 180000, "filename": "dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190806-1", "length": 182000, "filename": "dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190808-1", "length": 182000, "filename": "dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191110-1", "length": 183000, "filename": 
"dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191110-2", "length": 183000, "filename": "dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191209-1", "length": 183000, "filename": "dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191210-1", "length": 184000, "filename": "dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20200228-1", "length": 196000, "filename": "dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20200327-1", "length": 196000, "filename": "dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20201126-1", "length": 199000, "filename": "dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210117-1", "length": 200000, "filename": "dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210306-1", "length": 201000, "filename": "dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210319-1", 
"length": 201000, "filename": "dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210324-1", "length": 201000, "filename": "dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210509-1", "length": 198000, "filename": "dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210530-1", "length": 198000, "filename": "dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210621-1", "length": 199000, "filename": "dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20211107-1", "length": 197000, "filename": "dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20211214-1", "length": 197000, "filename": "dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20220117-1", "length": 199000, "filename": "dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20220414-1", "length": 198000, "filename": "dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190211-1", 
"last_modified": "2019-02-13T08:36:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190724-1", "last_modified": "2019-07-26T21:39:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190728-1", "last_modified": "2019-07-29T12:10:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190806-1", "last_modified": "2019-08-07T04:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190808-1", "last_modified": "2019-08-09T22:49:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-1", "last_modified": "2019-11-11T11:15:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-2", "last_modified": "2019-11-13T17:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191209-1", "last_modified": "2019-12-10T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191210-1", "last_modified": "2019-12-12T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200228-1", "last_modified": "2020-03-06T02:21:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200327-1", "last_modified": "2020-03-29T17:08:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20201126-1", "last_modified": "2020-11-27T12:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210117-1", "last_modified": "2021-01-18T18:05:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210306-1", "last_modified": "2021-03-07T11:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210319-1", "last_modified": "2021-03-20T00:12:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210324-1", "last_modified": "2021-03-26T17:53:00", }, { "arch": "x86_64", "repo": "core", 
"name": "dialog", "version": "1:1.3_20210509-1", "last_modified": "2021-05-16T02:04:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210530-1", "last_modified": "2021-05-31T14:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210621-1", "last_modified": "2021-06-23T02:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211107-1", "last_modified": "2021-11-09T14:06:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211214-1", "last_modified": "2021-12-14T09:26:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220117-1", "last_modified": "2022-01-19T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220414-1", "last_modified": "2022-04-16T03:59:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/x86_64/gnome-code-assistance", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-1", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-2", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-3", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": 
"https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-4", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "2:3.16.1+14+gaad6437-1", "length": 2000000, "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "2:3.16.1+14+gaad6437-2", "length": 2000000, "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+14+gaad6437-1", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+14+gaad6437-2", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+15+gb9ffc4d-1", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "3:3.16.1+r14+gaad6437-1", "length": 2000000, 
"filename": "gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-1", "last_modified": "2019-11-10T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-2", "last_modified": "2020-03-28T15:58:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-3", "last_modified": "2020-07-05T15:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-4", "last_modified": "2020-11-12T17:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-1", "last_modified": "2021-02-24T16:30:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-2", "last_modified": "2021-12-02T23:36:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-1", "last_modified": "2019-03-15T19:23:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-2", "last_modified": "2019-08-24T20:05:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+15+gb9ffc4d-1", "last_modified": "2019-08-25T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3:3.16.1+r14+gaad6437-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/core/x86_64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-1", "length": 78000, "filename": "gzip-1.10-1-x86_64.pkg.tar.xz", }, { "url": 
"https://archive.archlinux.org/packages/g/gzip/gzip-1.10-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-2", "length": 78000, "filename": "gzip-1.10-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-3-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-3", "length": 78000, "filename": "gzip-1.10-3-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.11-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1.11-1", "length": 82000, "filename": "gzip-1.11-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.12-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1.12-1", "length": 80000, "filename": "gzip-1.12-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-1", "last_modified": "2018-12-30T18:38:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-2", "last_modified": "2019-10-06T16:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-3", "last_modified": "2019-11-13T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.11-1", "last_modified": "2021-09-04T02:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.12-1", "last_modified": "2022-04-07T17:35:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/libasyncns", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "0.8+3+g68cd5af-2", "length": 16000, "filename": "libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "0.8+3+g68cd5af-3", "length": 17000, "filename": "libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", }, { "url": 
"https://archive.archlinux.org/packages/l/libasyncns/libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:0.8+r3+g68cd5af-1", "length": 17000, "filename": "libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-2", "last_modified": "2018-11-09T23:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-3", "last_modified": "2020-05-19T08:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "1:0.8+r3+g68cd5af-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.8.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.8.2-1", "length": 4000000, "filename": "mercurial-4.8.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9-1", "length": 4000000, "filename": "mercurial-4.9-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9.1-1", "length": 4000000, "filename": "mercurial-4.9.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0-1", "length": 4000000, "filename": "mercurial-5.0-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0.1-1", "length": 4000000, "filename": "mercurial-5.0.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0.2-1", "length": 4000000, 
"filename": "mercurial-5.0.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.1-1", "length": 4000000, "filename": "mercurial-5.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.1.2-1", "length": 4000000, "filename": "mercurial-5.1.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.2-1", "length": 4000000, "filename": "mercurial-5.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.1-1", "length": 4000000, "filename": "mercurial-5.2.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.2-1", "length": 5000000, "filename": "mercurial-5.2.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.2-2", "length": 4000000, "filename": "mercurial-5.2.2-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3-1", "length": 5000000, "filename": "mercurial-5.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3.1-1", "length": 4000000, "filename": "mercurial-5.3.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3.2-1", "length": 4000000, "filename": "mercurial-5.3.2-1-x86_64.pkg.tar.zst", }, { "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4-1", "length": 5000000, "filename": "mercurial-5.4-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4-2", "length": 5000000, "filename": "mercurial-5.4-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4.1-1", "length": 5000000, "filename": "mercurial-5.4.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4.2-1", "length": 5000000, "filename": "mercurial-5.4.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5-1", "length": 5000000, "filename": "mercurial-5.5-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5.1-1", "length": 5000000, "filename": "mercurial-5.5.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5.2-1", "length": 5000000, "filename": "mercurial-5.5.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-1", "length": 5000000, "filename": "mercurial-5.6-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-2", "length": 5000000, "filename": "mercurial-5.6-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-3", "length": 5000000, 
"filename": "mercurial-5.6-3-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6.1-1", "length": 5000000, "filename": "mercurial-5.6.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.7-1", "length": 5000000, "filename": "mercurial-5.7-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.7.1-1", "length": 5000000, "filename": "mercurial-5.7.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8-1", "length": 5000000, "filename": "mercurial-5.8-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8-2", "length": 5000000, "filename": "mercurial-5.8-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8.1-1", "length": 5000000, "filename": "mercurial-5.8.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.1-1", "length": 5000000, "filename": "mercurial-5.9.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.1-2", "length": 5000000, "filename": "mercurial-5.9.1-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.2-1", "length": 5000000, "filename": "mercurial-5.9.2-1-x86_64.pkg.tar.zst", }, { "url": 
"https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.3-1", "length": 5000000, "filename": "mercurial-5.9.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-1", "length": 5000000, "filename": "mercurial-6.0-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-2", "length": 5000000, "filename": "mercurial-6.0-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-3", "length": 5000000, "filename": "mercurial-6.0-3-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.1-1", "length": 5000000, "filename": "mercurial-6.0.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.2-1", "length": 5000000, "filename": "mercurial-6.0.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.3-1", "length": 5000000, "filename": "mercurial-6.0.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1-1", "length": 5000000, "filename": "mercurial-6.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1-2", "length": 5000000, "filename": "mercurial-6.1-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1.1-1", "length": 5000000, 
"filename": "mercurial-6.1.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1.2-1", "length": 5000000, "filename": "mercurial-6.1.2-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.8.2-1", "last_modified": "2019-01-15T20:31:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9-1", "last_modified": "2019-02-12T06:15:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9.1-1", "last_modified": "2019-03-30T17:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0-1", "last_modified": "2019-05-10T08:44:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.1-1", "last_modified": "2019-06-10T18:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.2-1", "last_modified": "2019-07-10T04:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1-1", "last_modified": "2019-08-17T19:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1.2-1", "last_modified": "2019-10-08T08:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2-1", "last_modified": "2019-11-28T06:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.1-1", "last_modified": "2020-01-06T12:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-1", "last_modified": "2020-01-15T14:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-2", "last_modified": "2020-01-30T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3-1", "last_modified": "2020-02-13T21:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.1-1", "last_modified": "2020-03-07T23:58:00", }, { 
"arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.2-1", "last_modified": "2020-04-05T17:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-1", "last_modified": "2020-05-10T17:19:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-2", "last_modified": "2020-06-04T13:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.1-1", "last_modified": "2020-06-06T12:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.2-1", "last_modified": "2020-07-02T21:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5-1", "last_modified": "2020-08-05T10:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.1-1", "last_modified": "2020-09-03T19:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.2-1", "last_modified": "2020-10-07T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-1", "last_modified": "2020-11-03T17:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-2", "last_modified": "2020-11-09T16:54:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-3", "last_modified": "2020-11-11T15:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6.1-1", "last_modified": "2020-12-05T12:29:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7-1", "last_modified": "2021-02-04T08:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7.1-1", "last_modified": "2021-03-11T07:51:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8-1", "last_modified": "2021-05-04T17:55:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8-2", "last_modified": "2021-05-08T22:08:00", }, { "arch": "x86_64", "repo": "extra", "name": 
"mercurial", "version": "5.8.1-1", "last_modified": "2021-07-13T07:04:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-1", "last_modified": "2021-09-01T12:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-2", "last_modified": "2021-09-24T17:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.2-1", "last_modified": "2021-10-07T21:52:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.3-1", "last_modified": "2021-10-27T07:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-1", "last_modified": "2021-11-25T17:10:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-2", "last_modified": "2021-11-30T20:53:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-3", "last_modified": "2021-12-02T12:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.1-1", "last_modified": "2022-01-08T10:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.2-1", "last_modified": "2022-02-03T13:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.3-1", "last_modified": "2022-02-23T20:50:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-1", "last_modified": "2022-03-03T18:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-2", "last_modified": "2022-03-04T08:37:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.1-1", "last_modified": "2022-04-07T18:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.2-1", "last_modified": "2022-05-07T11:03:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": 
"https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.1-3-any.pkg.tar.xz", # noqa: B950 "version": "2.6.1-3", "length": 40000, "filename": "python-hglib-2.6.1-3-any.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-1-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-1", "length": 43000, "filename": "python-hglib-2.6.2-1-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-2-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-2", "length": 43000, "filename": "python-hglib-2.6.2-2-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-3-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-3", "length": 43000, "filename": "python-hglib-2.6.2-3-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-4-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-4", "length": 43000, "filename": "python-hglib-2.6.2-4-any.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.1-3", "last_modified": "2019-11-06T14:08:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-1", "last_modified": "2020-11-19T22:29:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-2", "last_modified": "2020-11-19T22:31:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-3", "last_modified": "2020-11-19T22:35:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-4", "last_modified": "2021-12-03T00:44:00", }, ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/aarch64/core/gzip-1.12-1-aarch64.pkg.tar.xz", # noqa: B950 "length": 79640, "version": "1.12-1", "filename": 
"gzip-1.12-1-aarch64.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "aarch64", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:14", } ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/aarch64/extra/mercurial-6.1.3-1-aarch64.pkg.tar.xz", # noqa: B950 "length": 4931228, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-aarch64.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "aarch64", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:15:18", } ], }, }, { "url": "https://archlinuxarm.org/packages/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/any/community/python-hglib-2.6.2-4-any.pkg.tar.xz", # noqa: B950 "length": 41432, "version": "2.6.2-4", "filename": "python-hglib-2.6.2-4-any.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "any", "name": "python-hglib", "repo": "community", "version": "2.6.2-4", "last_modified": "2021-12-14T16:22:20", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/armv7h/core/gzip-1.12-1-armv7h.pkg.tar.xz", # noqa: B950 "length": 78468, "version": "1.12-1", "filename": "gzip-1.12-1-armv7h.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "armv7h", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:35", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/armv7h/extra/mercurial-6.1.3-1-armv7h.pkg.tar.xz", # noqa: B950 "length": 4897816, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-armv7h.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "armv7h", "name": 
"mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:13:08", } ], }, }, ] def test_arch_lister(datadir, requests_mock_datadir, swh_scheduler): lister = ArchLister(scheduler=swh_scheduler) res = lister.run() assert res.pages == 9 assert res.origins == 12 - expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) - scheduler_origins_sorted = sorted( - swh_scheduler.get_listed_origins(lister.lister_obj.id).results, - key=lambda x: x.url, - ) - - assert len(scheduler_origins_sorted) == len(expected_origins_sorted) + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert [ ( scheduled.visit_type, scheduled.url, - scheduled.extra_loader_arguments.get("artifacts"), - scheduled.extra_loader_arguments.get("arch_metadata"), + scheduled.extra_loader_arguments["artifacts"], + scheduled.extra_loader_arguments["arch_metadata"], ) - for scheduled in scheduler_origins_sorted + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "arch", - expected.get("url"), - expected.get("extra_loader_arguments").get("artifacts"), - expected.get("extra_loader_arguments").get("arch_metadata"), + expected["url"], + expected["extra_loader_arguments"]["artifacts"], + expected["extra_loader_arguments"]["arch_metadata"], ) - for expected in expected_origins_sorted + for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] diff --git a/swh/lister/aur/tests/test_lister.py b/swh/lister/aur/tests/test_lister.py index c403dad..46c08da 100644 --- a/swh/lister/aur/tests/test_lister.py +++ b/swh/lister/aur/tests/test_lister.py @@ -1,131 +1,125 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.lister.aur.lister import AurLister expected_origins = [ { 
"visit_type": "aur", "url": "https://aur.archlinux.org/hg-evolve.git", "extra_loader_arguments": { "artifacts": [ { "filename": "hg-evolve.tar.gz", "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/hg-evolve.tar.gz", # noqa: B950 "version": "10.5.1-1", } ], "aur_metadata": [ { "version": "10.5.1-1", "project_url": "https://www.mercurial-scm.org/doc/evolution/", "last_update": "2022-04-27T20:02:56+00:00", "pkgname": "hg-evolve", } ], }, }, { "visit_type": "aur", "url": "https://aur.archlinux.org/ibus-git.git", "extra_loader_arguments": { "artifacts": [ { "filename": "ibus-git.tar.gz", "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/ibus-git.tar.gz", # noqa: B950 "version": "1.5.23+12+gef4c5c7e-1", } ], "aur_metadata": [ { "version": "1.5.23+12+gef4c5c7e-1", "project_url": "https://github.com/ibus/ibus/wiki", "last_update": "2021-02-08T06:12:11+00:00", "pkgname": "ibus-git", } ], }, }, { "visit_type": "aur", "url": "https://aur.archlinux.org/libervia-web-hg.git", "extra_loader_arguments": { "artifacts": [ { "filename": "libervia-web-hg.tar.gz", "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/libervia-web-hg.tar.gz", # noqa: B950 "version": "0.9.0.r1492.3a34d78f2717-1", } ], "aur_metadata": [ { "version": "0.9.0.r1492.3a34d78f2717-1", "project_url": "http://salut-a-toi.org/", "last_update": "2022-02-26T15:30:58+00:00", "pkgname": "libervia-web-hg", } ], }, }, { "visit_type": "aur", "url": "https://aur.archlinux.org/tealdeer-git.git", "extra_loader_arguments": { "artifacts": [ { "filename": "tealdeer-git.tar.gz", "url": "https://aur.archlinux.org/cgit/aur.git/snapshot/tealdeer-git.tar.gz", # noqa: B950 "version": "r255.30b7c5f-1", } ], "aur_metadata": [ { "version": "r255.30b7c5f-1", "project_url": "https://github.com/dbrgn/tealdeer", "last_update": "2020-09-04T20:36:52+00:00", "pkgname": "tealdeer-git", } ], }, }, ] def test_aur_lister(datadir, requests_mock_datadir, swh_scheduler): lister = AurLister(scheduler=swh_scheduler) res = lister.run() 
assert res.pages == 4 assert res.origins == 4 - scheduler_origins_sorted = sorted( - swh_scheduler.get_listed_origins(lister.lister_obj.id).results, - key=lambda x: x.url, - ) - expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) - - assert len(scheduler_origins_sorted) == len(expected_origins_sorted) + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert [ ( scheduled.visit_type, scheduled.url, - scheduled.extra_loader_arguments.get("artifacts"), + scheduled.extra_loader_arguments["artifacts"], ) - for scheduled in scheduler_origins_sorted + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "aur", - expected.get("url"), - expected.get("extra_loader_arguments").get("artifacts"), + expected["url"], + expected["extra_loader_arguments"]["artifacts"], ) - for expected in expected_origins_sorted + for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] def test_aur_lister_directory_cleanup(datadir, requests_mock_datadir, swh_scheduler): lister = AurLister(scheduler=swh_scheduler) lister.run() # Repository directory should not exists after the lister runs assert not lister.DESTINATION_PATH.exists() diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py index c568dbf..e624e8e 100644 --- a/swh/lister/bitbucket/tests/test_lister.py +++ b/swh/lister/bitbucket/tests/test_lister.py @@ -1,184 +1,178 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime import json import os import pytest from swh.lister.bitbucket.lister import BitbucketLister @pytest.fixture def bb_api_repositories_page1(datadir): data_file_path = os.path.join(datadir, "bb_api_repositories_page1.json") with 
open(data_file_path, "r") as data_file: return json.load(data_file) @pytest.fixture def bb_api_repositories_page2(datadir): data_file_path = os.path.join(datadir, "bb_api_repositories_page2.json") with open(data_file_path, "r") as data_file: return json.load(data_file) def _check_listed_origins(lister_origins, scheduler_origins): """Asserts that the two collections have the same origins from the point of view of the lister""" - - sorted_lister_origins = list(sorted(lister_origins)) - sorted_scheduler_origins = list(sorted(scheduler_origins)) - - assert len(sorted_lister_origins) == len(sorted_scheduler_origins) - - for lo, so in zip(sorted_lister_origins, sorted_scheduler_origins): - assert lo.url == so.url - assert lo.last_update == so.last_update + assert {(lo.url, lo.last_update) for lo in lister_origins} == { + (so.url, so.last_update) for so in scheduler_origins + } def test_bitbucket_incremental_lister( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, ], ) lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) # First listing stats = lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert stats.pages == 2 assert stats.origins == 20 assert len(scheduler_origins) == 20 assert lister.updated lister_state = lister.get_state_from_scheduler() last_repo_cdate = lister_state.last_repo_cdate.isoformat() assert hasattr(lister_state, "last_repo_cdate") assert last_repo_cdate == bb_api_repositories_page2["values"][-1]["created_on"] # Second listing, restarting from last state lister.session.get = mocker.spy(lister.session, "get") lister.run() url_params = lister.url_params url_params["after"] = last_repo_cdate lister.session.get.assert_called_once_with(lister.API_URL, params=url_params) 
all_origins = ( bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] ) _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) def test_bitbucket_lister_rate_limit_hit( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1, "status_code": 200}, {"json": None, "status_code": 429}, {"json": None, "status_code": 429}, {"json": bb_api_repositories_page2, "status_code": 200}, ], ) lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) mocker.patch.object(lister.page_request.retry, "sleep") stats = lister.run() assert stats.pages == 2 assert stats.origins == 20 assert len(swh_scheduler.get_listed_origins(lister.lister_obj.id).results) == 20 def test_bitbucket_full_lister( swh_scheduler, requests_mock, mocker, bb_api_repositories_page1, bb_api_repositories_page2, ): """Simple Bitbucket listing with two pages containing 10 origins""" requests_mock.get( BitbucketLister.API_URL, [ {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, {"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2}, ], ) credentials = {"bitbucket": {"bitbucket": [{"username": "u", "password": "p"}]}} lister = BitbucketLister( scheduler=swh_scheduler, page_size=10, incremental=True, credentials=credentials ) assert lister.session.auth is not None # First do a incremental run to have an initial lister state stats = lister.run() last_lister_state = lister.get_state_from_scheduler() assert stats.origins == 20 # Then do the full run and verify lister state did not change # Modify last listed repo modification date to check it will be not saved # to lister state after its execution last_page2_repo = bb_api_repositories_page2["values"][-1] last_page2_repo["created_on"] = datetime.now().isoformat() last_page2_repo["updated_on"] = 
datetime.now().isoformat() lister = BitbucketLister(scheduler=swh_scheduler, page_size=10, incremental=False) assert lister.session.auth is None stats = lister.run() assert stats.pages == 2 assert stats.origins == 20 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results # 20 because scheduler upserts based on (id, type, url) assert len(scheduler_origins) == 20 # Modification on created_on SHOULD NOT impact lister state assert lister.get_state_from_scheduler() == last_lister_state # Modification on updated_on SHOULD impact lister state all_origins = ( bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] ) _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) diff --git a/swh/lister/crates/tests/test_lister.py b/swh/lister/crates/tests/test_lister.py index 2c62449..8b26379 100644 --- a/swh/lister/crates/tests/test_lister.py +++ b/swh/lister/crates/tests/test_lister.py @@ -1,234 +1,238 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from pathlib import Path from dulwich.repo import Repo from swh.lister.crates.lister import CratesLister, CratesListerState from swh.lister.crates.tests import prepare_repository_from_archive expected_origins = [ { "url": "https://crates.io/api/v1/crates/rand", "artifacts": [ { "checksums": { "sha256": "48a45b46c2a8c38348adb1205b13c3c5eb0174e0c0fec52cc88e9fb1de14c54d", # noqa: B950 }, "filename": "rand-0.1.1.crate", "url": "https://static.crates.io/crates/rand/rand-0.1.1.crate", "version": "0.1.1", }, { "checksums": { "sha256": "6e229ed392842fa93c1d76018d197b7e1b74250532bafb37b0e1d121a92d4cf7", # noqa: B950 }, "filename": "rand-0.1.2.crate", "url": "https://static.crates.io/crates/rand/rand-0.1.2.crate", "version": "0.1.2", }, ], "metadata": [ { "version": "0.1.1", 
"yanked": False, }, { "version": "0.1.2", "yanked": False, }, ], }, { "url": "https://crates.io/api/v1/crates/regex", "artifacts": [ { "checksums": { "sha256": "f0ff1ca641d3c9a2c30464dac30183a8b91cdcc959d616961be020cdea6255c5", # noqa: B950 }, "filename": "regex-0.1.0.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.0.crate", "version": "0.1.0", }, { "checksums": { "sha256": "a07bef996bd38a73c21a8e345d2c16848b41aa7ec949e2fedffe9edf74cdfb36", # noqa: B950 }, "filename": "regex-0.1.1.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.1.crate", "version": "0.1.1", }, { "checksums": { "sha256": "343bd0171ee23346506db6f4c64525de6d72f0e8cc533f83aea97f3e7488cbf9", # noqa: B950 }, "filename": "regex-0.1.2.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.2.crate", "version": "0.1.2", }, { "checksums": { "sha256": "defb220c4054ca1b95fe8b0c9a6e782dda684c1bdf8694df291733ae8a3748e3", # noqa: B950 }, "filename": "regex-0.1.3.crate", "url": "https://static.crates.io/crates/regex/regex-0.1.3.crate", "version": "0.1.3", }, ], "metadata": [ { "version": "0.1.0", "yanked": False, }, { "version": "0.1.1", "yanked": False, }, { "version": "0.1.2", "yanked": False, }, { "version": "0.1.3", "yanked": False, }, ], }, { "url": "https://crates.io/api/v1/crates/regex-syntax", "artifacts": [ { "checksums": { "sha256": "398952a2f6cd1d22bc1774fd663808e32cf36add0280dee5cdd84a8fff2db944", # noqa: B950 }, "filename": "regex-syntax-0.1.0.crate", "url": "https://static.crates.io/crates/regex-syntax/regex-syntax-0.1.0.crate", "version": "0.1.0", }, ], "metadata": [ { "version": "0.1.0", "yanked": False, }, ], }, ] expected_origins_incremental = [expected_origins[1], expected_origins[2]] def test_crates_lister(datadir, tmp_path, swh_scheduler): archive_path = Path(datadir, "fake-crates-repository.tar.gz") repo_url = prepare_repository_from_archive( archive_path, "crates.io-index", tmp_path ) lister = CratesLister(scheduler=swh_scheduler) 
lister.INDEX_REPOSITORY_URL = repo_url lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" res = lister.run() assert res.pages == 3 assert res.origins == 3 - expected_origins_sorted = sorted(expected_origins, key=lambda x: x.get("url")) - scheduler_origins_sorted = sorted( - swh_scheduler.get_listed_origins(lister.lister_obj.id).results, - key=lambda x: x.url, - ) - - for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted): - assert scheduled.visit_type == "crates" - assert scheduled.url == expected.get("url") - assert scheduled.extra_loader_arguments.get("artifacts") == expected.get( - "artifacts" + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert [ + ( + scheduled.visit_type, + scheduled.url, + scheduled.extra_loader_arguments["artifacts"], ) - - assert len(scheduler_origins_sorted) == len(expected_origins_sorted) + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) + ] == [ + ( + "crates", + expected["url"], + expected["artifacts"], + ) + for expected in sorted(expected_origins, key=lambda expected: expected["url"]) + ] def test_crates_lister_incremental(datadir, tmp_path, swh_scheduler): archive_path = Path(datadir, "fake-crates-repository.tar.gz") repo_url = prepare_repository_from_archive( archive_path, "crates.io-index", tmp_path ) lister = CratesLister(scheduler=swh_scheduler) lister.INDEX_REPOSITORY_URL = repo_url lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" # The lister has not run yet, get the index repository lister.get_index_repository() # Set a CratesListerState with a last commit value to force incremental case repo = Repo(lister.DESTINATION_PATH) # Lets set this last commit to third one from head step = list(repo.get_walker(max_entries=3))[-1] last_commit_state = CratesListerState(last_commit=step.commit.id.decode()) lister.state = last_commit_state res = lister.run() assert res.pages == 2 assert res.origins == 2 - 
expected_origins_sorted = sorted( - expected_origins_incremental, key=lambda x: x.get("url") - ) - scheduler_origins_sorted = sorted( - swh_scheduler.get_listed_origins(lister.lister_obj.id).results, - key=lambda x: x.url, - ) - - for scheduled, expected in zip(scheduler_origins_sorted, expected_origins_sorted): - assert scheduled.visit_type == "crates" - assert scheduled.url == expected.get("url") - assert scheduled.extra_loader_arguments.get("artifacts") == expected.get( - "artifacts" + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + assert [ + ( + scheduled.visit_type, + scheduled.url, + scheduled.extra_loader_arguments["artifacts"], ) - - assert len(scheduler_origins_sorted) == len(expected_origins_sorted) + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) + ] == [ + ( + "crates", + expected["url"], + expected["artifacts"], + ) + for expected in sorted( + expected_origins_incremental, key=lambda expected: expected["url"] + ) + ] def test_crates_lister_incremental_nothing_new(datadir, tmp_path, swh_scheduler): """Ensure incremental mode runs fine when the repository last commit is the same than lister.state.las-_commit""" archive_path = Path(datadir, "fake-crates-repository.tar.gz") repo_url = prepare_repository_from_archive( archive_path, "crates.io-index", tmp_path ) lister = CratesLister(scheduler=swh_scheduler) lister.INDEX_REPOSITORY_URL = repo_url lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" lister.get_index_repository() repo = Repo(lister.DESTINATION_PATH) # Set a CratesListerState with a last commit value to force incremental case last_commit_state = CratesListerState(last_commit=repo.head().decode()) lister.state = last_commit_state res = lister.run() assert res.pages == 0 assert res.origins == 0 def test_crates_lister_repository_cleanup(datadir, tmp_path, swh_scheduler): archive_path = Path(datadir, "fake-crates-repository.tar.gz") repo_url = 
prepare_repository_from_archive( archive_path, "crates.io-index", tmp_path ) lister = CratesLister(scheduler=swh_scheduler) lister.INDEX_REPOSITORY_URL = repo_url lister.DESTINATION_PATH = tmp_path.parent / "crates.io-index-tests" lister.run() # Repository directory should not exists after the lister runs assert not lister.DESTINATION_PATH.exists() diff --git a/swh/lister/gitea/tests/test_lister.py b/swh/lister/gitea/tests/test_lister.py index 08a17b5..90ec624 100644 --- a/swh/lister/gitea/tests/test_lister.py +++ b/swh/lister/gitea/tests/test_lister.py @@ -1,160 +1,153 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path from typing import Dict, List, Tuple import pytest import requests from swh.lister.gitea.lister import GiteaLister from swh.lister.gogs.lister import GogsListerPage from swh.scheduler.model import ListedOrigin TRYGITEA_URL = "https://try.gitea.io/api/v1/" TRYGITEA_P1_URL = TRYGITEA_URL + "repos/search?limit=3&page=1" TRYGITEA_P2_URL = TRYGITEA_URL + "repos/search?limit=3&page=2" @pytest.fixture def trygitea_p1(datadir) -> Tuple[str, Dict[str, str], GogsListerPage, List[str]]: text = Path(datadir, "https_try.gitea.io", "repos_page1").read_text() headers = { "Link": '<{p2}>; rel="next",<{p2}>; rel="last"'.format(p2=TRYGITEA_P2_URL) } page_data = json.loads(text) page_result = GogsListerPage( repos=GiteaLister.extract_repos(page_data), next_link=TRYGITEA_P2_URL ) origin_urls = [r["clone_url"] for r in page_data["data"]] return text, headers, page_result, origin_urls @pytest.fixture def trygitea_p2(datadir) -> Tuple[str, Dict[str, str], GogsListerPage, List[str]]: text = Path(datadir, "https_try.gitea.io", "repos_page2").read_text() headers = { "Link": '<{p1}>; rel="prev",<{p1}>; rel="first"'.format(p1=TRYGITEA_P1_URL) 
} page_data = json.loads(text) page_result = GogsListerPage( repos=GiteaLister.extract_repos(page_data), next_link=None ) origin_urls = [r["clone_url"] for r in page_data["data"]] return text, headers, page_result, origin_urls def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): """Asserts that the two collections have the same origin URLs. Does not test last_update.""" - - sorted_lister_urls = list(sorted(lister_urls)) - sorted_scheduler_origins = list(sorted(scheduler_origins)) - - assert len(sorted_lister_urls) == len(sorted_scheduler_origins) - - for l_url, s_origin in zip(sorted_lister_urls, sorted_scheduler_origins): - assert l_url == s_origin.url + assert set(lister_urls) == {origin.url for origin in scheduler_origins} def test_gitea_full_listing( swh_scheduler, requests_mock, mocker, trygitea_p1, trygitea_p2 ): """Covers full listing of multiple pages, rate-limit, page size (required for test), checking page results and listed origins, statelessness.""" kwargs = dict(url=TRYGITEA_URL, instance="try_gitea", page_size=3) lister = GiteaLister(scheduler=swh_scheduler, **kwargs) lister.get_origins_from_page = mocker.spy(lister, "get_origins_from_page") p1_text, p1_headers, p1_result, p1_origin_urls = trygitea_p1 p2_text, p2_headers, p2_result, p2_origin_urls = trygitea_p2 requests_mock.get(TRYGITEA_P1_URL, text=p1_text, headers=p1_headers) requests_mock.get( TRYGITEA_P2_URL, [ {"status_code": requests.codes.too_many_requests}, {"text": p2_text, "headers": p2_headers}, ], ) # end test setup stats = lister.run() # start test checks assert stats.pages == 2 assert stats.origins == 6 calls = [mocker.call(p1_result), mocker.call(p2_result)] lister.get_origins_from_page.assert_has_calls(calls) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins(p1_origin_urls + p2_origin_urls, scheduler_origins) lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link 
== TRYGITEA_P2_URL assert lister_state.last_seen_repo_id == p2_result.repos[-1]["id"] def test_gitea_auth_instance(swh_scheduler, requests_mock, trygitea_p1): """Covers token authentication, token from credentials, instance inference from URL.""" api_token = "teapot" instance = "try.gitea.io" creds = {"gitea": {instance: [{"username": "u", "password": api_token}]}} kwargs1 = dict(url=TRYGITEA_URL, api_token=api_token) lister = GiteaLister(scheduler=swh_scheduler, **kwargs1) # test API token assert "Authorization" in lister.session.headers assert lister.session.headers["Authorization"].lower() == "token %s" % api_token kwargs2 = dict(url=TRYGITEA_URL, credentials=creds) lister = GiteaLister(scheduler=swh_scheduler, **kwargs2) # test API token from credentials assert "Authorization" in lister.session.headers assert lister.session.headers["Authorization"].lower() == "token %s" % api_token # test instance inference from URL assert lister.instance assert "gitea" in lister.instance # infer something related to that # setup requests mocking p1_text, p1_headers, _, _ = trygitea_p1 p1_headers["Link"] = p1_headers["Link"].replace("next", "") # only 1 page base_url = TRYGITEA_URL + lister.REPO_LIST_PATH requests_mock.get(base_url, text=p1_text, headers=p1_headers) # now check the lister runs without error stats = lister.run() assert stats.pages == 1 @pytest.mark.parametrize("http_code", [400, 500, 502]) def test_gitea_list_http_error(swh_scheduler, requests_mock, http_code): """Test handling of some HTTP errors commonly encountered""" lister = GiteaLister(scheduler=swh_scheduler, url=TRYGITEA_URL, page_size=3) base_url = TRYGITEA_URL + lister.REPO_LIST_PATH requests_mock.get(base_url, status_code=http_code) with pytest.raises(requests.HTTPError): lister.run() scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert len(scheduler_origins) == 0 diff --git a/swh/lister/gogs/tests/test_lister.py b/swh/lister/gogs/tests/test_lister.py index 
b9f48a9..5c9b651 100644 --- a/swh/lister/gogs/tests/test_lister.py +++ b/swh/lister/gogs/tests/test_lister.py @@ -1,329 +1,322 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json from pathlib import Path from typing import List from unittest.mock import Mock import pytest from requests import HTTPError from swh.lister.gogs.lister import GogsLister, GogsListerPage, _parse_page_id from swh.scheduler.model import ListedOrigin TRY_GOGS_URL = "https://try.gogs.io/api/v1/" def try_gogs_page(n: int): return TRY_GOGS_URL + GogsLister.REPO_LIST_PATH + f"?page={n}&limit=3" P1 = try_gogs_page(1) P2 = try_gogs_page(2) P3 = try_gogs_page(3) P4 = try_gogs_page(4) @pytest.fixture def trygogs_p1(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page1").read_text() headers = {"Link": f'<{P2}>; rel="next"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P2 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p2(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page2").read_text() headers = {"Link": f'<{P3}>; rel="next",<{P1}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page3").read_text() headers = {"Link": f'<{P4}>; rel="next",<{P2}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def 
trygogs_p4(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page4").read_text() headers = {"Link": f'<{P3}>; rel="prev"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=P3 ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3_last(datadir): text = Path(datadir, "https_try.gogs.io", "repos_page3").read_text() headers = {"Link": f'<{P2}>; rel="prev",<{P1}>; rel="first"'} page_result = GogsListerPage( repos=GogsLister.extract_repos(json.loads(text)), next_link=None ) origin_urls = [r["clone_url"] for r in page_result.repos] return text, headers, page_result, origin_urls @pytest.fixture def trygogs_p3_empty(): origins_urls = [] body = {"data": [], "ok": True} headers = {"Link": f'<{P2}>; rel="prev",<{P1}>; rel="first"'} page_result = GogsListerPage(repos=GogsLister.extract_repos(body), next_link=None) text = json.dumps(body) return text, headers, page_result, origins_urls def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]): """Asserts that the two collections have the same origin URLs. 
Does not test last_update.""" - - sorted_lister_urls = list(sorted(lister_urls)) - sorted_scheduler_origins = list(sorted(scheduler_origins)) - - assert len(sorted_lister_urls) == len(sorted_scheduler_origins) - - for l_url, s_origin in zip(sorted_lister_urls, sorted_scheduler_origins): - assert l_url == s_origin.url + assert set(lister_urls) == {origin.url for origin in scheduler_origins} def test_gogs_full_listing( swh_scheduler, requests_mock, mocker, trygogs_p1, trygogs_p2, trygogs_p3_last ): kwargs = dict( url=TRY_GOGS_URL, instance="try_gogs", page_size=3, api_token="secret" ) lister = GogsLister(scheduler=swh_scheduler, **kwargs) lister.get_origins_from_page: Mock = mocker.spy(lister, "get_origins_from_page") p1_text, p1_headers, p1_result, p1_origin_urls = trygogs_p1 p2_text, p2_headers, p2_result, p2_origin_urls = trygogs_p2 p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last requests_mock.get(P1, text=p1_text, headers=p1_headers) requests_mock.get(P2, text=p2_text, headers=p2_headers) requests_mock.get(P3, text=p3_text, headers=p3_headers) stats = lister.run() assert stats.pages == 3 assert stats.origins == 9 calls = map(mocker.call, [p1_result, p2_result, p3_result]) lister.get_origins_from_page.assert_has_calls(list(calls)) scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results check_listed_origins( p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins ) assert ( lister.get_state_from_scheduler().last_seen_next_link == P3 ) # P3 didn't provide any next link so it remains the last_seen_next_link def test_gogs_auth_instance( swh_scheduler, requests_mock, trygogs_p1, trygogs_p2, trygogs_p3_empty ): """Covers token authentication, token from credentials, instance inference from URL.""" api_token = "secret" instance = "try_gogs" # Test lister initialization without api_token or credentials: with pytest.raises(ValueError, match="No credentials or API token provided"): kwargs1 = dict(url=TRY_GOGS_URL, 
@pytest.mark.parametrize("http_code", [400, 500, 502])
def test_gogs_list_http_error(
    swh_scheduler, requests_mock, http_code, trygogs_p1, trygogs_p3_last
):
    """Check that an HTTP error in the middle of a listing aborts the run.

    The repository endpoint is mocked with three successive responses: a valid
    first page, an error carrying ``http_code``, then another valid page.
    ``lister.run()`` must raise ``HTTPError`` and the scheduler must hold only
    the origins of the first page.
    """
    first_text, first_headers, _, first_origin_urls = trygogs_p1
    last_text, last_headers, _, _ = trygogs_p3_last

    lister = GogsLister(scheduler=swh_scheduler, url=TRY_GOGS_URL, api_token="secret")

    # One response per request to the same endpoint, in this order.
    first_page_ok = {"text": first_text, "headers": first_headers, "status_code": 200}
    failure = {"status_code": http_code}
    last_page_ok = {"text": last_text, "headers": last_headers, "status_code": 200}
    requests_mock.get(
        TRY_GOGS_URL + lister.REPO_LIST_PATH,
        [first_page_ok, failure, last_page_ok],
    )

    with pytest.raises(HTTPError):
        lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    # Only the first page is listed
    check_listed_origins(first_origin_urls, listed)
p3_text, p3_headers, p3_result, p3_origin_urls = trygogs_p3_last requests_mock.get(P3, text=p3_text, headers=p3_headers) lister.session.get = mocker.spy(lister.session, "get") attempt2_stats = lister.run() assert attempt2_stats.pages == 1 assert attempt2_stats.origins == 3 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results page_id = _parse_page_id(lister_state.last_seen_next_link) query_params = lister.query_params query_params["page"] = page_id lister.session.get.assert_called_once_with( TRY_GOGS_URL + lister.REPO_LIST_PATH, params=query_params ) # All the 9 origins (3 pages) should be passed on to the scheduler: check_listed_origins( p1_origin_urls + p2_origin_urls + p3_origin_urls, scheduler_origins ) lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P3 assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"] assert lister.updated lister.updated = False # Reset the flag # Third listing attempt: No new origins # The lister should revisit last seen page (P3) attempt3_stats = lister.run() assert attempt3_stats.pages == 1 assert attempt3_stats.origins == 3 lister_state = lister.get_state_from_scheduler() assert lister_state.last_seen_next_link == P3 assert lister_state.last_seen_repo_id == p3_result.repos[-1]["id"] assert lister.updated is False # No new origins so state isn't updated. # Fourth listing attempt: Page 4 is introduced and returns 3 new origins # The lister should revisit last seen page (P3) as well as P4. 
def check_listed_origins(lister_urls: List[str], scheduler_origins: List[ListedOrigin]):
    """Assert that the scheduler holds exactly the URLs produced by the lister.

    Both sides are compared as sets of URLs, so neither ordering nor
    duplicates influence the outcome.
    """
    expected_urls = set(lister_urls)
    recorded_urls = {listed.url for listed in scheduler_origins}
    assert expected_urls == recorded_urls
@pytest.fixture
def mock_pypi_xmlrpc(mocker, swh_scheduler):
    """Fake the XML-RPC service the PyPI lister calls during an incremental listing.

    The lister module's ``ServerProxy`` is patched to return a fake proxy that
    serves a fixed changelog, and the module's ``sleep`` is patched so the test
    never actually waits.

    To retrieve or update the faked data, open a python3 toplevel and execute
    the following:

    .. code:: python

        from datetime import timezone, datetime, timedelta
        from xmlrpc.client import ServerProxy
        from swh.scheduler.utils import utcnow
        RPC_URL = "https://pypi.org/pypi"
        cli = ServerProxy(RPC_URL)
        last_serial = cli.changelog_last_serial()
        # 10854808
        last_state_serial = 2168587
        results = cli.changelog_since_serial(last_state_serial)

    Returns:
        A 3-tuple ``(highest_serial, data, mock_serverproxy)``: the highest
        serial present in the faked changelog, the changelog entries
        themselves, and the mock standing in for ``ServerProxy``.

    """
    data = [
        ["wordsmith", None, 1465998124, "add Owner DoublePlusAwks", 2168628],
        ["wordsmith", "0.1", 1465998123, "new release", 2168629],
        ["wordsmith", "0.1", 1465998131, "update classifiers", 2168630],
        [
            "UFx",
            "1.0",
            1465998207,
            "update author_email, home_page, summary, description",
            2168631,
        ],
        ["UFx", "1.0", 1465998236, "remove file UFx-1.0.tar.gz", 2168632],
        ["wordsmith", "0.1", 1465998309, "update classifiers", 2168633],
        [
            "wordsmith",
            "0.1",
            1465998406,
            "update summary, description, classifiers",
            2168634,
        ],
        ["property-manager", "2.0", 1465998436, "new release", 2168635],
        [
            "property-manager",
            "2.0",
            1465998439,
            "add source file property-manager-2.0.tar.gz",
            2168636,
        ],
        ["numtest", "2.0.0", 1465998446, "new release", 2168637],
        ["property-manager", "2.1", 1465998468, "new release", 2168638],
        [
            "property-manager",
            "2.1",
            1465998472,
            "add source file property-manager-2.1.tar.gz",
            2168639,
        ],
        ["kafka-utils", "0.2.0", 1465998477, "new release", 2168640],
        [
            "kafka-utils",
            "0.2.0",
            1465998480,
            "add source file kafka-utils-0.2.0.tar.gz",
            2168641,
        ],
        ["numtest", "2.0.1", 1465998520, "new release", 2168642],
        ["coala-bears", "0.3.0.dev20160615134909", 1465998552, "new release", 2168643],
        [
            "coala-bears",
            "0.3.0.dev20160615134909",
            1465998556,
            "add py3 file coala_bears-0.3.0.dev20160615134909-py3-none-any.whl",
            2168644,
        ],
        ["django_sphinxsearch", "0.4.0", 1465998571, "new release", 2168645],
        [
            "django_sphinxsearch",
            "0.4.0",
            1465998573,
            "add source file django_sphinxsearch-0.4.0.tar.gz",
            2168646,
        ],
        [
            "coala-bears",
            "0.3.0.dev20160615134909",
            1465998589,
            "add source file coala-bears-0.3.0.dev20160615134909.tar.gz",
            2168647,
        ],
    ]
    # The fake service must report the *largest* serial of the dataset as its
    # last serial; using the smallest one would contradict the changelog it
    # serves and the state the lister is expected to store afterwards.
    highest_serial = max(map(to_serial, data))

    def sleep(seconds):
        pass

    # Replace the lister module's ``sleep`` with a mock so nothing really waits.
    mocker.patch("swh.lister.pypi.lister.sleep").return_value = sleep

    class FakeServerProxy:
        """Fake Server Proxy"""

        def changelog_last_serial(self):
            return highest_serial

        def changelog_since_serial(self, serial):
            # The whole dataset is returned regardless of the requested serial.
            return data

    mock_serverproxy = mocker.patch("swh.lister.pypi.lister.ServerProxy")
    mock_serverproxy.return_value = FakeServerProxy()

    return highest_serial, data, mock_serverproxy
@pytest.fixture
def tuleap_projects(datadir) -> Tuple[str, Dict[str, str], List[str]]:
    """Provide the canned project listing stored under ``datadir``.

    Returns the raw response text, the pagination headers to serve with it,
    and the ``shortname`` of every project found in that text.
    """
    body = (Path(datadir) / "https_tuleap.net" / "projects").read_text()
    pagination_headers = {
        "X-PAGINATION-LIMIT-MAX": "50",
        "X-PAGINATION-LIMIT": "10",
        "X-PAGINATION-SIZE": "2",
    }
    shortnames = [project["shortname"] for project in json.loads(body)]
    return body, pagination_headers, shortnames
@pytest.fixture
def tuleap_repo_3(datadir) -> Tuple[str, Dict[str, str], List[RepoPage], List[str]]:
    """Provide the canned ``repo_3`` response stored under ``datadir``.

    Returns the raw response text, the pagination headers to serve with it,
    the simplified repository entries built from that text, and the ``uri``
    of each of those entries.
    """
    body = (Path(datadir) / "https_tuleap.net" / "repo_3").read_text()
    pagination_headers = {
        "X-PAGINATION-LIMIT-MAX": "50",
        "X-PAGINATION-LIMIT": "10",
        "X-PAGINATION-SIZE": "0",
    }
    simplified_repos = [
        TuleapLister.results_simplified(url=TULEAP_URL, repo_type="git", repo=raw_repo)
        for raw_repo in json.loads(body)["repositories"]
    ]
    uris = [simplified["uri"] for simplified in simplified_repos]
    return body, pagination_headers, simplified_repos, uris
@pytest.mark.parametrize("http_code", [400, 500, 502])
def test_tuleap_list_http_error(swh_scheduler, requests_mock, http_code):
    """Check that an HTTP error on the projects endpoint aborts the listing.

    ``lister.run()`` must raise ``requests.HTTPError`` and the scheduler must
    not hold any origin for this lister afterwards.
    """
    # The projects endpoint answers with the parametrized error status.
    requests_mock.get(TULEAP_PROJECTS_URL, status_code=http_code)

    lister = TuleapLister(scheduler=swh_scheduler, url=TULEAP_URL)
    with pytest.raises(requests.HTTPError):
        lister.run()

    listed = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
    assert len(listed) == 0