diff --git a/swh/lister/arch/tests/test_lister.py b/swh/lister/arch/tests/test_lister.py index daa8712..fa644d3 100644 --- a/swh/lister/arch/tests/test_lister.py +++ b/swh/lister/arch/tests/test_lister.py @@ -1,1394 +1,1395 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information + from swh.lister.arch.lister import ArchLister expected_origins = [ { "url": "https://archlinux.org/packages/core/x86_64/dialog", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190211-1", "length": 180000, "filename": "dialog-1:1.3_20190211-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190724-1", "length": 180000, "filename": "dialog-1:1.3_20190724-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190728-1", "length": 180000, "filename": "dialog-1:1.3_20190728-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190806-1", "length": 182000, "filename": "dialog-1:1.3_20190806-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20190808-1", "length": 182000, "filename": "dialog-1:1.3_20190808-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191110-1", "length": 183000, "filename": "dialog-1:1.3_20191110-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191110-2", "length": 183000, "filename": "dialog-1:1.3_20191110-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191209-1", "length": 183000, "filename": "dialog-1:1.3_20191209-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:1.3_20191210-1", "length": 184000, "filename": "dialog-1:1.3_20191210-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20200228-1", "length": 196000, "filename": "dialog-1:1.3_20200228-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20200327-1", "length": 196000, "filename": "dialog-1:1.3_20200327-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20201126-1", "length": 199000, "filename": "dialog-1:1.3_20201126-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210117-1", "length": 200000, "filename": "dialog-1:1.3_20210117-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210306-1", "length": 201000, "filename": "dialog-1:1.3_20210306-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210319-1", "length": 201000, "filename": "dialog-1:1.3_20210319-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210324-1", "length": 201000, "filename": "dialog-1:1.3_20210324-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210509-1", "length": 198000, "filename": "dialog-1:1.3_20210509-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210530-1", "length": 198000, "filename": "dialog-1:1.3_20210530-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20210621-1", "length": 199000, "filename": "dialog-1:1.3_20210621-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20211107-1", "length": 197000, "filename": "dialog-1:1.3_20211107-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20211214-1", "length": 197000, "filename": "dialog-1:1.3_20211214-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20220117-1", "length": 199000, "filename": "dialog-1:1.3_20220117-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/d/dialog/dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:1.3_20220414-1", "length": 198000, "filename": "dialog-1:1.3_20220414-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190211-1", "last_modified": "2019-02-13T08:36:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190724-1", "last_modified": "2019-07-26T21:39:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190728-1", "last_modified": "2019-07-29T12:10:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190806-1", "last_modified": "2019-08-07T04:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20190808-1", "last_modified": "2019-08-09T22:49:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-1", "last_modified": "2019-11-11T11:15:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191110-2", "last_modified": "2019-11-13T17:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191209-1", "last_modified": "2019-12-10T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20191210-1", "last_modified": "2019-12-12T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200228-1", "last_modified": "2020-03-06T02:21:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20200327-1", "last_modified": "2020-03-29T17:08:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20201126-1", "last_modified": "2020-11-27T12:19:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210117-1", "last_modified": "2021-01-18T18:05:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210306-1", "last_modified": "2021-03-07T11:40:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210319-1", "last_modified": "2021-03-20T00:12:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210324-1", "last_modified": "2021-03-26T17:53:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210509-1", "last_modified": "2021-05-16T02:04:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210530-1", "last_modified": "2021-05-31T14:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20210621-1", "last_modified": "2021-06-23T02:59:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211107-1", "last_modified": "2021-11-09T14:06:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20211214-1", "last_modified": "2021-12-14T09:26:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220117-1", "last_modified": "2022-01-19T09:56:00", }, { "arch": "x86_64", "repo": "core", "name": "dialog", "version": "1:1.3_20220414-1", "last_modified": "2022-04-16T03:59:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/x86_64/gnome-code-assistance", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-1", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-2", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-2-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-3", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-3-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:3.16.1+15+g0fd8b5f-4", "length": 2000000, "filename": "gnome-code-assistance-1:3.16.1+15+g0fd8b5f-4-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "2:3.16.1+14+gaad6437-1", "length": 2000000, "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "2:3.16.1+14+gaad6437-2", "length": 2000000, "filename": "gnome-code-assistance-2:3.16.1+14+gaad6437-2-x86_64.pkg.tar.zst", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+14+gaad6437-1", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+14+gaad6437-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+14+gaad6437-2", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+14+gaad6437-2-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "3.16.1+15+gb9ffc4d-1", "length": 2000000, "filename": "gnome-code-assistance-3.16.1+15+gb9ffc4d-1-x86_64.pkg.tar.xz", # noqa: B950 }, { "url": "https://archive.archlinux.org/packages/g/gnome-code-assistance/gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "3:3.16.1+r14+gaad6437-1", "length": 2000000, "filename": "gnome-code-assistance-3:3.16.1+r14+gaad6437-1-x86_64.pkg.tar.zst", # noqa: B950 }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-1", "last_modified": "2019-11-10T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-2", "last_modified": "2020-03-28T15:58:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-3", "last_modified": "2020-07-05T15:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "1:3.16.1+15+g0fd8b5f-4", "last_modified": "2020-11-12T17:28:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-1", "last_modified": "2021-02-24T16:30:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "2:3.16.1+14+gaad6437-2", "last_modified": "2021-12-02T23:36:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-1", "last_modified": "2019-03-15T19:23:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+14+gaad6437-2", "last_modified": "2019-08-24T20:05:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3.16.1+15+gb9ffc4d-1", "last_modified": "2019-08-25T20:55:00", }, { "arch": "x86_64", "repo": "community", "name": "gnome-code-assistance", "version": "3:3.16.1+r14+gaad6437-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/core/x86_64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-1", "length": 78000, "filename": "gzip-1.10-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-2", "length": 78000, "filename": "gzip-1.10-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.10-3-x86_64.pkg.tar.xz", # noqa: B950 "version": "1.10-3", "length": 78000, "filename": "gzip-1.10-3-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.11-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1.11-1", "length": 82000, "filename": "gzip-1.11-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/g/gzip/gzip-1.12-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1.12-1", "length": 80000, "filename": "gzip-1.12-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-1", "last_modified": "2018-12-30T18:38:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-2", "last_modified": "2019-10-06T16:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.10-3", "last_modified": "2019-11-13T15:55:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.11-1", "last_modified": "2021-09-04T02:02:00", }, { "arch": "x86_64", "repo": "core", "name": "gzip", "version": "1.12-1", "last_modified": "2022-04-07T17:35:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/libasyncns", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", # noqa: B950 "version": "0.8+3+g68cd5af-2", "length": 16000, "filename": "libasyncns-0.8+3+g68cd5af-2-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "0.8+3+g68cd5af-3", "length": 17000, "filename": "libasyncns-0.8+3+g68cd5af-3-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/l/libasyncns/libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "1:0.8+r3+g68cd5af-1", "length": 17000, "filename": "libasyncns-1:0.8+r3+g68cd5af-1-x86_64.pkg.tar.zst", # noqa: B950 }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-2", "last_modified": "2018-11-09T23:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "0.8+3+g68cd5af-3", "last_modified": "2020-05-19T08:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "libasyncns", "version": "1:0.8+r3+g68cd5af-1", "last_modified": "2022-05-18T17:23:00", }, ], }, }, { "url": "https://archlinux.org/packages/extra/x86_64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.8.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.8.2-1", "length": 4000000, "filename": "mercurial-4.8.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9-1", "length": 4000000, "filename": "mercurial-4.9-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-4.9.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "4.9.1-1", "length": 4000000, "filename": "mercurial-4.9.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0-1", "length": 4000000, "filename": "mercurial-5.0-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0.1-1", "length": 4000000, "filename": "mercurial-5.0.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.0.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.0.2-1", "length": 4000000, "filename": "mercurial-5.0.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.1-1", "length": 4000000, "filename": "mercurial-5.1-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.1.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.1.2-1", "length": 4000000, "filename": "mercurial-5.1.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2-1-x86_64.pkg.tar.xz", # noqa: B950 "version": "5.2-1", "length": 4000000, "filename": "mercurial-5.2-1-x86_64.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.1-1", "length": 4000000, "filename": "mercurial-5.2.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.2-1", "length": 5000000, "filename": "mercurial-5.2.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.2.2-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.2.2-2", "length": 4000000, "filename": "mercurial-5.2.2-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3-1", "length": 5000000, "filename": "mercurial-5.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3.1-1", "length": 4000000, "filename": "mercurial-5.3.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.3.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.3.2-1", "length": 4000000, "filename": "mercurial-5.3.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4-1", "length": 5000000, "filename": "mercurial-5.4-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4-2", "length": 5000000, "filename": "mercurial-5.4-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4.1-1", "length": 5000000, "filename": "mercurial-5.4.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.4.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.4.2-1", "length": 5000000, "filename": "mercurial-5.4.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5-1", "length": 5000000, "filename": "mercurial-5.5-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5.1-1", "length": 5000000, "filename": "mercurial-5.5.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.5.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.5.2-1", "length": 5000000, "filename": "mercurial-5.5.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-1", "length": 5000000, "filename": "mercurial-5.6-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-2", "length": 5000000, "filename": "mercurial-5.6-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6-3", "length": 5000000, "filename": "mercurial-5.6-3-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.6.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.6.1-1", "length": 5000000, "filename": "mercurial-5.6.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.7-1", "length": 5000000, "filename": "mercurial-5.7-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.7.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.7.1-1", "length": 5000000, "filename": "mercurial-5.7.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8-1", "length": 5000000, "filename": "mercurial-5.8-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8-2", "length": 5000000, "filename": "mercurial-5.8-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.8.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.8.1-1", "length": 5000000, "filename": "mercurial-5.8.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.1-1", "length": 5000000, "filename": "mercurial-5.9.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.1-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.1-2", "length": 5000000, "filename": "mercurial-5.9.1-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.2-1", "length": 5000000, "filename": "mercurial-5.9.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-5.9.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "5.9.3-1", "length": 5000000, "filename": "mercurial-5.9.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-1", "length": 5000000, "filename": "mercurial-6.0-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-2", "length": 5000000, "filename": "mercurial-6.0-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0-3-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0-3", "length": 5000000, "filename": "mercurial-6.0-3-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.1-1", "length": 5000000, "filename": "mercurial-6.0.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.2-1", "length": 5000000, "filename": "mercurial-6.0.2-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.0.3-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.0.3-1", "length": 5000000, "filename": "mercurial-6.0.3-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1-1", "length": 5000000, "filename": "mercurial-6.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1-2-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1-2", "length": 5000000, "filename": "mercurial-6.1-2-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.1-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1.1-1", "length": 5000000, "filename": "mercurial-6.1.1-1-x86_64.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/m/mercurial/mercurial-6.1.2-1-x86_64.pkg.tar.zst", # noqa: B950 "version": "6.1.2-1", "length": 5000000, "filename": "mercurial-6.1.2-1-x86_64.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.8.2-1", "last_modified": "2019-01-15T20:31:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9-1", "last_modified": "2019-02-12T06:15:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "4.9.1-1", "last_modified": "2019-03-30T17:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0-1", "last_modified": "2019-05-10T08:44:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.1-1", "last_modified": "2019-06-10T18:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.0.2-1", "last_modified": "2019-07-10T04:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1-1", "last_modified": "2019-08-17T19:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.1.2-1", "last_modified": "2019-10-08T08:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2-1", "last_modified": "2019-11-28T06:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.1-1", "last_modified": "2020-01-06T12:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-1", "last_modified": "2020-01-15T14:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.2.2-2", "last_modified": "2020-01-30T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3-1", "last_modified": "2020-02-13T21:40:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.1-1", "last_modified": "2020-03-07T23:58:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.3.2-1", "last_modified": "2020-04-05T17:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-1", "last_modified": "2020-05-10T17:19:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4-2", "last_modified": "2020-06-04T13:38:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.1-1", "last_modified": "2020-06-06T12:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.4.2-1", "last_modified": "2020-07-02T21:35:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5-1", "last_modified": "2020-08-05T10:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.1-1", "last_modified": "2020-09-03T19:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.5.2-1", "last_modified": "2020-10-07T20:05:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-1", "last_modified": "2020-11-03T17:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-2", "last_modified": "2020-11-09T16:54:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6-3", "last_modified": "2020-11-11T15:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.6.1-1", "last_modified": "2020-12-05T12:29:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7-1", "last_modified": "2021-02-04T08:41:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.7.1-1", "last_modified": "2021-03-11T07:51:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8-1", "last_modified": "2021-05-04T17:55:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8-2", "last_modified": "2021-05-08T22:08:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.8.1-1", "last_modified": "2021-07-13T07:04:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-1", "last_modified": "2021-09-01T12:48:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.1-2", "last_modified": "2021-09-24T17:39:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.2-1", "last_modified": "2021-10-07T21:52:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "5.9.3-1", "last_modified": "2021-10-27T07:20:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-1", "last_modified": "2021-11-25T17:10:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-2", "last_modified": "2021-11-30T20:53:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0-3", "last_modified": "2021-12-02T12:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.1-1", "last_modified": "2022-01-08T10:07:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.2-1", "last_modified": "2022-02-03T13:28:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.0.3-1", "last_modified": "2022-02-23T20:50:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-1", "last_modified": "2022-03-03T18:06:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1-2", "last_modified": "2022-03-04T08:37:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.1-1", "last_modified": "2022-04-07T18:26:00", }, { "arch": "x86_64", "repo": "extra", "name": "mercurial", "version": "6.1.2-1", "last_modified": "2022-05-07T11:03:00", }, ], }, }, { "url": "https://archlinux.org/packages/community/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.1-3-any.pkg.tar.xz", # noqa: B950 "version": "2.6.1-3", "length": 40000, "filename": "python-hglib-2.6.1-3-any.pkg.tar.xz", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-1-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-1", "length": 43000, "filename": "python-hglib-2.6.2-1-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-2-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-2", "length": 43000, "filename": "python-hglib-2.6.2-2-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-3-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-3", "length": 43000, "filename": "python-hglib-2.6.2-3-any.pkg.tar.zst", }, { "url": "https://archive.archlinux.org/packages/p/python-hglib/python-hglib-2.6.2-4-any.pkg.tar.zst", # noqa: B950 "version": "2.6.2-4", "length": 43000, "filename": "python-hglib-2.6.2-4-any.pkg.tar.zst", }, ], "arch_metadata": [ { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.1-3", "last_modified": "2019-11-06T14:08:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-1", "last_modified": "2020-11-19T22:29:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-2", "last_modified": "2020-11-19T22:31:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-3", "last_modified": "2020-11-19T22:35:00", }, { "arch": "any", "repo": "community", "name": "python-hglib", "version": "2.6.2-4", "last_modified": "2021-12-03T00:44:00", }, ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/aarch64/core/gzip-1.12-1-aarch64.pkg.tar.xz", # noqa: B950 "length": 79640, "version": "1.12-1", "filename": "gzip-1.12-1-aarch64.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "aarch64", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:14", } ], }, }, { "url": "https://archlinuxarm.org/packages/aarch64/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/aarch64/extra/mercurial-6.1.3-1-aarch64.pkg.tar.xz", # noqa: B950 "length": 4931228, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-aarch64.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "aarch64", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:15:18", } ], }, }, { "url": "https://archlinuxarm.org/packages/any/python-hglib", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/any/community/python-hglib-2.6.2-4-any.pkg.tar.xz", # noqa: B950 "length": 41432, "version": "2.6.2-4", "filename": "python-hglib-2.6.2-4-any.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "any", "name": "python-hglib", "repo": "community", "version": "2.6.2-4", "last_modified": "2021-12-14T16:22:20", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/gzip", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/armv7h/core/gzip-1.12-1-armv7h.pkg.tar.xz", # noqa: B950 "length": 78468, "version": "1.12-1", "filename": "gzip-1.12-1-armv7h.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "armv7h", "name": "gzip", "repo": "core", "version": "1.12-1", "last_modified": "2022-04-07T21:08:35", } ], }, }, { "url": "https://archlinuxarm.org/packages/armv7h/mercurial", "visit_type": "arch", "extra_loader_arguments": { "artifacts": [ { "url": "https://uk.mirror.archlinuxarm.org/armv7h/extra/mercurial-6.1.3-1-armv7h.pkg.tar.xz", # noqa: B950 "length": 4897816, "version": "6.1.3-1", "filename": "mercurial-6.1.3-1-armv7h.pkg.tar.xz", } ], "arch_metadata": [ { "arch": "armv7h", "name": "mercurial", "repo": "extra", "version": "6.1.3-1", "last_modified": "2022-06-02T22:13:08", } ], }, }, ] def test_arch_lister(datadir, requests_mock_datadir, swh_scheduler): lister = ArchLister(scheduler=swh_scheduler) res = lister.run() assert res.pages == 9 - assert res.origins == 12 + assert res.origins == 11 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results assert [ ( scheduled.visit_type, scheduled.url, scheduled.extra_loader_arguments["artifacts"], scheduled.extra_loader_arguments["arch_metadata"], ) for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "arch", expected["url"], expected["extra_loader_arguments"]["artifacts"], expected["extra_loader_arguments"]["arch_metadata"], ) for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] diff --git a/swh/lister/conda/tests/test_lister.py b/swh/lister/conda/tests/test_lister.py index 0a67ce3..244d61a 100644 --- a/swh/lister/conda/tests/test_lister.py +++ b/swh/lister/conda/tests/test_lister.py @@ -1,94 +1,94 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.lister.conda.lister import CondaLister def test_conda_lister_free_channel(datadir, requests_mock_datadir, swh_scheduler): lister = CondaLister( scheduler=swh_scheduler, channel="free", archs=["linux-64", "osx-64", "win-64"] ) res = lister.run() assert res.pages == 3 - assert res.origins == 14 + assert res.origins == 11 def test_conda_lister_conda_forge_channel( datadir, requests_mock_datadir, swh_scheduler ): lister = CondaLister( scheduler=swh_scheduler, url="https://conda.anaconda.org", channel="conda-forge", archs=["linux-64"], ) res = lister.run() assert res.pages == 1 assert res.origins == 2 scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results expected_origins = [ { "url": "https://anaconda.org/conda-forge/21cmfast", "artifacts": { "linux-64/3.0.2-py36h1af98f8_1": { "url": "https://conda.anaconda.org/conda-forge/linux-64/21cmfast-3.0.2-py36h1af98f8_1.tar.bz2", # noqa: B950 "date": "2020-11-11T16:04:49.658000+00:00", "version": "3.0.2", "filename": "21cmfast-3.0.2-py36h1af98f8_1.tar.bz2", "checksums": { "md5": "d65ab674acf3b7294ebacaec05fc5b54", "sha256": "1154fceeb5c4ee9bb97d245713ac21eb1910237c724d2b7103747215663273c2", # noqa: B950 }, } }, }, { "url": "https://anaconda.org/conda-forge/lifetimes", "artifacts": { "linux-64/0.11.1-py36h9f0ad1d_1": { "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", # noqa: B950 "date": "2020-07-06T12:19:36.425000+00:00", "version": "0.11.1", "filename": "lifetimes-0.11.1-py36h9f0ad1d_1.tar.bz2", "checksums": { "md5": "faa398f7ba0d60ce44aa6eeded490cee", "sha256": "f82a352dfae8abceeeaa538b220fd9c5e4aa4e59092a6a6cea70b9ec0581ea03", # noqa: B950 }, }, "linux-64/0.11.1-py36hc560c46_1": { "url": "https://conda.anaconda.org/conda-forge/linux-64/lifetimes-0.11.1-py36hc560c46_1.tar.bz2", # noqa: B950 "date": "2020-07-06T12:19:37.032000+00:00", "version": "0.11.1", "filename": "lifetimes-0.11.1-py36hc560c46_1.tar.bz2", "checksums": { "md5": "c53a689a4c5948e84211bdfc23e3fe68", "sha256": "76146c2ebd6e3b65928bde53a2585287759d77beba785c0eeb889ee565c0035d", # noqa: B950 }, }, }, }, ] assert len(scheduler_origins) == len(expected_origins) assert [ ( scheduled.visit_type, scheduled.url, scheduled.extra_loader_arguments["artifacts"], ) for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) ] == [ ( "conda", expected["url"], expected["artifacts"], ) for expected in sorted(expected_origins, key=lambda expected: expected["url"]) ] diff --git a/swh/lister/pattern.py b/swh/lister/pattern.py index d188896..7492683 100644 --- a/swh/lister/pattern.py +++ b/swh/lister/pattern.py @@ -1,316 +1,321 @@ # Copyright (C) 2020-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from dataclasses import dataclass import logging -from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, TypeVar +from typing import Any, Dict, Generic, Iterable, Iterator, List, Optional, Set, TypeVar from urllib.parse import urlparse import requests from tenacity.before_sleep import before_sleep_log from swh.core.config import load_from_envvar from swh.core.utils import grouper from swh.scheduler import get_scheduler, model from swh.scheduler.interface import SchedulerInterface from . import USER_AGENT_TEMPLATE from .utils import http_retry logger = logging.getLogger(__name__) @dataclass class ListerStats: pages: int = 0 origins: int = 0 def __add__(self, other: ListerStats) -> ListerStats: return self.__class__(self.pages + other.pages, self.origins + other.origins) def __iadd__(self, other: ListerStats): self.pages += other.pages self.origins += other.origins def dict(self) -> Dict[str, int]: return {"pages": self.pages, "origins": self.origins} StateType = TypeVar("StateType") PageType = TypeVar("PageType") BackendStateType = Dict[str, Any] CredentialsType = Optional[Dict[str, Dict[str, List[Dict[str, str]]]]] class Lister(Generic[StateType, PageType]): """The base class for a Software Heritage lister. A lister scrapes a page by page list of origins from an upstream (a forge, the API of a package manager, ...), and massages the results of that scrape into a list of origins that are recorded by the scheduler backend. The main loop of the lister, :meth:`run`, basically revolves around the :meth:`get_pages` iterator, which sets up the lister state, then yields the scrape results page by page. The :meth:`get_origins_from_page` method converts the pages into a list of :class:`model.ListedOrigin`, sent to the scheduler at every page. The :meth:`commit_page` method can be used to update the lister state after a page of origins has been recorded in the scheduler backend. The :func:`finalize` method is called at lister teardown (whether the run has been successful or not) to update the local :attr:`state` object before it's sent to the database. This method must set the :attr:`updated` attribute if an updated state needs to be sent to the scheduler backend. This method can call :func:`get_state_from_scheduler` to refresh and merge the lister state from the scheduler before it's finalized (and potentially minimize the risk of race conditions between concurrent runs of the lister). The state of the lister is serialized and deserialized from the dict stored in the scheduler backend, using the :meth:`state_from_dict` and :meth:`state_to_dict` methods. Args: scheduler: the instance of the Scheduler being used to register the origins listed by this lister url: a URL representing this lister, e.g. the API's base URL instance: the instance name, to uniquely identify this lister instance, if not provided the URL network location will be used credentials: dictionary of credentials for all listers. The first level identifies the :attr:`LISTER_NAME`, the second level the lister :attr:`instance`. The final level is a list of dicts containing the expected credentials for the given instance of that lister. Generic types: - *StateType*: concrete lister type; should usually be a :class:`dataclass` for stricter typing - *PageType*: type of scrape results; can usually be a :class:`requests.Response`, or a :class:`dict` """ LISTER_NAME: str = "" def __init__( self, scheduler: SchedulerInterface, url: str, instance: Optional[str] = None, credentials: CredentialsType = None, ): if not self.LISTER_NAME: raise ValueError("Must set the LISTER_NAME attribute on Lister classes") self.url = url if instance is not None: self.instance = instance else: self.instance = urlparse(url).netloc self.scheduler = scheduler if not credentials: credentials = {} self.credentials = list( credentials.get(self.LISTER_NAME, {}).get(self.instance, []) ) # store the initial state of the lister self.state = self.get_state_from_scheduler() self.updated = False self.session = requests.Session() # Declare the USER_AGENT is more sysadm-friendly for the forge we list self.session.headers.update( {"User-Agent": USER_AGENT_TEMPLATE % self.LISTER_NAME} ) + self.recorded_origins: Set[str] = set() + @http_retry(before_sleep=before_sleep_log(logger, logging.WARNING)) def http_request(self, url: str, method="GET", **kwargs) -> requests.Response: logger.debug("Fetching URL %s with params %s", url, kwargs.get("params")) response = self.session.request(method, url, **kwargs) if response.status_code not in (200, 304): logger.warning( "Unexpected HTTP status code %s on %s: %s", response.status_code, response.url, response.content, ) response.raise_for_status() return response def run(self) -> ListerStats: """Run the lister. Returns: A counter with the number of pages and origins seen for this run of the lister. """ full_stats = ListerStats() + self.recorded_origins = set() try: for page in self.get_pages(): full_stats.pages += 1 origins = self.get_origins_from_page(page) - full_stats.origins += self.send_origins(origins) + sent_origins = self.send_origins(origins) + self.recorded_origins.update(sent_origins) + full_stats.origins = len(self.recorded_origins) self.commit_page(page) finally: self.finalize() if self.updated: self.set_state_in_scheduler() return full_stats def get_state_from_scheduler(self) -> StateType: """Update the state in the current instance from the state in the scheduler backend. This updates :attr:`lister_obj`, and returns its (deserialized) current state, to allow for comparison with the local state. Returns: the state retrieved from the scheduler backend """ self.lister_obj = self.scheduler.get_or_create_lister( name=self.LISTER_NAME, instance_name=self.instance ) return self.state_from_dict(self.lister_obj.current_state) def set_state_in_scheduler(self) -> None: """Update the state in the scheduler backend from the state of the current instance. Raises: swh.scheduler.exc.StaleData: in case of a race condition between concurrent listers (from :meth:`swh.scheduler.Scheduler.update_lister`). """ self.lister_obj.current_state = self.state_to_dict(self.state) self.lister_obj = self.scheduler.update_lister(self.lister_obj) # State management to/from the scheduler def state_from_dict(self, d: BackendStateType) -> StateType: """Convert the state stored in the scheduler backend (as a dict), to the concrete StateType for this lister.""" raise NotImplementedError def state_to_dict(self, state: StateType) -> BackendStateType: """Convert the StateType for this lister to its serialization as dict for storage in the scheduler. Values must be JSON-compatible as that's what the backend database expects. """ raise NotImplementedError def finalize(self) -> None: """Custom hook to finalize the lister state before returning from the main loop. This method must set :attr:`updated` if the lister has done some work. If relevant, this method can use :meth`get_state_from_scheduler` to merge the current lister state with the one from the scheduler backend, reducing the risk of race conditions if we're running concurrent listings. This method is called in a `finally` block, which means it will also run when the lister fails. """ pass # Actual listing logic def get_pages(self) -> Iterator[PageType]: """Retrieve a list of pages of listed results. This is the main loop of the lister. Returns: an iterator of raw pages fetched from the platform currently being listed. """ raise NotImplementedError def get_origins_from_page(self, page: PageType) -> Iterator[model.ListedOrigin]: """Extract a list of :class:`model.ListedOrigin` from a raw page of results. Args: page: a single page of results Returns: an iterator for the origins present on the given page of results """ raise NotImplementedError def commit_page(self, page: PageType) -> None: """Custom hook called after the current page has been committed in the scheduler backend. This method can be used to update the state after a page of origins has been successfully recorded in the scheduler backend. If the new state should be recorded at the point the lister completes, the :attr:`updated` attribute must be set. """ pass - def send_origins(self, origins: Iterable[model.ListedOrigin]) -> int: + def send_origins(self, origins: Iterable[model.ListedOrigin]) -> List[str]: """Record a list of :class:`model.ListedOrigin` in the scheduler. Returns: - the number of listed origins recorded in the scheduler + the list of origin URLs recorded in scheduler database """ - count = 0 + recorded_origins = [] for batch_origins in grouper(origins, n=1000): ret = self.scheduler.record_listed_origins(batch_origins) - count += len(ret) + recorded_origins += [origin.url for origin in ret] - return count + return recorded_origins @classmethod def from_config(cls, scheduler: Dict[str, Any], **config: Any): """Instantiate a lister from a configuration dict. This is basically a backwards-compatibility shim for the CLI. Args: scheduler: instantiation config for the scheduler config: the configuration dict for the lister, with the following keys: - credentials (optional): credentials list for the scheduler - any other kwargs passed to the lister. Returns: the instantiated lister """ # Drop the legacy config keys which aren't used for this generation of listers. for legacy_key in ("storage", "lister", "celery"): config.pop(legacy_key, None) # Instantiate the scheduler scheduler_instance = get_scheduler(**scheduler) return cls(scheduler=scheduler_instance, **config) @classmethod def from_configfile(cls, **kwargs: Any): """Instantiate a lister from the configuration loaded from the SWH_CONFIG_FILENAME envvar, with potential extra keyword arguments if their value is not None. Args: kwargs: kwargs passed to the lister instantiation """ config = dict(load_from_envvar()) config.update({k: v for k, v in kwargs.items() if v is not None}) return cls.from_config(**config) class StatelessLister(Lister[None, PageType], Generic[PageType]): def state_from_dict(self, d: BackendStateType) -> None: """Always return empty state""" return None def state_to_dict(self, state: None) -> BackendStateType: """Always set empty state""" return {} diff --git a/swh/lister/tests/test_pattern.py b/swh/lister/tests/test_pattern.py index 192f8f7..554a8d1 100644 --- a/swh/lister/tests/test_pattern.py +++ b/swh/lister/tests/test_pattern.py @@ -1,200 +1,217 @@ # Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import TYPE_CHECKING, Any, Dict, Iterator, List import pytest from swh.lister import pattern from swh.scheduler.model import ListedOrigin StateType = Dict[str, str] OriginType = Dict[str, str] PageType = List[OriginType] class InstantiableLister(pattern.Lister[StateType, PageType]): """A lister that can only be instantiated, not run.""" LISTER_NAME = "test-pattern-lister" def state_from_dict(self, d: Dict[str, str]) -> StateType: return d def test_instantiation(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) # check the lister was registered in the scheduler backend stored_lister = swh_scheduler.get_or_create_lister( name="test-pattern-lister", instance_name="example.com" ) assert stored_lister == lister.lister_obj with pytest.raises(NotImplementedError): lister.run() def test_lister_instance_name(swh_scheduler): lister = InstantiableLister( scheduler=swh_scheduler, url="https://example.org", instance="example" ) assert lister.instance == "example" lister = InstantiableLister(scheduler=swh_scheduler, url="https://example.org") assert lister.instance == "example.org" def test_instantiation_from_configfile(swh_scheduler, mocker): mock_load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar") mock_get_scheduler = mocker.patch("swh.lister.pattern.get_scheduler") mock_load_from_envvar.return_value = { "scheduler": {}, "url": "foo", "instance": "bar", } mock_get_scheduler.return_value = swh_scheduler lister = InstantiableLister.from_configfile() assert lister.url == "foo" assert lister.instance == "bar" lister = InstantiableLister.from_configfile(url="bar", instance="foo") assert lister.url == "bar" assert lister.instance == "foo" lister = InstantiableLister.from_configfile(url=None, instance="foo") assert lister.url == "foo" assert lister.instance == "foo" if TYPE_CHECKING: _Base = pattern.Lister[Any, PageType] else: _Base = object class ListerMixin(_Base): def get_pages(self) -> Iterator[PageType]: for pageno in range(2): yield [ {"url": f"https://example.com/{pageno:02d}{i:03d}"} for i in range(10) ] def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]: assert self.lister_obj.id is not None for origin in page: yield ListedOrigin( lister_id=self.lister_obj.id, url=origin["url"], visit_type="git" ) def check_listed_origins(swh_scheduler, lister, stored_lister): """Check that the listed origins match the ones in the lister""" # Gather the origins that are supposed to be listed lister_urls = sorted( sum([[o["url"] for o in page] for page in lister.get_pages()], []) ) # And check the state of origins in the scheduler ret = swh_scheduler.get_listed_origins() assert ret.next_page_token is None assert len(ret.results) == len(lister_urls) for origin, expected_url in zip(ret.results, lister_urls): assert origin.url == expected_url assert origin.lister_id == stored_lister.id class RunnableLister(ListerMixin, InstantiableLister): """A lister that can be run.""" def state_to_dict(self, state: StateType) -> Dict[str, str]: return state def finalize(self) -> None: self.state["updated"] = "yes" self.updated = True def test_run(swh_scheduler): lister = RunnableLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) assert "updated" not in lister.state update_date = lister.lister_obj.updated run_result = lister.run() assert run_result.pages == 2 assert run_result.origins == 20 stored_lister = swh_scheduler.get_or_create_lister( name="test-pattern-lister", instance_name="example.com" ) # Check that the finalize operation happened assert stored_lister.updated > update_date assert stored_lister.current_state["updated"] == "yes" check_listed_origins(swh_scheduler, lister, stored_lister) class InstantiableStatelessLister(pattern.StatelessLister[PageType]): LISTER_NAME = "test-stateless-lister" def test_stateless_instantiation(swh_scheduler): lister = InstantiableStatelessLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com", ) # check the lister was registered in the scheduler backend stored_lister = swh_scheduler.get_or_create_lister( name="test-stateless-lister", instance_name="example.com" ) assert stored_lister == lister.lister_obj assert stored_lister.current_state == {} assert lister.state is None with pytest.raises(NotImplementedError): lister.run() class RunnableStatelessLister(ListerMixin, InstantiableStatelessLister): def finalize(self): self.updated = True def test_stateless_run(swh_scheduler): lister = RunnableStatelessLister( scheduler=swh_scheduler, url="https://example.com", instance="example.com" ) update_date = lister.lister_obj.updated run_result = lister.run() assert run_result.pages == 2 assert run_result.origins == 20 stored_lister = swh_scheduler.get_or_create_lister( name="test-stateless-lister", instance_name="example.com" ) # Check that the finalize operation happened assert stored_lister.updated > update_date assert stored_lister.current_state == {} # And that all origins are stored check_listed_origins(swh_scheduler, lister, stored_lister) + + +class ListerWithSameOriginInMultiplePages(RunnableStatelessLister): + def get_pages(self) -> Iterator[PageType]: + for _ in range(2): + yield [{"url": "https://example.org/user/project"}] + + +def test_listed_origins_count(swh_scheduler): + lister = ListerWithSameOriginInMultiplePages( + scheduler=swh_scheduler, url="https://example.org", instance="example.org" + ) + + run_result = lister.run() + + assert run_result.pages == 2 + assert run_result.origins == 1