diff --git a/swh/lister/nuget/__init__.py b/swh/lister/nuget/__init__.py index 73aaafa..acf0e1b 100644 --- a/swh/lister/nuget/__init__.py +++ b/swh/lister/nuget/__init__.py @@ -1,79 +1,86 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ NuGet lister ============ The NuGet lister discover origins from `nuget.org`_, NuGet is the package manager for .NET. As .NET packages mostly contains binaries, we keep only track of packages that have a Dvcs repository (GIT, SVN, Mercurial...) url usable as an origin. The `nuget.org/packages`_ list 301,206 packages as of September 2022. Origins retrieving strategy --------------------------- Nuget.org provides an `http api`_ with several endpoint to discover and list packages and versions. -The recommended way to retrieve all packages is to use the `catalog`_ api endpoint. -It provides a first endpoint that list all available pages. We then iterate to get -content of related pages. +The recommended way to `retrieve all packages`_ is to use the `catalog`_ api endpoint. +It provides a `catalog index endpoint`_ that lists all available pages. We then iterate to +get content of related pages. + +The lister is incremental following a `cursor`_ principle, based on the value of +``commitTimeStamp`` from the catalog index endpoint. It retrieves only pages for which +``commitTimeStamp`` is greater than ``lister.state.last_listing_date``. Page listing ------------ Each page returns a list of packages which is the data of the response request. Origins from page ----------------- For each entry in a page listing we get related metadata through its `package metadata`_ http api endpoint. It returns uri for linked archives that contains binary, not the original source code. Our strategy is then to get a related GIT repository. 
We use another endpoint for each package to get its `package manifest`_, a .nuspec file (xml data) which may contains a GIT repository url. If we found one, it is used as origin. Running tests ------------- Activate the virtualenv and run from within swh-lister directory:: pytest -s -vv --log-cli-level=DEBUG swh/lister/nuget/tests Testing with Docker ------------------- Change directory to swh/docker then launch the docker environment:: docker compose up -d Then schedule a nuget listing task:: docker compose exec swh-scheduler swh scheduler task add -p oneshot list-nuget You can follow lister execution by displaying logs of swh-lister service:: docker compose logs -f swh-lister .. _nuget.org: https://nuget.org .. _nuget.org/packages: https://www.nuget.org/packages .. _http api: https://api.nuget.org/v3/index.json .. _catalog: https://learn.microsoft.com/en-us/nuget/api/catalog-resource +.. _catalog index endpoint: https://learn.microsoft.com/en-us/nuget/api/catalog-resource#catalog-page-object-in-the-index +.. _retrieve all packages: https://learn.microsoft.com/en-us/nuget/guides/api/query-for-all-published-packages#initialize-a-cursor +.. _cursor: https://learn.microsoft.com/en-us/nuget/api/catalog-resource#cursor .. _package metadata: https://learn.microsoft.com/en-us/nuget/api/registration-base-url-resource -.. _package manifest: https://learn.microsoft.com/en-us/nuget/api/package-base-address-resource#download-package-manifest-nuspec # noqa: B950 -""" +.. 
_package manifest: https://learn.microsoft.com/en-us/nuget/api/package-base-address-resource#download-package-manifest-nuspec +""" # noqa: B950 def register(): from .lister import NugetLister return { "lister": NugetLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py index 51652ec..54a6c22 100644 --- a/swh/lister/nuget/lister.py +++ b/swh/lister/nuget/lister.py @@ -1,114 +1,158 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from dataclasses import dataclass +from datetime import datetime import logging -from typing import Dict, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional from bs4 import BeautifulSoup +import iso8601 from requests.exceptions import HTTPError from swh.scheduler.interface import SchedulerInterface from swh.scheduler.model import ListedOrigin -from ..pattern import CredentialsType, StatelessLister +from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) + # Aliasing the page results returned by `get_pages` method from the lister. 
NugetListerPage = List[Dict[str, str]]


@dataclass
class NugetListerState:
    """Store lister state for incremental mode operations."""

    # Value of ``commitTimeStamp`` read from the catalog index during the last
    # listing; ``None`` until a first listing has completed.
    last_listing_date: Optional[datetime] = None


class NugetLister(Lister[NugetListerState, NugetListerPage]):
    """List Nuget (Package manager for .NET) origins.

    The lister is incremental: the catalog index ``commitTimeStamp`` seen
    during a run is stored in :class:`NugetListerState` and, on the next run,
    only catalog pages whose own ``commitTimeStamp`` is greater than that
    stored value are fetched.
    """

    LISTER_NAME = "nuget"
    INSTANCE = "nuget"

    API_INDEX_URL = "https://api.nuget.org/v3/catalog0/index.json"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
    ):
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.API_INDEX_URL,
        )
        # ``commitTimeStamp`` of the catalog index for the current run; set by
        # ``get_pages`` and consumed by ``finalize``.
        self.listing_date: Optional[datetime] = None
        # Smallest ``commitTimeStamp`` among the pages that could not be
        # fetched during the current run (``None`` when every fetch succeeded).
        self._earliest_failed_page_date: Optional[datetime] = None

    def state_from_dict(self, d: Dict[str, Any]) -> NugetListerState:
        """Build a :class:`NugetListerState` from its dict form.

        ``last_listing_date``, when present and not ``None``, is parsed with
        ``iso8601.parse_date``. The input mapping is left untouched.
        """
        # Work on a shallow copy so the caller's dict is not modified.
        state_kwargs = dict(d)
        raw_date = state_kwargs.get("last_listing_date")
        if raw_date is not None:
            state_kwargs["last_listing_date"] = iso8601.parse_date(raw_date)
        return NugetListerState(**state_kwargs)

    def state_to_dict(self, state: NugetListerState) -> Dict[str, Any]:
        """Convert ``state`` to a dict, the date rendered with ``isoformat()``.

        The ``last_listing_date`` key is always present; its value is ``None``
        when the state holds no date.
        """
        serialized_date: Optional[str] = None
        if state.last_listing_date is not None:
            serialized_date = state.last_listing_date.isoformat()
        return {"last_listing_date": serialized_date}

    def get_pages(self) -> Iterator[NugetListerPage]:
        """Yield the ``items`` list of each catalog page that needs listing.

        It uses the following endpoint
        `https://api.nuget.org/v3/catalog0/index.json` to get the list of page
        endpoints to iterate. The index ``commitTimeStamp`` is recorded in
        ``self.listing_date``. A page is fetched only when no previous listing
        date is stored in the state, or when the page ``commitTimeStamp`` is
        greater than that stored date.

        A page whose fetch raises ``HTTPError`` is logged and skipped; its
        timestamp is remembered so that ``finalize`` does not move the cursor
        past it.
        """
        self._earliest_failed_page_date = None

        index = self.http_request(url=self.url).json()

        assert "commitTimeStamp" in index
        self.listing_date = iso8601.parse_date(index["commitTimeStamp"])

        assert "items" in index
        for page in index["items"]:
            assert page["@id"]
            assert page["commitTimeStamp"]

            commit_timestamp = iso8601.parse_date(page["commitTimeStamp"])

            last_listing_date = self.state.last_listing_date
            if last_listing_date is not None and commit_timestamp <= last_listing_date:
                # Not modified since the previous listing.
                continue

            # Keep the ``try`` limited to the request that can raise HTTPError.
            try:
                page_response = self.http_request(url=page["@id"])
            except HTTPError:
                logger.warning(
                    "Failed to fetch page %s, skipping it from listing.",
                    page["@id"],
                )
                earliest_failed = self._earliest_failed_page_date
                if earliest_failed is None or commit_timestamp < earliest_failed:
                    self._earliest_failed_page_date = commit_timestamp
                continue

            page_data = page_response.json()
            assert "items" in page_data
            yield page_data["items"]

    def get_origins_from_page(self, page: NugetListerPage) -> Iterator[ListedOrigin]:
        """Yield a ListedOrigin for each entry of ``page`` declaring a repository.

        .NET packages are binary, dll, etc. We retrieve only packages for which we
        can find a vcs repository.

        For each entry, the document at ``elt["@id"]`` is fetched to read the
        package ``id`` and ``version``, which are used to build the url of the
        .nuspec file (package metadata xml). When that file has a ``repository``
        element carrying both ``url`` and ``type`` attributes, an origin is
        yielded with ``last_update`` set from the entry ``commitTimeStamp``.
        Entries for which either request raises ``HTTPError`` are logged and
        skipped.
        """
        assert self.lister_obj.id is not None

        for elt in page:
            try:
                res = self.http_request(url=elt["@id"])
            except HTTPError:
                logger.warning(
                    "Failed to fetch page %s, skipping it from listing.",
                    elt["@id"],
                )
                continue

            data = res.json()
            pkgname = data["id"].lower()
            version = data["version"].lower()
            nuspec_url = (
                f"https://api.nuget.org/v3-flatcontainer/{pkgname}/"
                f"{version}/{pkgname}.nuspec"
            )

            try:
                res_metadata = self.http_request(url=nuspec_url)
            except HTTPError:
                logger.warning(
                    "Failed to fetch nuspec file %s, skipping it from listing.",
                    nuspec_url,
                )
                continue

            nuspec = BeautifulSoup(res_metadata.content, "xml")
            repo = nuspec.find("repository")
            if not (repo and "url" in repo.attrs and "type" in repo.attrs):
                # No usable vcs repository declared for this package version.
                continue

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=repo.attrs["type"],
                url=repo.attrs["url"],
                last_update=iso8601.parse_date(elt["commitTimeStamp"]),
            )

    def finalize(self) -> None:
        """Record the listing cursor in the state and flag it as updated.

        Nothing is recorded when ``get_pages`` never captured a listing date, so
        that a previously stored cursor is not replaced by ``None``. When some
        pages could not be fetched, the cursor is set just before the earliest
        of them instead of the index date, so the next run fetches them again.
        """
        # Local import: ``timedelta`` is not among the module-level imports.
        from datetime import timedelta

        if self.listing_date is None:
            return

        new_cursor = self.listing_date
        earliest_failed = self._earliest_failed_page_date
        if earliest_failed is not None:
            # ``get_pages`` selects pages with commitTimeStamp > cursor, hence
            # the cursor must be strictly lower than the failed page timestamp.
            new_cursor = earliest_failed - timedelta(microseconds=1)
            logger.warning(
                "Some pages could not be fetched, listing cursor set to %s "
                "so that they are retried on next run.",
                new_cursor.isoformat(),
            )

        self.state.last_listing_date = new_cursor
        self.updated = True
b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.10.10.04.04.00_moq.automock.3.5.0-ci0287.json new file mode 100644 index 0000000..878b828 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.10.10.04.04.00_moq.automock.3.5.0-ci0287.json @@ -0,0 +1,187 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "Tim Kellogg, Adam Hewitt, Kevin Bost", + "catalog:commitId": "de4b22b8-397b-4fa1-a160-db3a7c5b17cd", + "catalog:commitTimeStamp": "2022-10-10T04:04:00.6654802Z", + "copyright": "Copyright Tim Kellogg 2022", + "created": "2022-10-10T04:01:52.21Z", + "description": "An auto-mocking container that generates mocks using Moq", + "id": "Moq.AutoMock", + "isPrerelease": true, + "lastEdited": "2022-10-10T04:03:52.51Z", + "licenseFile": "LICENSE", + "licenseUrl": "https://aka.ms/deprecateLicenseUrl", + "listed": true, + "packageHash": "jtvxZ9lJGiNWCvKx4oZByy/knRu86ze833hZa2XvAbzYcSR3gSesdWgbGw1yNGDY0TuHobTETq/lorrtE2/pPA==", + "packageHashAlgorithm": "SHA512", + "packageSize": 70853, + "projectUrl": "https://github.com/moq/Moq.AutoMocker", + "published": "2022-10-10T04:01:52.21Z", + "repository": "", + "verbatimVersion": "3.5.0-ci0287", + "version": "3.5.0-ci0287", + "dependencyGroups": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netframework4.6.1", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netframework4.6.1/moq", + "@type": "PackageDependency", + "id": "Moq", + "range": "[4.18.2, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netframework4.6.1/nonblocking", + "@type": "PackageDependency", + "id": 
"NonBlocking", + "range": "[2.1.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netframework4.6.1/system.valuetuple", + "@type": "PackageDependency", + "id": "System.ValueTuple", + "range": "[4.5.0, )" + } + ], + "targetFramework": ".NETFramework4.6.1" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netstandard2.0", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netstandard2.0/moq", + "@type": "PackageDependency", + "id": "Moq", + "range": "[4.18.2, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#dependencygroup/.netstandard2.0/nonblocking", + "@type": "PackageDependency", + "id": "NonBlocking", + "range": "[2.1.0, )" + } + ], + "targetFramework": ".NETStandard2.0" + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#Moq.AutoMock.nuspec", + "@type": "PackageEntry", + "compressedLength": 567, + "fullName": "Moq.AutoMock.nuspec", + "length": 1287, + "name": "Moq.AutoMock.nuspec" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#lib/net461/Moq.AutoMock.dll", + "@type": "PackageEntry", + "compressedLength": 17993, + "fullName": "lib/net461/Moq.AutoMock.dll", + "length": 41984, + "name": "Moq.AutoMock.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#lib/net461/Moq.AutoMock.xml", + "@type": "PackageEntry", + "compressedLength": 5031, + "fullName": "lib/net461/Moq.AutoMock.xml", + "length": 55041, + "name": "Moq.AutoMock.xml" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#lib/netstandard2.0/Moq.AutoMock.dll", + "@type": "PackageEntry", + "compressedLength": 17927, + "fullName": "lib/netstandard2.0/Moq.AutoMock.dll", + "length": 41984, + "name": "Moq.AutoMock.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#lib/netstandard2.0/Moq.AutoMock.xml", + "@type": "PackageEntry", + "compressedLength": 5031, + "fullName": "lib/netstandard2.0/Moq.AutoMock.xml", + "length": 55041, + "name": "Moq.AutoMock.xml" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#LICENSE", + "@type": "PackageEntry", + "compressedLength": 628, + "fullName": "LICENSE", + "length": 1068, + "name": "LICENSE" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#analyzers/dotnet/cs/Moq.AutoMocker.TestGenerator.dll", + "@type": "PackageEntry", + "compressedLength": 9686, + "fullName": "analyzers/dotnet/cs/Moq.AutoMocker.TestGenerator.dll", + "length": 25088, + "name": "Moq.AutoMocker.TestGenerator.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 11534, + "fullName": ".signature.p7s", + "length": 11534, + "name": ".signature.p7s" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": 
{ + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json_visit1 b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json_visit1 new file mode 100644 index 0000000..9f40584 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json_visit1 @@ -0,0 +1,46 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/index.json", + "@type": [ + "CatalogRoot", + "AppendOnlyCatalog", + "Permalink" + ], + "commitId": "b5e49ade-c7b8-482a-8a9b-3aee7bed9698", + "commitTimeStamp": "2022-10-10T04:20:52.8660454Z", + "count": 16959, + "nuget:lastCreated": "2022-10-10T04:20:52.8660454Z", + "nuget:lastDeleted": "2022-10-10T04:20:52.8660454Z", + "nuget:lastEdited": "2022-10-10T04:20:52.8660454Z", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/page17100.json", + "@type": "CatalogPage", + "commitId": "b5e49ade-c7b8-482a-8a9b-3aee7bed9698", + "commitTimeStamp": "2022-10-10T04:20:52.8660454Z", + "count": 545 + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": { + "@id": "item", + "@container": "@set" + }, + "parent": { + "@type": "@id" + }, + "commitTimeStamp": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastCreated": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastEdited": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastDeleted": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + } + } +} diff --git 
a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page17100.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page17100.json new file mode 100644 index 0000000..2e7eb13 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page17100.json @@ -0,0 +1,49 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page17100.json", + "@type": "CatalogPage", + "commitId": "b5e49ade-c7b8-482a-8a9b-3aee7bed9698", + "commitTimeStamp": "2022-10-10T04:20:52.8660454Z", + "count": 545, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.04.00/moq.automock.3.5.0-ci0287.json", + "@type": "nuget:PackageDetails", + "commitId": "de4b22b8-397b-4fa1-a160-db3a7c5b17cd", + "commitTimeStamp": "2022-10-10T04:04:00.6654802Z", + "nuget:id": "Moq.AutoMock", + "nuget:version": "3.5.0-ci0287" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.10.10.04.20.52/alzabox.api.sdk.0.0.13.json", + "@type": "nuget:PackageDetails", + "commitId": "b5e49ade-c7b8-482a-8a9b-3aee7bed9698", + "commitTimeStamp": "2022-10-10T04:20:52.8660454Z", + "nuget:id": "Alzabox.API.SDK", + "nuget:version": "0.0.13" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": { + "@id": "item", + "@container": "@set" + }, + "parent": { + "@type": "@id" + }, + "commitTimeStamp": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastCreated": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastEdited": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastDeleted": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + } + } +} diff --git a/swh/lister/nuget/tests/test_lister.py b/swh/lister/nuget/tests/test_lister.py index 8c94c8e..a5488ff 100644 --- a/swh/lister/nuget/tests/test_lister.py +++ b/swh/lister/nuget/tests/test_lister.py @@ -1,34 +1,131 
# Copyright (C) 2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from swh.lister.nuget.lister import NugetLister

expected_origins = ["https://github.com/sillsdev/libpalaso.git"]
expected_origins_incremental = ["https://github.com/moq/Moq.AutoMocker"]


def _listed_origins(swh_scheduler, lister):
    """Return the origins recorded in the scheduler for ``lister``."""
    return swh_scheduler.get_listed_origins(lister.lister_obj.id).results


def _type_url_pairs(origins):
    """Return ``(visit_type, url)`` tuples for ``origins``, ordered by url."""
    ordered = sorted(origins, key=lambda origin: origin.url)
    return [(origin.visit_type, origin.url) for origin in ordered]


def _git_pairs(urls):
    """Return the ``("git", url)`` tuple expected for each of ``urls``."""
    return [("git", url) for url in urls]


def test_nuget_lister(datadir, requests_mock_datadir, swh_scheduler):
    """A full listing fetches two pages and records the single expected origin."""
    lister = NugetLister(scheduler=swh_scheduler)
    stats = lister.run()

    assert stats.pages == 2
    assert stats.origins == 1

    recorded = _listed_origins(swh_scheduler, lister)

    assert len(recorded) == len(expected_origins)
    assert _type_url_pairs(recorded) == _git_pairs(expected_origins)


def test_nuget_lister_incremental(datadir, requests_mock_datadir_visits, swh_scheduler):
    """A second visit lists only the page modified since the first one."""
    # First run
    lister = NugetLister(scheduler=swh_scheduler)
    assert lister.state.last_listing_date is None

    stats = lister.run()
    assert stats.pages == 2
    assert stats.origins == 1
    assert lister.state.last_listing_date

    recorded = _listed_origins(swh_scheduler, lister)
    assert _type_url_pairs(recorded) == _git_pairs(expected_origins)

    last_date = lister.state.last_listing_date

    # Second run
    lister = NugetLister(scheduler=swh_scheduler)
    assert lister.state.last_listing_date == last_date
    stats = lister.run()
    # One page and one new origin
    assert lister.state.last_listing_date > last_date
    assert stats.pages == 1
    assert stats.origins == 1

    recorded = _listed_origins(swh_scheduler, lister)
    assert _type_url_pairs(recorded) == _git_pairs(
        sorted(expected_origins + expected_origins_incremental)
    )


def test_nuget_lister_incremental_no_changes(
    datadir, requests_mock_datadir, swh_scheduler
):
    """A second visit against an unchanged index lists nothing new."""
    # First run
    lister = NugetLister(scheduler=swh_scheduler)
    assert lister.state.last_listing_date is None

    stats = lister.run()
    assert stats.pages == 2
    assert stats.origins == 1
    assert lister.state.last_listing_date

    recorded = _listed_origins(swh_scheduler, lister)

    assert len(recorded) == len(expected_origins)
    assert _type_url_pairs(recorded) == _git_pairs(expected_origins)

    last_date = lister.state.last_listing_date

    # Second run
    lister = NugetLister(scheduler=swh_scheduler)
    assert lister.state.last_listing_date == last_date
    stats = lister.run()
    # Nothing new
    assert lister.state.last_listing_date == last_date
    assert stats.pages == 0
    assert stats.origins == 0