diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -70,6 +70,7 @@ lister.golang=swh.lister.golang:register lister.launchpad=swh.lister.launchpad:register lister.npm=swh.lister.npm:register + lister.nuget=swh.lister.nuget:register lister.opam=swh.lister.opam:register lister.packagist=swh.lister.packagist:register lister.phabricator=swh.lister.phabricator:register diff --git a/swh/lister/nuget/__init__.py b/swh/lister/nuget/__init__.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/__init__.py @@ -0,0 +1,20 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +Nuget lister +============ + +""" + + +def register(): + from .lister import NugetLister + + return { + "lister": NugetLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/lister.py @@ -0,0 +1,148 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information +import logging +from typing import Any, Dict, Iterator, List, Optional + +from bs4 import BeautifulSoup +import requests +from requests.exceptions import HTTPError +from tenacity.before_sleep import before_sleep_log + +from swh.lister.utils import throttling_retry +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from .. import __version__ +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Aliasing the page results returned by `get_pages` method from the lister. 
NugetListerPage = List[Dict[str, str]]

USER_AGENT = (
    f"Software Heritage Nuget Lister v{__version__} "
    "(+https://www.softwareheritage.org/contact)"
)


class NugetLister(StatelessLister[NugetListerPage]):
    """List Nuget (package manager for .NET) origins.

    The lister walks the NuGet catalog index, then each catalog page it
    references, then each entry of a page. For every entry it downloads the
    package ``.nuspec`` manifest and emits an origin only when that manifest
    declares a ``repository`` element carrying both a ``url`` and a ``type``.
    """

    LISTER_NAME = "nuget"
    VISIT_TYPE = "git"  # Nuget origins url are Git repositories
    # NOTE: VISIT_TYPE is not read by the methods below; the visit type of an
    # emitted origin comes from the nuspec ``repository`` ``type`` attribute.
    INSTANCE = "nuget"

    API_INDEX_URL = "https://api.nuget.org/v3/catalog0/index.json"

    def __init__(
        self,
        scheduler: SchedulerInterface,
        credentials: Optional[CredentialsType] = None,
    ):
        """Set up the lister and the HTTP session shared by all requests.

        Args:
            scheduler: scheduler interface handed over to the base lister.
            credentials: optional credentials handed over to the base lister.
        """
        super().__init__(
            scheduler=scheduler,
            credentials=credentials,
            instance=self.INSTANCE,
            url=self.API_INDEX_URL,
        )
        # A single session so the headers below are sent with every request.
        self.session = requests.Session()
        self.session.headers.update(
            {
                "Accept": "application/json",
                "User-Agent": USER_AGENT,
            }
        )

    # NOTE(review): retry policy is defined by swh.lister.utils.throttling_retry
    # (not visible here); each retry sleep is logged at WARNING level.
    @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
    def page_request(self, url: str, params: Dict[str, Any]) -> requests.Response:
        """Send a GET request through the lister session.

        Args:
            url: URL to fetch.
            params: query parameters forwarded to ``requests``.

        Returns:
            The HTTP response.

        Raises:
            requests.exceptions.HTTPError: when ``raise_for_status`` rejects
                the response status. Any non-200 status is logged first.
        """
        logger.info("Fetching URL %s with params %s", url, params)

        response = self.session.get(url, params=params)
        if response.status_code != 200:
            logger.warning(
                "Unexpected HTTP status code %s on %s: %s",
                response.status_code,
                response.url,
                response.content,
            )
        response.raise_for_status()

        return response

    def get_pages(self) -> Iterator[NugetListerPage]:
        """Yield the ``items`` list of every catalog page.

        The catalog index (`https://api.nuget.org/v3/catalog0/index.json`)
        lists the page endpoints to iterate. A page whose download fails with
        an HTTP error is logged and skipped; the listing goes on with the
        next page.
        """
        index_response = self.page_request(url=self.url, params={})
        index = index_response.json()
        assert "items" in index

        for page in index["items"]:
            assert page["@id"]
            try:
                page_response = self.page_request(url=page["@id"], params={})
            except HTTPError:
                logger.warning(
                    "Failed to fetch page %s, skipping it from listing.",
                    page["@id"],
                )
                continue
            page_data = page_response.json()
            assert "items" in page_data
            yield page_data["items"]

    def get_origins_from_page(self, page: NugetListerPage) -> Iterator[ListedOrigin]:
        """Yield a ListedOrigin for each page entry that declares a repository.

        .NET packages are binary, dll, etc. We retrieve only packages for which
        we can find a vcs repository.

        For each entry of the page, two requests are made: the catalog entry
        itself (to learn the package id and version), then the package
        ``.nuspec`` file, a package metadata xml file, which is searched for a
        ``repository`` element. Entries for which either request fails, or
        whose nuspec has no repository with a non-empty ``url`` and ``type``,
        are skipped.
        """
        assert self.lister_obj.id is not None

        for elt in page:
            try:
                res = self.page_request(url=elt["@id"], params={})
            except HTTPError:
                logger.warning(
                    "Failed to fetch page %s, skipping it from listing.",
                    elt["@id"],
                )
                continue

            data = res.json()
            pkgname = data["id"]
            nuspec_url = (
                f"https://api.nuget.org/v3-flatcontainer/{pkgname.lower()}/"
                f"{data['version'].lower()}/{pkgname.lower()}.nuspec"
            )
            try:
                # Fetch the nuspec manifest itself: requesting the catalog
                # entry again would hand JSON to the XML parser below, which
                # can never contain a repository element.
                res_metadata = self.page_request(url=nuspec_url, params={})
            except HTTPError:
                logger.warning(
                    "Failed to fetch nuspec file %s, skipping it from listing.",
                    nuspec_url,
                )
                continue

            xml = BeautifulSoup(res_metadata.content, "xml")
            repo = xml.find("repository")
            if repo is None:
                continue

            vcs_url = repo.attrs.get("url")
            vcs_type = repo.attrs.get("type")
            # An origin without a url or a visit type cannot be scheduled.
            if not vcs_url or not vcs_type:
                continue

            yield ListedOrigin(
                lister_id=self.lister_obj.id,
                visit_type=vcs_type,
                url=vcs_url,
                last_update=None,
            )
+++ b/swh/lister/nuget/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.nuget.lister import NugetLister + + +@shared_task(name=__name__ + ".NugetListerTask") +def list_nuget(**lister_args): + """Lister task for Nuget (.NET package manager) registry""" + return NugetLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): + return "OK" diff --git a/swh/lister/nuget/tests/__init__.py b/swh/lister/nuget/tests/__init__.py new file mode 100644 diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec @@ -0,0 +1,23 @@ + + + + Intersoft.Crosslight.Logging.EntityFramework + 5.0.5000.1235-experimental + Intersoft Crosslight Logging EntityFramework + Intersoft Solutions + Intersoft Solutions + false + http://www.intersoftsolutions.com/Products/Licensing + http://www.intersoftsolutions.com/Crosslight + http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png + Provides server logging functionality that use Entity Framework for the storage. 
+ + Copyright © 2016 Intersoft Solutions + Intersoft Crosslight Logging Entity Framework EF PCL Portable Class Library Log + + + + + + + \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json @@ -0,0 +1,189 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "SIL International", + "catalog:commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "catalog:commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "copyright": "Copyright © 2010-2021 SIL International", + "created": "2022-08-25T21:03:44.337Z", + "description": "SIL.Core.Desktop provides general UI related utilities for language software.", + "frameworkAssemblyGroup": { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#frameworkassemblygroup/.netframework4.6.1", + "assembly": [ + "System.Configuration", + "System.Management", + "System.Security" + ], + "targetFramework": ".NETFramework4.6.1" + }, + "id": "SIL.Core.Desktop", + "isPrerelease": true, + "lastEdited": "2022-09-23T08:07:21.247Z", + "licenseExpression": "MIT", + "licenseUrl": "https://licenses.nuget.org/MIT", + "listed": false, + "packageHash": "VeWqaDhGa/Y2dPKmeujabcBtTV2itT0sicfQnt5ZMESHddQg1S0T3Rm+8QDsGx7H7hGVHM/lTEbblOJ3LwyBBQ==", + "packageHashAlgorithm": "SHA512", + "packageSize": 68053, + "projectUrl": "https://github.com/sillsdev/libpalaso", + "published": "1900-01-01T00:00:00Z", + "releaseNotes": "Changes since version 10.0.0\n\nAdded:\n- [SIL.Core] Added 
SIL.PlatformUtilities.Platform.IsFlatpak property.\n- [SIL.Core.Desktop] Added Testing channel to UpdateSettings.\n\nFixed:\n- [SIL.Core] Fixed SIL.IO.PathUtilities.DeleteToRecycleBin and .GetDefaultFileManager to work in a flatpak environment.\n- [SIL.Windows.Forms] Fixed ImageToolbox.ImageGallery.ImageCollectionManager.FromStandardLocations to work in a flatpak environment.\n- [SIL.WritingSystems] Fixed SLDR initialization for users with European number formats.\n\nSee full changelog at https://github.com/sillsdev/libpalaso/blob/master/CHANGELOG.md", + "repository": "", + "verbatimVersion": "10.0.1-beta0012", + "version": "10.0.1-beta0012", + "dependencyGroups": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/ndesk.dbus", + "@type": "PackageDependency", + "id": "NDesk.DBus", + "range": "[0.15.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETFramework4.6.1" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETStandard2.0" + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#SIL.Core.Desktop.nuspec", + "@type": "PackageEntry", + "compressedLength": 984, + "fullName": "SIL.Core.Desktop.nuspec", + "length": 2835, + "name": "SIL.Core.Desktop.nuspec" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/net461/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 31890, + "fullName": "lib/net461/SIL.Core.Desktop.dll", + "length": 75776, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/netstandard2.0/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 23793, + "fullName": "lib/netstandard2.0/SIL.Core.Desktop.dll", + "length": 55296, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9465, + "fullName": ".signature.p7s", + "length": 9465, + "name": ".signature.p7s" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git 
a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json @@ -0,0 +1,142 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "Intersoft Solutions", + "catalog:commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "catalog:commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "copyright": "Copyright © 2016 Intersoft Solutions", + "created": "2022-09-23T09:08:08.377Z", + "description": "Provides server logging functionality that use Entity Framework for the storage.", + "iconUrl": "http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png", + "id": "Intersoft.Crosslight.Logging.EntityFramework", + "isPrerelease": true, + "lastEdited": "2022-09-23T09:10:09.59Z", + "licenseUrl": "http://www.intersoftsolutions.com/Products/Licensing", + "listed": true, + "packageHash": "8la7uRv/KjXFBRiSgbvODemvNmjDCYbZ0/15WfNJCBz4gq3mFoKcuOuq1xwMUwi88DqHGI9Ov//6k7bocsaT1w==", + "packageHashAlgorithm": "SHA512", + "packageSize": 16908, + "projectUrl": "http://www.intersoftsolutions.com/Crosslight", + "published": "2022-09-23T09:08:08.377Z", + "releaseNotes": "", + "requireLicenseAcceptance": false, + "title": "Intersoft Crosslight Logging EntityFramework", + "verbatimVersion": "5.0.5000.1235-experimental", + "version": "5.0.5000.1235-experimental", + "dependencyGroups": [ + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight.logging", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight.Logging", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/entityframework", + "@type": "PackageDependency", + "id": "EntityFramework", + "range": "[6.0.2, )" + } + ] + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "@type": "PackageEntry", + "compressedLength": 541, + "fullName": "Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "length": 1339, + "name": "Intersoft.Crosslight.Logging.EntityFramework.nuspec" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "@type": "PackageEntry", + "compressedLength": 4952, + "fullName": "lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "length": 11264, + "name": "Intersoft.Crosslight.Logging.EntityFramework.Server.dll" + }, 
+ { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9474, + "fullName": ".signature.p7s", + "length": 9474, + "name": ".signature.p7s" + } + ], + "tags": [ + "Intersoft", + "Crosslight", + "Logging", + "Entity", + "Framework", + "EF", + "PCL", + "Portable", + "Class", + "Library", + "Log" + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json @@ -0,0 +1,53 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/index.json", + "@type": [ + "CatalogRoot", + "AppendOnlyCatalog", + "Permalink" + ], + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 16959, + "nuget:lastCreated": 
"2022-09-23T08:18:53.69Z", + "nuget:lastDeleted": "2022-09-21T22:41:42.7622396Z", + "nuget:lastEdited": "2022-09-23T08:18:53.69Z", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550 + }, + { + "@id": "https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 240 + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": { + "@id": "item", + "@container": "@set" + }, + "parent": { + "@type": "@id" + }, + "commitTimeStamp": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastCreated": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastEdited": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastDeleted": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + } + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json @@ -0,0 +1,44 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.51.35/hanatech.application.3.1.3.6-preview9.83.0.json", + "@type": "nuget:PackageDetails", + "commitId": "7d6ac844-6613-41dd-bbb3-998cec87f79d", + "commitTimeStamp": "2021-01-11T07:51:35.5330856Z", + "nuget:id": 
"HanaTech.Application", + "nuget:version": "3.1.3.6-preview9.83.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.33.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.33.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.74.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.74.0" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json @@ -0,0 +1,44 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "count": 546, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": "nuget:PackageDetails", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "nuget:id": "Intersoft.Crosslight.Logging.EntityFramework", + "nuget:version": "5.0.5000.1235-experimental" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.tests.10.0.1-beta0005.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Tests", + "nuget:version": "10.0.1-beta0005" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Desktop", + "nuget:version": "10.0.1-beta0012" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/test_lister.py b/swh/lister/nuget/tests/test_lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/test_lister.py @@ -0,0 +1,15 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more 
information +from swh.lister.nuget.lister import NugetLister + + +def test_nuget_lister(datadir, requests_mock_datadir, swh_scheduler): + lister = NugetLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 2 + # TODO: the randomly selected fixtures do not declare a repository; + # find fixtures that do, so that origin listing is actually exercised + assert res.origins == 0 diff --git a/swh/lister/nuget/tests/test_tasks.py b/swh/lister/nuget/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_nuget_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_nuget_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # setup the mocked NugetLister + lister = mocker.patch("swh.lister.nuget.tasks.NugetLister") + lister.from_configfile.return_value = lister + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = stats + + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.NugetListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with()