diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -73,6 +73,7 @@ lister.hackage=swh.lister.hackage:register lister.launchpad=swh.lister.launchpad:register lister.npm=swh.lister.npm:register + lister.nuget=swh.lister.nuget:register lister.opam=swh.lister.opam:register lister.packagist=swh.lister.packagist:register lister.phabricator=swh.lister.phabricator:register diff --git a/swh/lister/nuget/__init__.py b/swh/lister/nuget/__init__.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/__init__.py @@ -0,0 +1,79 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +""" +NuGet lister +============ + +The NuGet lister discovers origins from `nuget.org`_. NuGet is the package manager for .NET. +As .NET packages mostly contain binaries, we only keep track of packages that have +a VCS repository (GIT, SVN, Mercurial...) url usable as an origin. + +`nuget.org/packages`_ lists 301,206 packages as of September 2022. + +Origins retrieving strategy +--------------------------- + +Nuget.org provides an `http api`_ with several endpoints to discover and list packages +and versions. + +The recommended way to retrieve all packages is to use the `catalog`_ api endpoint. +It provides a first endpoint that lists all available pages. We then iterate over them +to get the content of each page. + +Page listing +------------ + +Each page returns a list of packages, which is the data of the response. + +Origins from page +----------------- + +For each entry in a page listing we get related metadata through its `package metadata`_ +http api endpoint. It returns URIs for linked archives that contain binaries, not the +original source code. Our strategy is then to get a related GIT repository. 
+ +We use another endpoint for each package to get its `package manifest`_, a .nuspec file (xml + data) which may contain a GIT repository url. If we find one, it is used as origin. + +Running tests +------------- + +Activate the virtualenv and run from within the swh-lister directory:: + + pytest -s -vv --log-cli-level=DEBUG swh/lister/nuget/tests + +Testing with Docker +------------------- + +Change directory to swh/docker then launch the docker environment:: + + docker compose up -d + +Then schedule a nuget listing task:: + + docker compose exec swh-scheduler swh scheduler task add -p oneshot list-nuget + +You can follow lister execution by displaying the logs of the swh-lister service:: + + docker compose logs -f swh-lister + +.. _nuget.org: https://nuget.org +.. _nuget.org/packages: https://www.nuget.org/packages +.. _http api: https://api.nuget.org/v3/index.json +.. _catalog: https://learn.microsoft.com/en-us/nuget/api/catalog-resource +.. _package metadata: https://learn.microsoft.com/en-us/nuget/api/registration-base-url-resource +.. 
_package manifest: https://learn.microsoft.com/en-us/nuget/api/package-base-address-resource#download-package-manifest-nuspec # noqa: B950 +""" + + +def register(): # setup.py entry point "lister.nuget" + from .lister import NugetLister + + return { + "lister": NugetLister, + "task_modules": ["%s.tasks" % __name__], + } diff --git a/swh/lister/nuget/lister.py b/swh/lister/nuget/lister.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/lister.py @@ -0,0 +1,114 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import logging +from typing import Dict, Iterator, List, Optional + +from bs4 import BeautifulSoup +from requests.exceptions import HTTPError + +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin + +from ..pattern import CredentialsType, StatelessLister + +logger = logging.getLogger(__name__) + +# Type alias for one page yielded by `get_pages`: the "items" list of a catalog page. +NugetListerPage = List[Dict[str, str]] + + +class NugetLister(StatelessLister[NugetListerPage]): + """List Nuget (Package manager for .NET) origins.""" + + LISTER_NAME = "nuget" + INSTANCE = "nuget" + + API_INDEX_URL = "https://api.nuget.org/v3/catalog0/index.json" + + def __init__( + self, + scheduler: SchedulerInterface, + credentials: Optional[CredentialsType] = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + instance=self.INSTANCE, + url=self.API_INDEX_URL, + ) + + def get_pages(self) -> Iterator[NugetListerPage]: + """Yield the "items" list of each page referenced by the catalog index. + + It uses the following endpoint `https://api.nuget.org/v3/catalog0/index.json` + to get the list of page endpoints to iterate over.
+ """ + index_response = self.http_request(url=self.url) + index = index_response.json() + assert "items" in index + + for page in index["items"]: + assert page["@id"] + try: + page_response = self.http_request(url=page["@id"]) + page_data = page_response.json() + assert "items" in page_data + yield page_data["items"] + except HTTPError: # best effort: skip this page, keep listing + logger.warning( + "Failed to fetch page %s, skipping it from listing.", + page["@id"], + ) + continue + + def get_origins_from_page(self, page: NugetListerPage) -> Iterator[ListedOrigin]: + """Iterate over the entries of a page and yield ListedOrigin instances. + .NET packages are binary, dll, etc. We retrieve only packages for which we can + find a vcs repository. + + To check if a vcs repository exists, we need for each entry in a page to retrieve + a .nuspec file, which is a package metadata xml file, and search for a `repository` + element. + """ + assert self.lister_obj.id is not None + + for elt in page: + try: + res = self.http_request(url=elt["@id"]) + except HTTPError: + logger.warning( + "Failed to fetch page %s, skipping it from listing.", + elt["@id"], + ) + continue + + data = res.json() + pkgname = data["id"] + nuspec_url = ( # package manifest (.nuspec) endpoint + f"https://api.nuget.org/v3-flatcontainer/{pkgname.lower()}/" + f"{data['version'].lower()}/{pkgname.lower()}.nuspec" + ) + try: + res_metadata = self.http_request(url=nuspec_url) + except HTTPError: + logger.warning( + "Failed to fetch nuspec file %s, skipping it from listing.", + nuspec_url, + ) + continue + xml = BeautifulSoup(res_metadata.content, "xml") + repo = xml.find("repository") + if repo and "url" in repo.attrs and "type" in repo.attrs: + vcs_url = repo.attrs["url"] + vcs_type = repo.attrs["type"] + yield ListedOrigin( + lister_id=self.lister_obj.id, + visit_type=vcs_type, # verbatim nuspec "type" attribute + url=vcs_url, + last_update=None, + ) + else: + continue diff --git a/swh/lister/nuget/tasks.py b/swh/lister/nuget/tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tasks.py @@ -0,0 +1,19 @@ +# Copyright (C) 
2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from celery import shared_task + +from swh.lister.nuget.lister import NugetLister + + +@shared_task(name=__name__ + ".NugetListerTask") +def list_nuget(**lister_args): + """Lister task for NuGet (.NET package manager) registry""" + return NugetLister.from_configfile(**lister_args).run().dict() + + +@shared_task(name=__name__ + ".ping") +def _ping(): # returns "OK"; exercised by test_nuget_ping + return "OK" diff --git a/swh/lister/nuget/tests/__init__.py b/swh/lister/nuget/tests/__init__.py new file mode 100644 diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_intersoft.crosslight.logging.entityframework_5.0.5000.1235-experimental_intersoft.crosslight.logging.entityframework.nuspec @@ -0,0 +1,23 @@ + + + + Intersoft.Crosslight.Logging.EntityFramework + 5.0.5000.1235-experimental + Intersoft Crosslight Logging EntityFramework + Intersoft Solutions + Intersoft Solutions + false + http://www.intersoftsolutions.com/Products/Licensing + http://www.intersoftsolutions.com/Crosslight + http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png + Provides server logging functionality that use Entity Framework for the storage. 
+ + Copyright © 2016 Intersoft Solutions + Intersoft Crosslight Logging Entity Framework EF PCL Portable Class Library Log + + + + + + + \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3-flatcontainer_sil.core.desktop_10.0.1-beta0012_sil.core.desktop.nuspec @@ -0,0 +1,46 @@ + + + + SIL.Core.Desktop + 10.0.1-beta0012 + SIL International + MIT + https://licenses.nuget.org/MIT + https://github.com/sillsdev/libpalaso + SIL.Core.Desktop provides general UI related utilities for language software. + Changes since version 10.0.0 + +Added: +- [SIL.Core] Added SIL.PlatformUtilities.Platform.IsFlatpak property. +- [SIL.Core.Desktop] Added Testing channel to UpdateSettings. + +Fixed: +- [SIL.Core] Fixed SIL.IO.PathUtilities.DeleteToRecycleBin and .GetDefaultFileManager to work in a flatpak environment. +- [SIL.Windows.Forms] Fixed ImageToolbox.ImageGallery.ImageCollectionManager.FromStandardLocations to work in a flatpak environment. +- [SIL.WritingSystems] Fixed SLDR initialization for users with European number formats. 
+ +See full changelog at https://github.com/sillsdev/libpalaso/blob/master/CHANGELOG.md + Copyright © 2010-2021 SIL International + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.08.07.54_sil.core.desktop.10.0.1-beta0012.json @@ -0,0 +1,189 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "SIL International", + "catalog:commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "catalog:commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "copyright": "Copyright © 2010-2021 SIL International", + "created": "2022-08-25T21:03:44.337Z", + "description": "SIL.Core.Desktop provides general UI related utilities for language software.", + "frameworkAssemblyGroup": { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#frameworkassemblygroup/.netframework4.6.1", + "assembly": [ + "System.Configuration", + "System.Management", + "System.Security" + ], + "targetFramework": ".NETFramework4.6.1" + }, + "id": "SIL.Core.Desktop", + "isPrerelease": true, + "lastEdited": "2022-09-23T08:07:21.247Z", + "licenseExpression": "MIT", + "licenseUrl": "https://licenses.nuget.org/MIT", + "listed": false, + "packageHash": "VeWqaDhGa/Y2dPKmeujabcBtTV2itT0sicfQnt5ZMESHddQg1S0T3Rm+8QDsGx7H7hGVHM/lTEbblOJ3LwyBBQ==", + "packageHashAlgorithm": "SHA512", + "packageSize": 68053, + "projectUrl": "https://github.com/sillsdev/libpalaso", + "published": "1900-01-01T00:00:00Z", + "releaseNotes": "Changes since version 
10.0.0\n\nAdded:\n- [SIL.Core] Added SIL.PlatformUtilities.Platform.IsFlatpak property.\n- [SIL.Core.Desktop] Added Testing channel to UpdateSettings.\n\nFixed:\n- [SIL.Core] Fixed SIL.IO.PathUtilities.DeleteToRecycleBin and .GetDefaultFileManager to work in a flatpak environment.\n- [SIL.Windows.Forms] Fixed ImageToolbox.ImageGallery.ImageCollectionManager.FromStandardLocations to work in a flatpak environment.\n- [SIL.WritingSystems] Fixed SLDR initialization for users with European number formats.\n\nSee full changelog at https://github.com/sillsdev/libpalaso/blob/master/CHANGELOG.md", + "repository": "", + "verbatimVersion": "10.0.1-beta0012", + "version": "10.0.1-beta0012", + "dependencyGroups": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/ndesk.dbus", + "@type": "PackageDependency", + "id": "NDesk.DBus", + "range": "[0.15.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netframework4.6.1/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETFramework4.6.1" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/sil.core", + "@type": "PackageDependency", + "id": "SIL.Core", + "range": "[10.0.1-beta0012, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.configuration.configurationmanager", + "@type": "PackageDependency", + "id": "System.Configuration.ConfigurationManager", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.drawing.common", + "@type": "PackageDependency", + "id": "System.Drawing.Common", + "range": "[6.0.0, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#dependencygroup/.netstandard2.0/system.io.filesystem.accesscontrol", + "@type": "PackageDependency", + "id": "System.IO.FileSystem.AccessControl", + "range": "[5.0.0, )" + } + ], + "targetFramework": ".NETStandard2.0" + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#SIL.Core.Desktop.nuspec", + "@type": "PackageEntry", + "compressedLength": 984, + "fullName": "SIL.Core.Desktop.nuspec", + "length": 2835, + "name": "SIL.Core.Desktop.nuspec" + }, + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/net461/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 31890, + "fullName": "lib/net461/SIL.Core.Desktop.dll", + "length": 75776, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#lib/netstandard2.0/SIL.Core.Desktop.dll", + "@type": "PackageEntry", + "compressedLength": 23793, + "fullName": "lib/netstandard2.0/SIL.Core.Desktop.dll", + "length": 55296, + "name": "SIL.Core.Desktop.dll" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9465, + "fullName": ".signature.p7s", + "length": 9465, + "name": ".signature.p7s" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git 
a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_data_2022.09.23.09.10.26_intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json @@ -0,0 +1,142 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": [ + "PackageDetails", + "catalog:Permalink" + ], + "authors": "Intersoft Solutions", + "catalog:commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "catalog:commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "copyright": "Copyright © 2016 Intersoft Solutions", + "created": "2022-09-23T09:08:08.377Z", + "description": "Provides server logging functionality that use Entity Framework for the storage.", + "iconUrl": "http://www.intersoftsolutions.com/assets/images/default/logo-crosslight-medium.png", + "id": "Intersoft.Crosslight.Logging.EntityFramework", + "isPrerelease": true, + "lastEdited": "2022-09-23T09:10:09.59Z", + "licenseUrl": "http://www.intersoftsolutions.com/Products/Licensing", + "listed": true, + "packageHash": "8la7uRv/KjXFBRiSgbvODemvNmjDCYbZ0/15WfNJCBz4gq3mFoKcuOuq1xwMUwi88DqHGI9Ov//6k7bocsaT1w==", + "packageHashAlgorithm": "SHA512", + "packageSize": 16908, + "projectUrl": "http://www.intersoftsolutions.com/Crosslight", + "published": "2022-09-23T09:08:08.377Z", + "releaseNotes": "", + "requireLicenseAcceptance": false, + "title": "Intersoft Crosslight Logging EntityFramework", + "verbatimVersion": "5.0.5000.1235-experimental", + "version": "5.0.5000.1235-experimental", + "dependencyGroups": [ + { + "@id": 
"https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup", + "@type": "PackageDependencyGroup", + "dependencies": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/intersoft.crosslight.logging", + "@type": "PackageDependency", + "id": "Intersoft.Crosslight.Logging", + "range": "[5.0.5000.1235-experimental, )" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#dependencygroup/entityframework", + "@type": "PackageDependency", + "id": "EntityFramework", + "range": "[6.0.2, )" + } + ] + } + ], + "packageEntries": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "@type": "PackageEntry", + "compressedLength": 541, + "fullName": "Intersoft.Crosslight.Logging.EntityFramework.nuspec", + "length": 1339, + "name": "Intersoft.Crosslight.Logging.EntityFramework.nuspec" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "@type": "PackageEntry", + "compressedLength": 4952, + "fullName": "lib/net45/Intersoft.Crosslight.Logging.EntityFramework.Server.dll", + "length": 11264, + "name": "Intersoft.Crosslight.Logging.EntityFramework.Server.dll" + }, 
+ { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json#.signature.p7s", + "@type": "PackageEntry", + "compressedLength": 9474, + "fullName": ".signature.p7s", + "length": 9474, + "name": ".signature.p7s" + } + ], + "tags": [ + "Intersoft", + "Crosslight", + "Logging", + "Entity", + "Framework", + "EF", + "PCL", + "Portable", + "Class", + "Library", + "Log" + ], + "@context": { + "@vocab": "http://schema.nuget.org/schema#", + "catalog": "http://schema.nuget.org/catalog#", + "xsd": "http://www.w3.org/2001/XMLSchema#", + "dependencies": { + "@id": "dependency", + "@container": "@set" + }, + "dependencyGroups": { + "@id": "dependencyGroup", + "@container": "@set" + }, + "packageEntries": { + "@id": "packageEntry", + "@container": "@set" + }, + "packageTypes": { + "@id": "packageType", + "@container": "@set" + }, + "supportedFrameworks": { + "@id": "supportedFramework", + "@container": "@set" + }, + "tags": { + "@id": "tag", + "@container": "@set" + }, + "vulnerabilities": { + "@id": "vulnerability", + "@container": "@set" + }, + "published": { + "@type": "xsd:dateTime" + }, + "created": { + "@type": "xsd:dateTime" + }, + "lastEdited": { + "@type": "xsd:dateTime" + }, + "catalog:commitTimeStamp": { + "@type": "xsd:dateTime" + }, + "reasons": { + "@container": "@set" + } + } +} \ No newline at end of file diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_index.json @@ -0,0 +1,53 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/index.json", + "@type": [ + "CatalogRoot", + "AppendOnlyCatalog", + "Permalink" + ], + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 16959, + "nuget:lastCreated": 
"2022-09-23T08:18:53.69Z", + "nuget:lastDeleted": "2022-09-21T22:41:42.7622396Z", + "nuget:lastEdited": "2022-09-23T08:18:53.69Z", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550 + }, + { + "@id": "https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "94389591-14c5-4802-8cdf-0c4e46dbaaea", + "commitTimeStamp": "2022-09-23T08:18:58.8986871Z", + "count": 240 + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": { + "@id": "item", + "@container": "@set" + }, + "parent": { + "@type": "@id" + }, + "commitTimeStamp": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastCreated": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastEdited": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + }, + "nuget:lastDeleted": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime" + } + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page11702.json @@ -0,0 +1,52 @@ +{ + "@id": "https://api.nuget.org/v3/catalog0/page11702.json", + "@type": "CatalogPage", + "commitId": "6df640f0-8681-460e-adb3-8ea5de6f53cc", + "commitTimeStamp": "2021-01-11T08:39:31.3161021Z", + "count": 550, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.51.35/hanatech.application.3.1.3.6-preview9.83.0.json", + "@type": "nuget:PackageDetails", + "commitId": "7d6ac844-6613-41dd-bbb3-998cec87f79d", + "commitTimeStamp": "2021-01-11T07:51:35.5330856Z", + "nuget:id": 
"HanaTech.Application", + "nuget:version": "3.1.3.6-preview9.83.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.20.14.22.12/glader.essentials.gameframework.2.3.117.json", + "@type": "nuget:PackageDetails", + "commitId": "2340f078-1087-47a6-969d-be0f1fac4249", + "commitTimeStamp": "2022-09-20T14:22:12.7481401Z", + "nuget:id": "Glader.Essentials.GameFramework", + "nuget:version": "2.3.117" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.33.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.33.0" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2021.01.11.07.28.39/hanatech.framework.workflow.3.1.3.6-preview9.74.0.json", + "@type": "nuget:PackageDetails", + "commitId": "be968d19-2aee-434f-9013-9a0bcbe1e2c8", + "commitTimeStamp": "2021-01-11T07:28:39.9688859Z", + "nuget:id": "HanaTech.Framework.WorkFlow", + "nuget:version": "3.1.3.6-preview9.74.0" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/data/https_api.nuget.org/v3_catalog0_page16958.json @@ -0,0 +1,44 @@ +{ + "@id": 
"https://api.nuget.org/v3/catalog0/page16958.json", + "@type": "CatalogPage", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "count": 546, + "parent": "https://api.nuget.org/v3/catalog0/index.json", + "items": [ + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.09.10.26/intersoft.crosslight.logging.entityframework.5.0.5000.1235-experimental.json", + "@type": "nuget:PackageDetails", + "commitId": "8b51bc91-722d-48fb-a4e4-e6167c9a459b", + "commitTimeStamp": "2022-09-23T09:10:26.5844749Z", + "nuget:id": "Intersoft.Crosslight.Logging.EntityFramework", + "nuget:version": "5.0.5000.1235-experimental" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.tests.10.0.1-beta0005.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Tests", + "nuget:version": "10.0.1-beta0005" + }, + { + "@id": "https://api.nuget.org/v3/catalog0/data/2022.09.23.08.07.54/sil.core.desktop.10.0.1-beta0012.json", + "@type": "nuget:PackageDetails", + "commitId": "f97b1cb0-9a7f-4bfd-a9ee-d61a3cd59c6d", + "commitTimeStamp": "2022-09-23T08:07:54.051884Z", + "nuget:id": "SIL.Core.Desktop", + "nuget:version": "10.0.1-beta0012" + } + ], + "@context": { + "@vocab": "http://schema.nuget.org/catalog#", + "nuget": "http://schema.nuget.org/schema#", + "items": {"@id": "item", "@container": "@set"}, + "parent": {"@type": "@id"}, + "commitTimeStamp": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastCreated": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastEdited": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"}, + "nuget:lastDeleted": {"@type": "http://www.w3.org/2001/XMLSchema#dateTime"} + } +} diff --git a/swh/lister/nuget/tests/test_lister.py b/swh/lister/nuget/tests/test_lister.py new file mode 100644 --- /dev/null +++ 
b/swh/lister/nuget/tests/test_lister.py @@ -0,0 +1,34 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.nuget.lister import NugetLister + +expected_origins = ["https://github.com/sillsdev/libpalaso.git"] + + +def test_nuget_lister(datadir, requests_mock_datadir, swh_scheduler): + lister = NugetLister(scheduler=swh_scheduler) + res = lister.run() + + assert res.pages == 2 # the index fixture lists two catalog pages + assert res.origins == 1 # matches len(expected_origins) + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results + + assert len(scheduler_origins) == len(expected_origins) + + assert [ + ( + scheduled.visit_type, + scheduled.url, + ) + for scheduled in sorted(scheduler_origins, key=lambda scheduled: scheduled.url) + ] == [ + ( + "git", + url, + ) + for url in expected_origins + ] diff --git a/swh/lister/nuget/tests/test_tasks.py b/swh/lister/nuget/tests/test_tasks.py new file mode 100644 --- /dev/null +++ b/swh/lister/nuget/tests/test_tasks.py @@ -0,0 +1,31 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.lister.pattern import ListerStats + + +def test_nuget_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.ping") + assert res + res.wait() + assert res.successful() + assert res.result == "OK" + + +def test_nuget_lister(swh_scheduler_celery_app, swh_scheduler_celery_worker, mocker): + # Replace NugetLister in the tasks module with a mock returning fixed stats + lister = mocker.patch("swh.lister.nuget.tasks.NugetLister") + lister.from_configfile.return_value = lister # from_configfile() returns the mock + stats = ListerStats(pages=42, origins=42) + lister.run.return_value = 
stats + + res = swh_scheduler_celery_app.send_task("swh.lister.nuget.tasks.NugetListerTask") + assert res + res.wait() + assert res.successful() + assert res.result == stats.dict() + + lister.from_configfile.assert_called_once_with() + lister.run.assert_called_once_with()