diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst index e4135e9..03f4933 100644 --- a/docs/package-loader-specifications.rst +++ b/docs/package-loader-specifications.rst @@ -1,151 +1,160 @@ .. _package-loader-specifications: Package loader specifications ============================= Release fields -------------- Here is an overview of the fields (+ internal version name + branch name) used by each package loader, after D6616: .. list-table:: Fields used by each package loader :header-rows: 1 * - Loader - internal version - branch name - name - message - synthetic - author - date - Notes * - arch - ``p_info.​version`` - ``release_name(​version, filename)`` - =version - Synthetic release for Arch Linux source package {p_info.name} version {p_info.version} {description} - true - from intrinsic metadata - from extra_loader_arguments['arch_metadata'] - Intrinsic metadata extracted from .PKGINFO file of the package * - archive - passed as arg - ``release_name(​version)`` - =version - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - * - aur - ``p_info.​version`` - ``release_name(​version, filename)`` - =version - Synthetic release for Aur source package {p_info.name} version {p_info.version} {description} - true - "" - from extra_loader_arguments['aur_metadata'] - Intrinsic metadata extracted from .SRCINFO file of the package * - cran - ``metadata.get(​"Version", passed as arg)`` - ``release_name(​version)`` - =version - standard message - true - ``metadata.get(​"Maintainer", "")`` - ``metadata.get(​"Date")`` - metadata is intrinsic * - crates - ``p_info.​version`` - ``release_name(​version, filename) + "\n\n" + i_metadata.description + "\n"`` - =version - Synthetic release for Crate source package {p_info.name} version {p_info.version} {description} - true - from int metadata - from ext metadata - ``i_metadata`` for intrinsic metadata, ``e_metadata`` for extrinsic metadata * - debian - 
=``version`` - ``release_name(​version)`` - =``i_version`` - standard message (using ``i_version``) - true - ``metadata​.changelog​.person`` - ``metadata​.changelog​.date`` - metadata is intrinsic. Old revisions have ``dsc`` as type ``i_version`` is the intrinsic version (eg. ``0.7.2-3``) while ``version`` contains the debian suite name (eg. ``stretch/contrib/0.7.2-3``) and is passed as arg * - deposit - HEAD - only HEAD - HEAD - "{client}: Deposit {id} in collection {collection}\n" - true - original author - ```` from SWORD XML - revisions had parents * - maven-loader - passed as arg - HEAD - ``release_name(version)`` - "Synthetic release for archive at {p_info.url}\n" - true - "" - passed as arg - Only one artefact per url (jar/zip src) * - nixguix - URL - URL - URL - None - true - "" - None - it's the URL of the artifact referenced by the derivation * - npm - ``metadata​["version"]`` - ``release_name(​version)`` - =version - standard message - true - from int metadata or "" - from ext metadata or None - * - opam - as given by opam - "{opam_package}​.{version}" - =version - standard message - true - from metadata - None - "{self.opam_package}​.{version}" matches the version names used by opam's backend. 
metadata is extrinsic + * - pubdev + - ``p_info.​version`` + - ``release_name(​version)`` + - =version + - Synthetic release for pub.dev source package {name} version {version} {description} + - true + - from extrinsic metadata + - from extrinsic metadata + - name, version and description from intrinsic metadata * - pypi - ``metadata​["version"]`` - ``release_name(​version)`` or ``release_name(​version, filename)`` - =version - ``metadata[​'comment_text']}`` or standard message - true - from int metadata or "" - from ext metadata or None - metadata is intrinsic using this function:: def release_name(version: str, filename: Optional[str] = None) -> str: if filename: return "releases/%s/%s" % (version, filename) return "releases/%s" % version and "standard message" being:: msg = ( f"Synthetic release for {PACKAGE_MANAGER} source package {name} " f"version {version}\n" ) The ``target_type`` field is always ``dir``, and the target the id of a directory loaded by unpacking a tarball/zip file/... 
def parse_requirements(name=None):
    """Collect requirement specifiers from a requirements file.

    The file is looked up relative to the current working directory.

    Args:
        name: optional suffix; when truthy, ``requirements-<name>.txt`` is
            read, otherwise ``requirements.txt``.

    Returns:
        The stripped lines of the file, in file order, without blank lines
        and without lines starting with ``#``. An empty list when the file
        does not exist.
    """
    reqf = "requirements-%s.txt" % name if name else "requirements.txt"
    if not path.exists(reqf):
        return []

    with open(reqf) as f:
        stripped = (raw.strip() for raw in f)
        # Build the list while the file is still open: the generator is lazy.
        return [entry for entry in stripped if entry and not entry.startswith("#")]
def register() -> Mapping[str, Any]:
    """Return this worker module's definition.

    Returns:
        A mapping with two entries: ``task_modules``, a one-element list
        naming this package's ``tasks`` submodule, and ``loader``, the
        ``PubDevLoader`` class.
    """
    # The loader is imported at call time, not at package import time.
    from .loader import PubDevLoader as loader_cls

    tasks_module = f"{__name__}.tasks"
    return {"task_modules": [tasks_module], "loader": loader_cls}
@attr.s
class PubDevPackageInfo(BasePackageInfo):
    """Description of one pub.dev package version, built from the registry API."""

    name = attr.ib(type=str)
    """Name of the package"""

    version = attr.ib(type=str)
    """Current version"""

    last_modified = attr.ib(type=str)
    """Last modified date as release date"""

    author = attr.ib(type=Person)
    """Author"""

    description = attr.ib(type=str)
    """Description"""


def extract_intrinsic_metadata(dir_path: Path) -> Dict[str, Any]:
    """Extract intrinsic metadata from pubspec.yaml file at dir_path.

    Each pub.dev package version has a pubspec.yaml file at the root of the archive.

    See https://dart.dev/tools/pub/pubspec for pubspec specifications.

    Args:
        dir_path: A directory on disk where a pubspec.yaml must be present

    Returns:
        A dict mapping from yaml parser
    """
    pubspec_path = dir_path / "pubspec.yaml"
    # Hand raw bytes to the YAML parser so that it applies its own encoding
    # detection (UTF-8 unless a BOM says otherwise) instead of decoding with
    # the process locale, which may not be UTF-8.
    return yaml.safe_load(pubspec_path.read_bytes())


def _semver_sort_key(version: str) -> Tuple[Any, ...]:
    """Build a sort key ordering version strings by semantic-version precedence.

    pub.dev versions follow semantic versioning and may carry a pre-release
    (``-beta.1``) and/or a build (``+1``) suffix. This key never raises:
    strings that do not look like a version are grouped before every parsable
    version and ordered by their raw text.

    Args:
        version: version string, e.g. ``"2.0.0-nullsafety.0"``

    Returns:
        A tuple whose natural ordering matches version precedence; every
        position always holds the same type, so keys are mutually comparable.
    """
    import re  # local import: keeps this helper independent of the file header

    def identifiers_key(text: str) -> Tuple[Any, ...]:
        # Numeric identifiers compare numerically and sort before
        # alphanumeric ones; a longer identifier list wins on equal prefix.
        return tuple(
            (0, int(part), "") if part.isdigit() else (1, 0, part)
            for part in text.split(".")
        )

    match = re.match(
        r"^([0-9]+)\.([0-9]+)(?:\.([0-9]+))?"
        r"(?:-([0-9A-Za-z.-]+))?(?:\+([0-9A-Za-z.-]+))?$",
        version,
    )
    if match is None:
        return (0, (), 0, (), (), version)

    major, minor, patch, pre_release, build = match.groups()
    return (
        1,
        (int(major), int(minor), int(patch or 0)),
        # A pre-release sorts before the corresponding final release.
        0 if pre_release else 1,
        identifiers_key(pre_release) if pre_release else (),
        # Build identifiers only serve as a deterministic tie-breaker.
        identifiers_key(build) if build else (),
        version,
    )


class PubDevLoader(PackageLoader[PubDevPackageInfo]):
    """Load the versions of a pub.dev (Dart, Flutter) package.

    The origin ``url`` is the registry API url of the package
    (e.g. ``https://pub.dev/api/packages/pdf``).
    """

    visit_type = "pubdev"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        **kwargs,
    ):

        super().__init__(storage=storage, url=url, **kwargs)
        self.url = url

    def _raw_info(self) -> bytes:
        """Return the raw registry API response for ``self.url``."""
        return api_info(self.url)

    @cached_method
    def info(self) -> Dict:
        """Return the project metadata information (fetched from pub.dev registry)

        The ``versions`` list of the API response is replaced by a dict keyed
        by version string.
        """
        # Use strict=False in order to correctly manage case where \n is present in a string
        info = json.loads(self._raw_info(), strict=False)
        # Arrange versions list as a new dict with `version` as key
        versions = {v["version"]: v for v in info["versions"]}
        info["versions"] = versions
        return info

    def get_versions(self) -> Sequence[str]:
        """Get all released versions of a PubDev package

        Returns:
            A sequence of versions, sorted by ascending semantic-version
            precedence. Pre-release and build suffixes are supported.

        Example::

            ["0.1.1", "0.10.2"]
        """
        return sorted(self.info()["versions"], key=_semver_sort_key)

    def get_default_version(self) -> str:
        """Get the newest release version of a PubDev package

        Returns:
            A string representing a version

        Example::

            "0.1.2"
        """
        latest = self.info()["latest"]
        return latest["version"]

    def get_package_info(self, version: str) -> Iterator[Tuple[str, PubDevPackageInfo]]:
        """Get release name and package information from version

        Package info comes from extrinsic metadata (from self.info())

        Args:
            version: Package version (e.g: "0.1.0")

        Returns:
            Iterator of tuple (release_name, p_info)
        """
        v = self.info()["versions"][version]
        assert v["version"] == version

        pubspec = v["pubspec"]
        url = v["archive_url"]
        name = pubspec["name"]
        filename = f"{name}-{version}.tar.gz"
        last_modified = v["published"]

        # "authors" may be absent, null or an empty list; "author" may be null.
        authors = pubspec.get("authors") or []
        if isinstance(authors, str):
            authors = [authors]
        # TODO: here we have a list of author, see T3887
        fullname = next((entry for entry in authors if entry is not None), None)
        if fullname is None:
            fullname = pubspec.get("author")
        author = (
            EMPTY_AUTHOR
            if fullname is None
            else Person.from_fullname(fullname.encode())
        )

        # A pubspec without description must not abort the whole visit.
        description = pubspec.get("description") or ""

        p_info = PubDevPackageInfo(
            name=name,
            filename=filename,
            url=url,
            version=version,
            last_modified=last_modified,
            author=author,
            description=description,
        )
        yield release_name(version), p_info

    def build_release(
        self, p_info: PubDevPackageInfo, uncompressed_path: str, directory: Sha1Git
    ) -> Optional[Release]:
        """Build the synthetic release for one unpacked package version.

        Args:
            p_info: package information produced by ``get_package_info``
            uncompressed_path: directory where the archive was unpacked; a
                ``pubspec.yaml`` is read from its root
            directory: id of the loaded directory the release targets

        Returns:
            The release; its message ends with the package description when
            one is available.
        """
        # Extract intrinsic metadata from uncompressed_path/pubspec.yaml
        intrinsic_metadata = extract_intrinsic_metadata(Path(uncompressed_path))

        name: str = intrinsic_metadata["name"]
        version: str = intrinsic_metadata["version"]
        assert version == p_info.version, (
            f"pubspec.yaml version {version!r} differs from "
            f"registry version {p_info.version!r}"
        )

        # author from intrinsic_metadata should not take precedence over the one
        # returned by the api, see https://dart.dev/tools/pub/pubspec#authorauthors
        author: Person = p_info.author

        # Prefer the description shipped in the archive, fall back to the API one.
        description = intrinsic_metadata.get("description") or p_info.description

        message = (
            f"Synthetic release for pub.dev source package {name} "
            f"version {version}\n"
        )
        if description:
            # Only append the paragraph when there is something to say, so the
            # message never contains a literal "None" or a dangling blank line.
            message += f"\n{description}\n"

        return Release(
            name=version.encode(),
            author=author,
            date=TimestampWithTimezone.from_iso8601(p_info.last_modified),
            message=message.encode(),
            target_type=ObjectType.DIRECTORY,
            target=directory,
            synthetic=True,
        )
@shared_task(name=__name__ + ".LoadPubDev")
def load_pubdev(**kwargs):
    """Load packages from pub.dev (Dart, Flutter)

    Args:
        **kwargs: forwarded unchanged to ``PubDevLoader.from_configfile``

    Returns:
        The value returned by the loader's ``load()`` call
    """
    return PubDevLoader.from_configfile(**kwargs).load()


# The task was first defined under the name ``load_arch``; keep that name
# importable. The registered task name (".LoadPubDev") is unaffected.
load_arch = load_pubdev
<2.0.0"},"dependencies":{"flutter":{"sdk":"flutter"}},"dev_dependencies":{"flutter_test":{"sdk":"flutter"}},"flutter":{"plugin":{"platforms":{"some_platform":{"pluginClass":"somePluginClass"}}}}},"archive_url":"https://pub.dartlang.org/packages/authentication/versions/0.0.1.tar.gz","archive_sha256":"0179334b346cb67e4e6e3c905e5cc5c8e488a45ebd99fd2be3a7e0476d620d99","published":"2020-08-13T04:53:34.134687Z"},"versions":[{"version":"0.0.1","pubspec":{"name":"authentication","description":"Persistent user authentication for Flutter with optional backend API integration.","version":"0.0.1","author":null,"homepage":null,"environment":{"sdk":">=2.7.0 <3.0.0","flutter":">=1.17.0 <2.0.0"},"dependencies":{"flutter":{"sdk":"flutter"}},"dev_dependencies":{"flutter_test":{"sdk":"flutter"}},"flutter":{"plugin":{"platforms":{"some_platform":{"pluginClass":"somePluginClass"}}}}},"archive_url":"https://pub.dartlang.org/packages/authentication/versions/0.0.1.tar.gz","archive_sha256":"0179334b346cb67e4e6e3c905e5cc5c8e488a45ebd99fd2be3a7e0476d620d99","published":"2020-08-13T04:53:34.134687Z"}]} +''' > $BASE_API/api_packages_authentication + +echo -e '''{"name":"Autolinker","latest":{"version":"0.1.1","pubspec":{"version":"0.1.1","homepage":"https://github.com/hackcave","description":"Port of Autolinker.js to dart","name":"Autolinker","author":"hackcave "},"archive_url":"https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz","published":"2014-12-24T22:34:02.534090Z"},"versions":[{"version":"0.1.1","pubspec":{"version":"0.1.1","homepage":"https://github.com/hackcave","description":"Port of Autolinker.js to dart","name":"Autolinker","author":"hackcave "},"archive_url":"https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz","published":"2014-12-24T22:34:02.534090Z"}]} +''' > ${BASE_API}/api_packages_Autolinker + +echo -e '''{"name":"bezier","latest":{"version":"1.1.5","pubspec":{"name":"bezier","version":"1.1.5","authors":["Aaron Barrett ","Isaac Barrett 
"],"description":"A 2D Bézier curve math library. Based heavily on the work of @TheRealPomax .\nLive examples can be found at .","homepage":"https://github.com/aab29/bezier.dart","environment":{"sdk":">=2.0.0 <3.0.0"},"dependencies":{"vector_math":"^2.0.0"},"dev_dependencies":{"test":"^1.0.0"}},"archive_url":"https://pub.dartlang.org/packages/bezier/versions/1.1.5.tar.gz","archive_sha256":"cc5da2fa927b5d347550f78d456cd984b7df78a7f0405119cdab12111e2f9ee8","published":"2019-12-22T03:17:30.805225Z"},"versions":[{"version":"1.1.5","pubspec":{"name":"bezier","version":"1.1.5","authors":["Aaron Barrett ","Isaac Barrett "],"description":"A 2D Bézier curve math library. Based heavily on the work of @TheRealPomax .\nLive examples can be found at .","homepage":"https://github.com/aab29/bezier.dart","environment":{"sdk":">=2.0.0 <3.0.0"},"dependencies":{"vector_math":"^2.0.0"},"dev_dependencies":{"test":"^1.0.0"}},"archive_url":"https://pub.dartlang.org/packages/bezier/versions/1.1.5.tar.gz","archive_sha256":"cc5da2fa927b5d347550f78d456cd984b7df78a7f0405119cdab12111e2f9ee8","published":"2019-12-22T03:17:30.805225Z"}]} +''' > ${BASE_API}/api_packages_bezier + +echo -e '''{"name":"pdf","latest":{"version":"3.8.2","pubspec":{"name":"pdf","description":"A pdf producer for Dart. 
It can create pdf files for both web or flutter.","homepage":"https://github.com/DavBfr/dart_pdf/tree/master/pdf","repository":"https://github.com/DavBfr/dart_pdf","issue_tracker":"https://github.com/DavBfr/dart_pdf/issues","version":"3.8.2","environment":{"sdk":">=2.12.0 <3.0.0"},"dependencies":{"archive":"^3.1.0","barcode":">=2.2.0 <3.0.0","crypto":"^3.0.0","image":">=3.0.1 <4.0.0","meta":">=1.3.0 <2.0.0","path_parsing":">=0.2.0 <2.0.0","vector_math":"^2.1.0","xml":">=5.1.0 <7.0.0"},"dev_dependencies":{"flutter_lints":"^1.0.4","test":">=1.16.0 <2.0.0"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/3.8.2.tar.gz","published":"2022-07-25T11:38:25.983876Z"},"versions":[{"version":"1.0.0","pubspec":{"version":"1.0.0","name":"pdf","dependencies":{"ttf_parser":"^1.0.0","vector_math":"^2.0.7","meta":"^1.1.5"},"author":"David PHAM-VAN ","description":"A pdf producer for Dart","homepage":"https://github.com/davbfr/dart_pdf","environment":{"sdk":">=1.8.0 <2.0.0"},"dev_dependencies":{"test":"any"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/1.0.0.tar.gz","published":"2018-07-16T21:12:28.894137Z"},{"version":"3.8.2","pubspec":{"name":"pdf","description":"A pdf producer for Dart. It can create pdf files for both web or flutter.","homepage":"https://github.com/DavBfr/dart_pdf/tree/master/pdf","repository":"https://github.com/DavBfr/dart_pdf","issue_tracker":"https://github.com/DavBfr/dart_pdf/issues","version":"3.8.2","environment":{"sdk":">=2.12.0 <3.0.0"},"dependencies":{"archive":"^3.1.0","barcode":">=2.2.0 <3.0.0","crypto":"^3.0.0","image":">=3.0.1 <4.0.0","meta":">=1.3.0 <2.0.0","path_parsing":">=0.2.0 <2.0.0","vector_math":"^2.1.0","xml":">=5.1.0 <7.0.0"},"dev_dependencies":{"flutter_lints":"^1.0.4","test":">=1.16.0 <2.0.0"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/3.8.2.tar.gz","published":"2022-07-25T11:38:25.983876Z"}]} +''' > ${BASE_API}/api_packages_pdf + +# Dart package a pubspec.yaml file at thier root. 
Generate some of them. + +mkdir -p ${TMP}/packages_authentication_versions_0.0.1 +echo -e '''name: authentication +description: Persistent user authentication for Flutter with optional backend API integration. +version: 0.0.1 +author: +homepage: + +environment: + sdk: ">=2.7.0 <3.0.0" + flutter: ">=1.17.0 <2.0.0" + +dependencies: + flutter: + sdk: flutter + +dev_dependencies: + flutter_test: + sdk: flutter + +# For information on the generic Dart part of this file, see the +# following page: https://dart.dev/tools/pub/pubspec + +# The following section is specific to Flutter. +flutter: + # This section identifies this Flutter project as a plugin project. + # The 'pluginClass' and Android 'package' identifiers should not ordinarily + # be modified. They are used by the tooling to maintain consistency when + # adding or updating assets for this project. + plugin: + platforms: + # This plugin project was generated without specifying any + # platforms with the `--platform` argument. If you see the `fake_platform` map below, remove it and + # then add platforms following the instruction here: + # https://flutter.dev/docs/development/packages-and-plugins/developing-packages#plugin-platforms + # ------------------- + some_platform: + pluginClass: somePluginClass + # ------------------- + + # To add assets to your plugin package, add an assets section, like this: + # assets: + # - images/a_dot_burr.jpeg + # - images/a_dot_ham.jpeg + # + # For details regarding assets in packages, see + # https://flutter.dev/assets-and-images/#from-packages + # + # An image asset can refer to one or more resolution-specific "variants", see + # https://flutter.dev/assets-and-images/#resolution-aware. + + # To add custom fonts to your plugin package, add a fonts section here, + # in this "flutter" section. Each entry in this list should have a + # "family" key with the font family name, and a "fonts" key with a + # list giving the asset and other descriptors for the font. 
For + # example: + # fonts: + # - family: Schyler + # fonts: + # - asset: fonts/Schyler-Regular.ttf + # - asset: fonts/Schyler-Italic.ttf + # style: italic + # - family: Trajan Pro + # fonts: + # - asset: fonts/TrajanPro.ttf + # - asset: fonts/TrajanPro_Bold.ttf + # weight: 700 + # + # For details regarding fonts in packages, see + # https://flutter.dev/custom-fonts/#from-packages +''' > ${TMP}/packages_authentication_versions_0.0.1/pubspec.yaml + + +mkdir -p ${TMP}/packages_autolinker_versions_0.1.1 +echo -e '''name: Autolinker +version: 0.1.1 +author: hackcave +homepage: https://github.com/hackcave +description: + Port of Autolinker.js to dart +''' > ${TMP}/packages_autolinker_versions_0.1.1/pubspec.yaml + +mkdir -p ${TMP}/packages_bezier_versions_1.1.5 +echo -e '''name: bezier +version: 1.1.5 +authors: + - Aaron Barrett + - Isaac Barrett +description: >- + A 2D Bézier curve math library. Based heavily on the work of @TheRealPomax + . + + Live examples can be found at . +homepage: https://github.com/aab29/bezier.dart +environment: + sdk: ">=2.0.0 <3.0.0" +dependencies: + vector_math: ^2.0.0 +dev_dependencies: + test: ^1.0.0 +''' > ${TMP}/packages_bezier_versions_1.1.5/pubspec.yaml + +mkdir -p ${TMP}/packages_pdf_versions_1.0.0 +echo -e '''name: pdf +author: David PHAM-VAN +description: A pdf producer for Dart +homepage: https://github.com/davbfr/dart_pdf +version: 1.0.0 + +environment: + sdk: ">=1.8.0 <2.0.0" + +dependencies: + meta: "^1.1.5" + ttf_parser: "^1.0.0" + vector_math: "^2.0.7" + +dev_dependencies: + test: any +''' > ${TMP}/packages_pdf_versions_1.0.0/pubspec.yaml + +mkdir -p ${TMP}/packages_pdf_versions_3.8.2 +echo -e '''name: pdf +description: A pdf producer for Dart. It can create pdf files for both web or flutter. 
+homepage: https://github.com/DavBfr/dart_pdf/tree/master/pdf +repository: https://github.com/DavBfr/dart_pdf +issue_tracker: https://github.com/DavBfr/dart_pdf/issues +version: 3.8.2 + +environment: + sdk: ">=2.12.0 <3.0.0" + +dependencies: + archive: ^3.1.0 + barcode: ">=2.2.0 <3.0.0" + crypto: ^3.0.0 + image: ">=3.0.1 <4.0.0" + meta: ">=1.3.0 <2.0.0" + path_parsing: ">=0.2.0 <2.0.0" + vector_math: ^2.1.0 + xml: ">=5.1.0 <7.0.0" + +dev_dependencies: + flutter_lints: ^1.0.4 + test: ">=1.16.0 <2.0.0" +''' > ${TMP}/packages_pdf_versions_3.8.2/pubspec.yaml + +cd $TMP + +tar -czf packages_authentication_versions_0.0.1.tar.gz -C packages_authentication_versions_0.0.1 . +tar -czf packages_Autolinker_versions_0.1.1.tar.gz -C packages_autolinker_versions_0.1.1 . +tar -czf packages_bezier_versions_1.1.5.tar.gz -C packages_bezier_versions_1.1.5 . +tar -czf packages_pdf_versions_1.0.0.tar.gz -C packages_pdf_versions_1.0.0 . +tar -czf packages_pdf_versions_3.8.2.tar.gz -C packages_pdf_versions_3.8.2 . 
+ + +# Move .tar.gz archives to a servable directory +mv *.tar.gz ../../$BASE_ARCHIVES + +# Clean up removing tmp_dir +cd ../../ +rm -r tmp_dir/ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_Autolinker_versions_0.1.1.tar.gz b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_Autolinker_versions_0.1.1.tar.gz new file mode 100644 index 0000000..5dcef31 Binary files /dev/null and b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_Autolinker_versions_0.1.1.tar.gz differ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_authentication_versions_0.0.1.tar.gz b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_authentication_versions_0.0.1.tar.gz new file mode 100644 index 0000000..a4ca17e Binary files /dev/null and b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_authentication_versions_0.0.1.tar.gz differ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_bezier_versions_1.1.5.tar.gz b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_bezier_versions_1.1.5.tar.gz new file mode 100644 index 0000000..d9d15ae Binary files /dev/null and b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_bezier_versions_1.1.5.tar.gz differ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_1.0.0.tar.gz b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_1.0.0.tar.gz new file mode 100644 index 0000000..c8b5dfb Binary files /dev/null and b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_1.0.0.tar.gz differ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_3.8.2.tar.gz b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_3.8.2.tar.gz new file mode 100644 index 0000000..5e722c6 Binary files /dev/null 
and b/swh/loader/package/pubdev/tests/data/https_pub.dartlang.org/packages_pdf_versions_3.8.2.tar.gz differ diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_Autolinker b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_Autolinker new file mode 100644 index 0000000..addaa5a --- /dev/null +++ b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_Autolinker @@ -0,0 +1,2 @@ +{"name":"Autolinker","latest":{"version":"0.1.1","pubspec":{"version":"0.1.1","homepage":"https://github.com/hackcave","description":"Port of Autolinker.js to dart","name":"Autolinker","author":"hackcave "},"archive_url":"https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz","published":"2014-12-24T22:34:02.534090Z"},"versions":[{"version":"0.1.1","pubspec":{"version":"0.1.1","homepage":"https://github.com/hackcave","description":"Port of Autolinker.js to dart","name":"Autolinker","author":"hackcave "},"archive_url":"https://pub.dartlang.org/packages/Autolinker/versions/0.1.1.tar.gz","published":"2014-12-24T22:34:02.534090Z"}]} + diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_authentication b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_authentication new file mode 100644 index 0000000..5531232 --- /dev/null +++ b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_authentication @@ -0,0 +1,2 @@ +{"name":"authentication","latest":{"version":"0.0.1","pubspec":{"name":"authentication","description":"Persistent user authentication for Flutter with optional backend API integration.","version":"0.0.1","author":null,"homepage":null,"environment":{"sdk":">=2.7.0 <3.0.0","flutter":">=1.17.0 
<2.0.0"},"dependencies":{"flutter":{"sdk":"flutter"}},"dev_dependencies":{"flutter_test":{"sdk":"flutter"}},"flutter":{"plugin":{"platforms":{"some_platform":{"pluginClass":"somePluginClass"}}}}},"archive_url":"https://pub.dartlang.org/packages/authentication/versions/0.0.1.tar.gz","archive_sha256":"0179334b346cb67e4e6e3c905e5cc5c8e488a45ebd99fd2be3a7e0476d620d99","published":"2020-08-13T04:53:34.134687Z"},"versions":[{"version":"0.0.1","pubspec":{"name":"authentication","description":"Persistent user authentication for Flutter with optional backend API integration.","version":"0.0.1","author":null,"homepage":null,"environment":{"sdk":">=2.7.0 <3.0.0","flutter":">=1.17.0 <2.0.0"},"dependencies":{"flutter":{"sdk":"flutter"}},"dev_dependencies":{"flutter_test":{"sdk":"flutter"}},"flutter":{"plugin":{"platforms":{"some_platform":{"pluginClass":"somePluginClass"}}}}},"archive_url":"https://pub.dartlang.org/packages/authentication/versions/0.0.1.tar.gz","archive_sha256":"0179334b346cb67e4e6e3c905e5cc5c8e488a45ebd99fd2be3a7e0476d620d99","published":"2020-08-13T04:53:34.134687Z"}]} + diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_bezier b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_bezier new file mode 100644 index 0000000..70b6d13 --- /dev/null +++ b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_bezier @@ -0,0 +1,4 @@ +{"name":"bezier","latest":{"version":"1.1.5","pubspec":{"name":"bezier","version":"1.1.5","authors":["Aaron Barrett ","Isaac Barrett "],"description":"A 2D Bézier curve math library. Based heavily on the work of @TheRealPomax . 
+Live examples can be found at .","homepage":"https://github.com/aab29/bezier.dart","environment":{"sdk":">=2.0.0 <3.0.0"},"dependencies":{"vector_math":"^2.0.0"},"dev_dependencies":{"test":"^1.0.0"}},"archive_url":"https://pub.dartlang.org/packages/bezier/versions/1.1.5.tar.gz","archive_sha256":"cc5da2fa927b5d347550f78d456cd984b7df78a7f0405119cdab12111e2f9ee8","published":"2019-12-22T03:17:30.805225Z"},"versions":[{"version":"1.1.5","pubspec":{"name":"bezier","version":"1.1.5","authors":["Aaron Barrett ","Isaac Barrett "],"description":"A 2D Bézier curve math library. Based heavily on the work of @TheRealPomax . +Live examples can be found at .","homepage":"https://github.com/aab29/bezier.dart","environment":{"sdk":">=2.0.0 <3.0.0"},"dependencies":{"vector_math":"^2.0.0"},"dev_dependencies":{"test":"^1.0.0"}},"archive_url":"https://pub.dartlang.org/packages/bezier/versions/1.1.5.tar.gz","archive_sha256":"cc5da2fa927b5d347550f78d456cd984b7df78a7f0405119cdab12111e2f9ee8","published":"2019-12-22T03:17:30.805225Z"}]} + diff --git a/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_pdf b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_pdf new file mode 100644 index 0000000..d636372 --- /dev/null +++ b/swh/loader/package/pubdev/tests/data/https_pub.dev/api_packages_pdf @@ -0,0 +1,2 @@ +{"name":"pdf","latest":{"version":"3.8.2","pubspec":{"name":"pdf","description":"A pdf producer for Dart. 
It can create pdf files for both web or flutter.","homepage":"https://github.com/DavBfr/dart_pdf/tree/master/pdf","repository":"https://github.com/DavBfr/dart_pdf","issue_tracker":"https://github.com/DavBfr/dart_pdf/issues","version":"3.8.2","environment":{"sdk":">=2.12.0 <3.0.0"},"dependencies":{"archive":"^3.1.0","barcode":">=2.2.0 <3.0.0","crypto":"^3.0.0","image":">=3.0.1 <4.0.0","meta":">=1.3.0 <2.0.0","path_parsing":">=0.2.0 <2.0.0","vector_math":"^2.1.0","xml":">=5.1.0 <7.0.0"},"dev_dependencies":{"flutter_lints":"^1.0.4","test":">=1.16.0 <2.0.0"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/3.8.2.tar.gz","published":"2022-07-25T11:38:25.983876Z"},"versions":[{"version":"1.0.0","pubspec":{"version":"1.0.0","name":"pdf","dependencies":{"ttf_parser":"^1.0.0","vector_math":"^2.0.7","meta":"^1.1.5"},"author":"David PHAM-VAN ","description":"A pdf producer for Dart","homepage":"https://github.com/davbfr/dart_pdf","environment":{"sdk":">=1.8.0 <2.0.0"},"dev_dependencies":{"test":"any"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/1.0.0.tar.gz","published":"2018-07-16T21:12:28.894137Z"},{"version":"3.8.2","pubspec":{"name":"pdf","description":"A pdf producer for Dart. 
It can create pdf files for both web or flutter.","homepage":"https://github.com/DavBfr/dart_pdf/tree/master/pdf","repository":"https://github.com/DavBfr/dart_pdf","issue_tracker":"https://github.com/DavBfr/dart_pdf/issues","version":"3.8.2","environment":{"sdk":">=2.12.0 <3.0.0"},"dependencies":{"archive":"^3.1.0","barcode":">=2.2.0 <3.0.0","crypto":"^3.0.0","image":">=3.0.1 <4.0.0","meta":">=1.3.0 <2.0.0","path_parsing":">=0.2.0 <2.0.0","vector_math":"^2.1.0","xml":">=5.1.0 <7.0.0"},"dev_dependencies":{"flutter_lints":"^1.0.4","test":">=1.16.0 <2.0.0"}},"archive_url":"https://pub.dartlang.org/packages/pdf/versions/3.8.2.tar.gz","published":"2022-07-25T11:38:25.983876Z"}]} + diff --git a/swh/loader/package/pubdev/tests/test_pubdev.py b/swh/loader/package/pubdev/tests/test_pubdev.py new file mode 100644 index 0000000..207acd5 --- /dev/null +++ b/swh/loader/package/pubdev/tests/test_pubdev.py @@ -0,0 +1,258 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information +from swh.loader.package.pubdev.loader import PubDevLoader +from swh.loader.package.utils import EMPTY_AUTHOR +from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats +from swh.model.hashutil import hash_to_bytes +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + TimestampWithTimezone, +) + +EXPECTED_PACKAGES = [ + { + "url": "https://pub.dev/api/packages/Autolinker", # one version + }, + { + "url": "https://pub.dev/api/packages/pdf", # multiple versions + }, + { + "url": "https://pub.dev/api/packages/bezier", # multiple authors + }, + { + "url": "https://pub.dev/api/packages/authentication", # empty author + }, +] + + +def test_get_versions(requests_mock_datadir, swh_storage): + loader = PubDevLoader( + swh_storage, + 
url=EXPECTED_PACKAGES[1]["url"], + ) + assert loader.get_versions() == [ + "1.0.0", + "3.8.2", + ] + + +def test_get_default_version(requests_mock_datadir, swh_storage): + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[1]["url"], + ) + assert loader.get_default_version() == "3.8.2" + + +def test_pubdev_loader_load_one_version(datadir, requests_mock_datadir, swh_storage): + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[0]["url"], + ) + load_status = loader.load() + assert load_status["status"] == "eventful" + assert load_status["snapshot_id"] is not None + + expected_snapshot_id = "245092931ba809e6c54ebda8f865fb5a969a4134" + expected_release_id = "919f267ea050539606344d49d14bf594c4386e5a" + + assert expected_snapshot_id == load_status["snapshot_id"] + + expected_snapshot = Snapshot( + id=hash_to_bytes(load_status["snapshot_id"]), + branches={ + b"releases/0.1.1": SnapshotBranch( + target=hash_to_bytes(expected_release_id), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/0.1.1", + target_type=TargetType.ALIAS, + ), + }, + ) + + check_snapshot(expected_snapshot, swh_storage) + + stats = get_stats(swh_storage) + assert { + "content": 1, + "directory": 1, + "origin": 1, + "origin_visit": 1, + "release": 1, + "revision": 0, + "skipped_content": 0, + "snapshot": 1, + } == stats + + assert swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] == Release( + name=b"0.1.1", + message=b"Synthetic release for pub.dev source package Autolinker version" + b" 0.1.1\n\nPort of Autolinker.js to dart\n", + target=hash_to_bytes("3fb6d4f2c0334d1604357ae92b2dd38a55a78194"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"hackcave <hackers@hackcave.org>", + name=b"hackcave", + email=b"hackers@hackcave.org", + ), + date=TimestampWithTimezone.from_iso8601("2014-12-24T22:34:02.534090+00:00"), + id=hash_to_bytes(expected_release_id), + ) + + assert_last_visit_matches( + swh_storage, + 
url=EXPECTED_PACKAGES[0]["url"], + status="full", + type="pubdev", + snapshot=expected_snapshot.id, + ) + + +def test_pubdev_loader_load_multiple_versions( + datadir, requests_mock_datadir, swh_storage +): + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[1]["url"], + ) + load_status = loader.load() + + assert load_status["status"] == "eventful" + assert load_status["snapshot_id"] is not None + + expected_snapshot_id = "43d5b68a9fa973aa95e56916aaef70841ccbc2a0" + + assert expected_snapshot_id == load_status["snapshot_id"] + + expected_snapshot = Snapshot( + id=hash_to_bytes(load_status["snapshot_id"]), + branches={ + b"releases/1.0.0": SnapshotBranch( + target=hash_to_bytes("fbf8e40af675096681954553d737861e10b57216"), + target_type=TargetType.RELEASE, + ), + b"releases/3.8.2": SnapshotBranch( + target=hash_to_bytes("627a5d586e3fb4e7319b17f1aee268fe2fb8e01c"), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/3.8.2", + target_type=TargetType.ALIAS, + ), + }, + ) + + check_snapshot(expected_snapshot, swh_storage) + + stats = get_stats(swh_storage) + assert { + "content": 1 + 1, + "directory": 1 + 1, + "origin": 1, + "origin_visit": 1, + "release": 1 + 1, + "revision": 0, + "skipped_content": 0, + "snapshot": 1, + } == stats + + assert_last_visit_matches( + swh_storage, + url=EXPECTED_PACKAGES[1]["url"], + status="full", + type="pubdev", + snapshot=expected_snapshot.id, + ) + + +def test_pubdev_loader_multiple_authors(datadir, requests_mock_datadir, swh_storage): + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[2]["url"], + ) + load_status = loader.load() + assert load_status["status"] == "eventful" + assert load_status["snapshot_id"] is not None + + expected_snapshot_id = "4fa9f19d1d6ccc70921c8c50b278f510db63aa36" + expected_release_id = "538c98fd69a42d8d0561a7ca95b354de2143a3ab" + + assert expected_snapshot_id == load_status["snapshot_id"] + + expected_snapshot = Snapshot( + 
id=hash_to_bytes(load_status["snapshot_id"]), + branches={ + b"releases/1.1.5": SnapshotBranch( + target=hash_to_bytes(expected_release_id), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/1.1.5", + target_type=TargetType.ALIAS, + ), + }, + ) + + check_snapshot(expected_snapshot, swh_storage) + + release = swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] + assert release.author == Person( + fullname=b"Aaron Barrett <aaron@aaronbarrett.com>", + name=b"Aaron Barrett", + email=b"aaron@aaronbarrett.com", + ) + + +def test_pubdev_loader_empty_author(datadir, requests_mock_datadir, swh_storage): + loader = PubDevLoader( + swh_storage, + url=EXPECTED_PACKAGES[3]["url"], + ) + + load_status = loader.load() + assert load_status["status"] == "eventful" + assert load_status["snapshot_id"] is not None + + expected_snapshot_id = "0c7fa6b9fced23c648d2093ad5597622683f8aed" + expected_release_id = "7d8c05181069aa1049a3f0bc1d13bedc34625d47" + + assert expected_snapshot_id == load_status["snapshot_id"] + + expected_snapshot = Snapshot( + id=hash_to_bytes(load_status["snapshot_id"]), + branches={ + b"releases/0.0.1": SnapshotBranch( + target=hash_to_bytes(expected_release_id), + target_type=TargetType.RELEASE, + ), + b"HEAD": SnapshotBranch( + target=b"releases/0.0.1", + target_type=TargetType.ALIAS, + ), + }, + ) + + check_snapshot(expected_snapshot, swh_storage) + + release = swh_storage.release_get([hash_to_bytes(expected_release_id)])[0] + assert release.author == EMPTY_AUTHOR + + +def test_pubdev_invalid_origin(swh_storage, requests_mock_datadir): + loader = PubDevLoader( + swh_storage, + "http://nowhere/api/packages/42", + ) + + load_status = loader.load() + assert load_status["status"] == "failed" diff --git a/swh/loader/package/pubdev/tests/test_tasks.py b/swh/loader/package/pubdev/tests/test_tasks.py new file mode 100644 index 0000000..6c7bb57 --- /dev/null +++ b/swh/loader/package/pubdev/tests/test_tasks.py @@ -0,0 +1,23 @@ +# Copyright (C) 
2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + + +def test_tasks_pubdev_loader( + mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config +): + mock_load = mocker.patch("swh.loader.package.pubdev.loader.PubDevLoader.load") + mock_load.return_value = {"status": "eventful"} + + res = swh_scheduler_celery_app.send_task( + "swh.loader.package.pubdev.tasks.LoadPubDev", + kwargs=dict( + url="some-url/api/packages/some-package", + ), + ) + assert res + res.wait() + assert res.successful() + assert mock_load.called + assert res.result == {"status": "eventful"}