Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/npm/loader.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import logging | import logging | ||||
import os | import os | ||||
import re | import re | ||||
from codecs import BOM_UTF8 | from codecs import BOM_UTF8 | ||||
from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional | from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional | ||||
import chardet | import chardet | ||||
import iso8601 | import iso8601 | ||||
from urllib.parse import quote | |||||
from swh.model.identifiers import normalize_timestamp | from swh.model.identifiers import normalize_timestamp | ||||
from swh.loader.package.loader import PackageLoader | from swh.loader.package.loader import PackageLoader | ||||
from swh.loader.package.utils import api_info, release_name | from swh.loader.package.utils import api_info, release_name | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None} | _EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None} | ||||
# https://github.com/jonschlinkert/author-regex | # https://github.com/jonschlinkert/author-regex | ||||
_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)' | _author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)' | ||||
class NpmLoader(PackageLoader): | class NpmLoader(PackageLoader): | ||||
visit_type = 'npm' | visit_type = 'npm' | ||||
def __init__(self, package_name, package_url, package_metadata_url): | def __init__(self, url: str): | ||||
super().__init__(url=package_url) | """Constructor | ||||
self.provider_url = package_metadata_url | |||||
self._info = None | Args | ||||
str: origin url (e.g. https://www.npmjs.com/package/<package-name>) | |||||
""" | |||||
super().__init__(url=url) | |||||
package_name = url.split('https://www.npmjs.com/package/')[1] | |||||
safe_name = quote(package_name, safe='') | |||||
self.provider_url = f'https://replicate.npmjs.com/{safe_name}/' | |||||
self._info: Dict[str, Any] = {} | |||||
self._versions = None | self._versions = None | ||||
# if package_url is None: | |||||
# package_url = 'https://www.npmjs.com/package/%s' % package_name | |||||
# if package_metadata_url is None: | |||||
# package_metadata_url = 'https://replicate.npmjs.com/%s/' %\ | |||||
# quote(package_name, safe='') | |||||
@property | @property | ||||
def info(self) -> Dict: | def info(self) -> Dict[str, Any]: | ||||
anlambert: nitpick: it's better to use a concrete type (here `Dict`) when typing a return value | |||||
"""Return the project metadata information (fetched from npm registry) | """Return the project metadata information (fetched from npm registry) | ||||
""" | """ | ||||
if not self._info: | if not self._info: | ||||
self._info = api_info(self.provider_url) | self._info = api_info(self.provider_url) | ||||
return self._info | return self._info | ||||
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
▲ Show 20 Lines • Show All 284 Lines • Show Last 20 Lines |
nitpick: it's better to use a concrete type (here `Dict`) when typing a return value