Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/npm/client.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | |||||
import logging | import logging | ||||
import os | import os | ||||
import chardet | |||||
import requests | import requests | ||||
from swh.core import tarball | from swh.core import tarball | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.loader.npm.utils import extract_npm_package_author | from swh.loader.npm.utils import extract_npm_package_author, load_json | ||||
class NpmClient: | class NpmClient: | ||||
""" | """ | ||||
Helper class internally used by the npm loader to fetch | Helper class internally used by the npm loader to fetch | ||||
metadata for a specific package hosted on the npm registry. | metadata for a specific package hosted on the npm registry. | ||||
Args: | Args: | ||||
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | def _prepare_package_version(self, package_source_data, version_data): | ||||
# uncompress tarball | # uncompress tarball | ||||
tarball.uncompress(filepath, path) | tarball.uncompress(filepath, path) | ||||
# remove tarball | # remove tarball | ||||
os.remove(filepath) | os.remove(filepath) | ||||
# do not archive useless tarball root directory | # do not archive useless tarball root directory | ||||
package_path = os.path.join(path, 'package') | package_path = os.path.join(path, 'package') | ||||
# some old packages use their name as root directory | # some old packages use a root directory with a different name | ||||
if not os.path.exists(package_path): | if not os.path.exists(package_path): | ||||
ver_pos = package_source_data['filename'].rfind(version) | for _, dirnames, _ in os.walk(path): | ||||
package_name = package_source_data['filename'][:ver_pos-1] | if dirnames: | ||||
package_path = os.path.join(path, package_name) | package_path = os.path.join(path, dirnames[0]) | ||||
# fallback: archive root tarball directory | break | ||||
if not os.path.exists(package_path): | |||||
package_path = path | |||||
self.log.debug('Package local path: %s', package_path) | self.log.debug('Package local path: %s', package_path) | ||||
package_source_data.update(hashes) | package_source_data.update(hashes) | ||||
# parse package.json file to add its content to revision metadata | # parse package.json file to add its content to revision metadata | ||||
package_json_path = os.path.join(package_path, 'package.json') | package_json_path = os.path.join(package_path, 'package.json') | ||||
package_json = {} | package_json = {} | ||||
with open(package_json_path, 'rb') as package_json_file: | with open(package_json_path, 'rb') as package_json_file: | ||||
package_json_bytes = package_json_file.read() | package_json_bytes = package_json_file.read() | ||||
file_encoding = chardet.detect(package_json_bytes)['encoding'] | package_json = load_json(package_json_bytes) | ||||
package_json = json.loads(package_json_bytes.decode(file_encoding)) | |||||
# extract author from package.json | # extract author from package.json | ||||
author = extract_npm_package_author(package_json) | author = extract_npm_package_author(package_json) | ||||
return (package_json, author, package_source_data, package_path) | return (package_json, author, package_source_data, package_path) | ||||
def _request(self, url, throw_error=True):
    """
    Send a streaming GET request to ``url`` through ``self.session``.

    The keyword arguments stored in ``self.params`` are forwarded to
    ``self.session.get`` together with ``stream=True``.

    Args:
        url (str): the URL to query
        throw_error (bool): when True (the default), any status code
            other than 200 is turned into a ``ValueError``; when False
            the response is returned whatever its status code is

    Returns:
        the response object produced by ``self.session.get``; its body
        has not been consumed, so the caller is in charge of reading
        or closing it (assumes a requests-style response object)

    Raises:
        ValueError: if the status code is not 200 and ``throw_error``
            is True
    """
    response = self.session.get(url, **self.params, stream=True)
    if response.status_code != 200 and throw_error:
        # The request was made in streaming mode and nobody will read
        # the body of a rejected response: close it explicitly so the
        # underlying connection is released instead of staying checked
        # out until the response object is garbage collected.
        response.close()
        raise ValueError("Fail to query '%s'. Reason: %s" % (
            url, response.status_code))
    return response