Changeset View
Standalone View
swh/loader/npm/client.py
Show First 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | def prepare_package_versions(self, known_versions=None): | ||||
for version, package_source_data in sorted(new_versions.items()): | for version, package_source_data in sorted(new_versions.items()): | ||||
# filter out version with missing tarball (cases exist), | # filter out version with missing tarball (cases exist), | ||||
# package visit will be marked as partial at the end of | # package visit will be marked as partial at the end of | ||||
# the loading process | # the loading process | ||||
tarball_url = package_source_data['url'] | tarball_url = package_source_data['url'] | ||||
tarball_request = self._request(tarball_url, | tarball_request = self._request(tarball_url, | ||||
throw_error=False) | throw_error=False) | ||||
if tarball_request.status_code == 404: | if tarball_request.status_code == 404: | ||||
self.log.debug('Tarball url %s returns a 404 error.' % | self.log.debug('Tarball url %s returns a 404 error.', | ||||
tarball_url) | tarball_url) | ||||
self.log.debug(('Version %s of %s package will be missing and ' | self.log.debug(('Version %s of %s package will be missing and ' | ||||
'the visit will be marked as partial.') % | 'the visit will be marked as partial.'), | ||||
(version[0], self.package)) | version[0], self.package) | ||||
continue | continue | ||||
version_data = self.package_metadata['versions'][version[0]] | version_data = self.package_metadata['versions'][version[0]] | ||||
yield self._prepare_package_version(package_source_data, | yield self._prepare_package_version(package_source_data, | ||||
version_data) | version_data) | ||||
def _prepare_package_version(self, package_source_data, version_data): | def _prepare_package_version(self, package_source_data, version_data): | ||||
version = version_data['version'] | version = version_data['version'] | ||||
self.log.debug('Processing version %s for npm package %s' % | self.log.debug('Processing version %s for npm package %s', | ||||
(version, self.package)) | version, self.package) | ||||
# create temp dir to download and extract package tarball | # create temp dir to download and extract package tarball | ||||
path = os.path.join(self.temp_dir, version) | path = os.path.join(self.temp_dir, version) | ||||
os.makedirs(path, exist_ok=True) | os.makedirs(path, exist_ok=True) | ||||
filepath = os.path.join(path, package_source_data['filename']) | filepath = os.path.join(path, package_source_data['filename']) | ||||
self.log.debug('Package local path: %s' % filepath) | |||||
# download tarball | # download tarball | ||||
url = package_source_data['url'] | url = package_source_data['url'] | ||||
response = self._request(url) | response = self._request(url) | ||||
hash_names = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'} | hash_names = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'} | ||||
h = hashutil.MultiHash(hash_names=hash_names) | h = hashutil.MultiHash(hash_names=hash_names) | ||||
with open(filepath, 'wb') as f: | with open(filepath, 'wb') as f: | ||||
for chunk in response.iter_content(chunk_size=None): | for chunk in response.iter_content(chunk_size=None): | ||||
h.update(chunk) | h.update(chunk) | ||||
f.write(chunk) | f.write(chunk) | ||||
# check tarball integrity | # check tarball integrity | ||||
hashes = h.hexdigest() | hashes = h.hexdigest() | ||||
expected_digest = package_source_data['sha1'] | expected_digest = package_source_data['sha1'] | ||||
actual_digest = hashes['sha1'] | actual_digest = hashes['sha1'] | ||||
if actual_digest != expected_digest: | if actual_digest != expected_digest: | ||||
raise ValueError( | raise ValueError( | ||||
'%s %s: Checksum mismatched: %s != %s' % ( | '%s %s: Checksum mismatched: %s != %s' % ( | ||||
self.package, version, expected_digest, actual_digest)) | self.package, version, expected_digest, actual_digest)) | ||||
# uncompress tarball | # uncompress tarball | ||||
tarball.uncompress(filepath, path) | tarball.uncompress(filepath, path) | ||||
# remove tarball | |||||
os.remove(filepath) | |||||
# do not archive useless tarball root directory | # do not archive useless tarball root directory | ||||
package_path = os.path.join(path, 'package') | package_path = os.path.join(path, 'package') | ||||
# some old packages use their name as root directory | # some old packages use their name as root directory | ||||
if not os.path.exists(package_path): | if not os.path.exists(package_path): | ||||
ver_pos = package_source_data['filename'].rfind(version) | ver_pos = package_source_data['filename'].rfind(version) | ||||
package_name = package_source_data['filename'][:ver_pos-1] | package_name = package_source_data['filename'][:ver_pos-1] | ||||
package_path = os.path.join(path, package_name) | package_path = os.path.join(path, package_name) | ||||
# fallback: archive root tarball directory | # fallback: archive root tarball directory | ||||
if not os.path.exists(package_path): | if not os.path.exists(package_path): | ||||
package_path = path | package_path = path | ||||
olasd: while you're moving this around you can switch the interpolation to a comma | |||||
self.log.debug('Package local path: %s', package_path) | |||||
package_source_data.update(hashes) | package_source_data.update(hashes) | ||||
# parse package.json file to add its content to revision metadata | # parse package.json file to add its content to revision metadata | ||||
package_json_path = os.path.join(package_path, 'package.json') | package_json_path = os.path.join(package_path, 'package.json') | ||||
package_json = {} | package_json = {} | ||||
with open(package_json_path, 'rb') as package_json_file: | with open(package_json_path, 'rb') as package_json_file: | ||||
package_json_bytes = package_json_file.read() | package_json_bytes = package_json_file.read() | ||||
file_encoding = chardet.detect(package_json_bytes)['encoding'] | file_encoding = chardet.detect(package_json_bytes)['encoding'] | ||||
Show All 13 Lines |
olasd: while you're moving this around, you can switch the interpolation to a comma