diff --git a/swh/loader/npm/client.py b/swh/loader/npm/client.py
index e5a5522..20dc85d 100644
--- a/swh/loader/npm/client.py
+++ b/swh/loader/npm/client.py
@@ -1,214 +1,221 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
import os
import requests
from swh.core import tarball
from swh.model import hashutil
from swh.loader.npm.utils import extract_npm_package_author, load_json
class NpmClient:
"""
Helper class internally used by the npm loader to fetch
metadata for a specific package hosted on the npm registry.
Args:
temp_dir (str): Path to the temporary disk location used
to uncompress the package tarballs
"""
def __init__(self, temp_dir, log=None):
self.root_temp_dir = temp_dir
self.session = requests.session()
self.params = {
'headers': {
'User-Agent': 'Software Heritage npm loader'
}
}
self.log = log or logging
def fetch_package_metadata(self, package_metadata_url):
"""
Fetch metadata for a given package and make it the focused one.
This must be called prior to any other operation performed
by the other methods below.
Args:
package_metadata_url: the package metadata url provided
by the npm loader
"""
self.package_metadata_url = package_metadata_url
self.package_metadata = self._request(self.package_metadata_url).json()
self.package = self.package_metadata['name']
self.temp_dir = os.path.join(self.root_temp_dir, self.package)
def latest_package_version(self):
"""
Return the last released version of the focused package.
Returns:
str: the last released package version
"""
latest = ''
if 'latest' in self.package_metadata['dist-tags']:
latest = self.package_metadata['dist-tags']['latest']
return latest
def package_versions(self, known_versions=None):
"""
Return the available versions for the focused package.
Args:
known_versions (dict): may be provided by the loader; it enables
filtering out versions already ingested in the archive.
Returns:
dict: A dict whose keys are Tuple[version, tarball_sha1] and
whose values are dicts with the following entries:
* **name**: the package name
* **version**: the package version
* **filename**: the package source tarball filename
* **sha1**: the package source tarball sha1 checksum
* **date**: the package release date
* **url**: the package source tarball download url
"""
versions = {}
if 'versions' in self.package_metadata:
for version, data in self.package_metadata['versions'].items():
sha1 = data['dist']['shasum']
key = (version, sha1)
if known_versions and key in known_versions:
continue
tarball_url = data['dist']['tarball']
filename = os.path.basename(tarball_url)
date = self.package_metadata['time'][version]
versions[key] = {
'name': self.package,
'version': version,
'filename': filename,
'sha1': sha1,
'date': date,
'url': tarball_url
}
return versions
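# Illustrative shape of one entry returned by package_versions()
# (hypothetical values, not part of the original code):
#
#   versions[('0.0.2', 'a3f5...')] == {
#       'name': 'example-pkg',
#       'version': '0.0.2',
#       'filename': 'example-pkg-0.0.2.tgz',
#       'sha1': 'a3f5...',
#       'date': '2019-01-01T00:00:00.000Z',
#       'url': 'https://registry.npmjs.org/example-pkg/-/example-pkg-0.0.2.tgz',  # noqa
#   }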
def prepare_package_versions(self, known_versions=None):
"""
Instantiate a generator that will process a specific package release
version at each iteration step. The following operations will be
performed:
1. Create a temporary directory to download and extract the
release tarball
2. Download the tarball
3. Check downloaded tarball integrity
4. Uncompress the tarball
5. Parse the ``package.json`` file associated with the package version
6. Extract author from the parsed ``package.json`` file
Args:
known_versions (dict): may be provided by the loader; it enables
filtering out versions already ingested in the archive.
Yields:
Tuple[dict, dict, dict, str]: tuples containing the following
members:
* a dict holding the parsed ``package.json`` file
* a dict holding package author information
* a dict holding package tarball information
* a string holding the path of the uncompressed package to
load into the archive
"""
new_versions = self.package_versions(known_versions)
for version, package_source_data in sorted(new_versions.items()):
# filter out versions with missing tarballs (such cases exist);
# the package visit will be marked as partial at the end of
# the loading process
tarball_url = package_source_data['url']
tarball_request = self._request(tarball_url,
throw_error=False)
if tarball_request.status_code == 404:
self.log.debug('Tarball url %s returns a 404 error.',
tarball_url)
self.log.debug(('Version %s of package %s will be missing and '
'the visit will be marked as partial.'),
version[0], self.package)
continue
version_data = self.package_metadata['versions'][version[0]]
yield self._prepare_package_version(package_source_data,
version_data)
def _prepare_package_version(self, package_source_data, version_data):
version = version_data['version']
self.log.debug('Processing version %s for npm package %s',
version, self.package)
# create temp dir to download and extract package tarball
path = os.path.join(self.temp_dir, version)
os.makedirs(path, exist_ok=True)
filepath = os.path.join(path, package_source_data['filename'])
# download tarball
url = package_source_data['url']
response = self._request(url)
hash_names = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'}
h = hashutil.MultiHash(hash_names=hash_names)
with open(filepath, 'wb') as f:
for chunk in response.iter_content(chunk_size=None):
h.update(chunk)
f.write(chunk)
# check tarball integrity
hashes = h.hexdigest()
expected_digest = package_source_data['sha1']
actual_digest = hashes['sha1']
if actual_digest != expected_digest:
raise ValueError(
'%s %s: Checksum mismatch: %s != %s' % (
self.package, version, expected_digest, actual_digest))
# uncompress tarball
- tarball.uncompress(filepath, path)
+ tarball_invalid = False
+ try:
+ tarball.uncompress(filepath, path)
+ except Exception:
+ tarball_invalid = True
# remove tarball
os.remove(filepath)
+ if tarball_invalid:
+ return (None, None, None, None)
+
# do not archive useless tarball root directory
package_path = os.path.join(path, 'package')
# some old packages use a root directory with a different name
if not os.path.exists(package_path):
for _, dirnames, _ in os.walk(path):
if dirnames:
package_path = os.path.join(path, dirnames[0])
break
self.log.debug('Package local path: %s', package_path)
package_source_data.update(hashes)
# parse package.json file to add its content to revision metadata
package_json_path = os.path.join(package_path, 'package.json')
package_json = {}
with open(package_json_path, 'rb') as package_json_file:
package_json_bytes = package_json_file.read()
package_json = load_json(package_json_bytes)
# extract author from package.json
author = extract_npm_package_author(package_json)
return (package_json, author, package_source_data, package_path)
def _request(self, url, throw_error=True):
response = self.session.get(url, **self.params, stream=True)
if response.status_code != 200 and throw_error:
raise ValueError("Fail to query '%s'. Reason: %s" % (
url, response.status_code))
return response
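For reference, a minimal standalone sketch of the streaming download and checksum verification performed in _prepare_package_version above, using hashlib instead of swh.model.hashutil.MultiHash so it runs outside the swh stack (the function name and its arguments are illustrative, not part of this diff):

import hashlib

import requests


def download_and_verify(url, expected_sha1, dest_path):
    # hash the payload while streaming it to disk, as the client does,
    # so the file never needs a second read pass
    h = hashlib.sha1()
    response = requests.get(url, stream=True)
    with open(dest_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=None):
            h.update(chunk)
            f.write(chunk)
    if h.hexdigest() != expected_sha1:
        raise ValueError('Checksum mismatch: %s != %s'
                         % (expected_sha1, h.hexdigest()))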
diff --git a/swh/loader/npm/loader.py b/swh/loader/npm/loader.py
index 8883f4e..6e0695f 100644
--- a/swh/loader/npm/loader.py
+++ b/swh/loader/npm/loader.py
@@ -1,318 +1,322 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import shutil
from tempfile import mkdtemp
from urllib.parse import quote
from dateutil import parser as date_parser
from swh.loader.core.utils import clean_dangling_folders
from swh.loader.core.loader import BufferedLoader
from swh.model.from_disk import Directory
from swh.model.identifiers import (
revision_identifier, snapshot_identifier,
identifier_to_bytes, normalize_timestamp
)
from swh.storage.algos.snapshot import snapshot_get_all_branches
from swh.loader.npm.client import NpmClient
TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.npm.'
class NpmLoader(BufferedLoader):
"""
Loader for ingesting source packages from the npm registry
into the Software Heritage archive.
"""
CONFIG_BASE_FILENAME = 'loader/npm'
ADDITIONAL_CONFIG = {
'temp_directory': ('str', '/tmp/swh.loader.npm/'),
'debug': ('bool', False)
}
def __init__(self):
super().__init__(logging_class='swh.loader.npm.NpmLoader')
self.origin_id = None
temp_directory = self.config['temp_directory']
os.makedirs(temp_directory, exist_ok=True)
self.temp_directory = mkdtemp(suffix='-%s' % os.getpid(),
prefix=TEMPORARY_DIR_PREFIX_PATTERN,
dir=temp_directory)
self.debug = self.config['debug']
self.done = False
self.npm_client = NpmClient(self.temp_directory, self.log)
def pre_cleanup(self):
"""
To prevent disk exhaustion if some other workers crashed
mid-run (e.g. OOM killed), we try to clean up dangling files.
"""
if self.debug:
self.log.warning('DEBUG: will not pre-clean up temp dir %s',
self.temp_directory)
return
clean_dangling_folders(self.config['temp_directory'],
pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
log=self.log)
def cleanup(self):
"""
Clean up the temporary disk space used for downloading and
extracting npm source package tarballs.
"""
if self.debug:
self.log.warning('DEBUG: will not clean up temp dir %s',
self.temp_directory)
return
if os.path.exists(self.temp_directory):
self.log.debug('Clean up %s', self.temp_directory)
shutil.rmtree(self.temp_directory)
def load(self, package_name, package_url=None,
package_metadata_url=None):
"""
Loader entrypoint to ingest source tarballs for an npm package.
Args:
package_name (str): the name of the npm package
package_url (str): the url of the package description;
if not provided, the following one will be used:
https://www.npmjs.com/package/<package_name>
package_metadata_url (str): the url for the package JSON metadata;
if not provided, the following one will be used:
https://replicate.npmjs.com/<package_name>/
"""
if package_url is None:
package_url = 'https://www.npmjs.com/package/%s' % package_name
if package_metadata_url is None:
package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
quote(package_name, safe='')
return super().load(package_name, package_url, package_metadata_url)
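# For example (hypothetical package name), load('@scope/pkg') with no
# explicit urls resolves to:
#   package_url          = 'https://www.npmjs.com/package/@scope/pkg'
#   package_metadata_url = 'https://replicate.npmjs.com/%40scope%2Fpkg/'
# since quote(package_name, safe='') percent-encodes '@' and '/'.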
def prepare_origin_visit(self, package_name, package_url,
package_metadata_url):
"""
Prepare npm package visit.
Args:
package_name (str): the name of the npm package
package_url (str): the url of the package description
package_metadata_url (str): the url for the package JSON metadata
"""
# reset statuses
self._load_status = 'uneventful'
self._visit_status = 'full'
self.done = False
# fetch the npm package metadata from the registry
self.npm_client.fetch_package_metadata(package_metadata_url)
self.origin = {
'url': package_url,
'type': 'npm',
}
self.visit_date = None # loader core will populate it
def _known_versions(self, last_snapshot):
"""
Retrieve the known release versions for the npm package
(i.e. those already ingested into the archive).
Args:
last_snapshot (dict): Last snapshot for the visit
Returns:
dict: Dict whose keys are Tuple[version, sha1] and whose
values are revision ids.
"""
if not last_snapshot or 'branches' not in last_snapshot:
return {}
revs = [rev['target']
for rev in last_snapshot['branches'].values()
if rev and rev['target_type'] == 'revision']
known_revisions = self.storage.revision_get(revs)
ret = {}
for revision in known_revisions:
if not revision:
continue
if 'package_source' in revision['metadata']:
package = revision['metadata']['package_source']
ret[(package['version'], package['sha1'])] = revision['id']
return ret
- def _last_snapshot(self):
+ def last_snapshot(self):
"""
Retrieve the last snapshot of the npm package if any.
"""
snapshot = self.storage.snapshot_get_latest(self.origin_id)
if snapshot and snapshot.pop('next_branch', None):
snapshot = snapshot_get_all_branches(self.storage, snapshot['id'])
return snapshot
def prepare(self, package_name, package_url, package_metadata_url):
"""
Prepare effective loading of source tarballs for an npm
package.
Args:
package_name (str): the name of the npm package
package_url (str): the url of the package description
package_metadata_url (str): the url for the package JSON metadata
"""
self.package_name = package_name
self.origin_url = package_url
self.package_contents = []
self.package_directories = []
self.package_revisions = []
self.package_load_status = 'uneventful'
self.package_visit_status = 'full'
- last_snapshot = self._last_snapshot()
+ last_snapshot = self.last_snapshot()
self.known_versions = self._known_versions(last_snapshot)
self.new_versions = \
self.npm_client.prepare_package_versions(self.known_versions)
def fetch_data(self):
"""
Called once per package release version to process.
Each call will:
- download the tarball associated with a package release version
- uncompress it and compute the necessary information
- compute the swh objects
Returns:
True as long as there is data left to fetch
"""
data = None
if self.done:
return False
try:
data = next(self.new_versions)
self.package_load_status = 'eventful'
except StopIteration:
self.done = True
return False
package_metadata, author, package_source_data, dir_path = data
+ # package release tarball was corrupted
+ if package_metadata is None:
+ return not self.done
+
dir_path = dir_path.encode('utf-8')
directory = Directory.from_disk(path=dir_path, data=True)
objects = directory.collect()
self.package_contents = objects['content'].values()
self.package_directories = objects['directory'].values()
date = date_parser.parse(package_source_data['date'])
date = normalize_timestamp(int(date.timestamp()))
message = package_source_data['version'].encode('ascii')
revision = {
'synthetic': True,
'metadata': {
'package_source': package_source_data,
'package': package_metadata,
},
'author': author,
'date': date,
'committer': author,
'committer_date': date,
'message': message,
'directory': directory.hash,
'parents': [],
'type': 'tar',
}
revision['id'] = identifier_to_bytes(revision_identifier(revision))
self.package_revisions.append(revision)
package_key = (package_source_data['version'],
package_source_data['sha1'])
self.known_versions[package_key] = revision['id']
self.log.debug('Removing unpacked package files at %s', dir_path)
shutil.rmtree(dir_path)
return not self.done
def _target_from_version(self, version, sha1):
"""
Return revision target information, if any, for a given package version.
"""
target = self.known_versions.get((version, sha1))
return {
'target': target,
'target_type': 'revision',
} if target else None
def _generate_and_load_snapshot(self):
"""
Generate snapshot for the npm package visit.
"""
branches = {}
latest_version = self.npm_client.latest_package_version()
for version_data in self.npm_client.package_versions().values():
version = version_data['version']
sha1 = version_data['sha1']
branch_name = ('releases/%s' % version).encode('ascii')
target = self._target_from_version(version, sha1)
branches[branch_name] = target
if version == latest_version:
branches[b'HEAD'] = {
'target_type': 'alias',
'target': branch_name,
}
if not target:
self.package_visit_status = 'partial'
snapshot = {
'branches': branches,
}
snapshot['id'] = identifier_to_bytes(snapshot_identifier(snapshot))
self.maybe_load_snapshot(snapshot)
def store_data(self):
"""
Send collected objects to storage.
"""
self.maybe_load_contents(self.package_contents)
self.maybe_load_directories(self.package_directories)
self.maybe_load_revisions(self.package_revisions)
if self.done:
self._generate_and_load_snapshot()
self.flush()
def load_status(self):
return {
'status': self.package_load_status,
}
def visit_status(self):
return self.package_visit_status
if __name__ == '__main__':
import logging
import sys
logging.basicConfig(level=logging.DEBUG)
if len(sys.argv) != 2:
logging.error('Usage: %s <package-name>' % sys.argv[0])
sys.exit(1)
package_name = sys.argv[1]
loader = NpmLoader()
loader.load(package_name)
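To make the corrupted-tarball handling concrete, here is the shape of the snapshot branches that _generate_and_load_snapshot produces when one release tarball is invalid (mirroring test_npm_loader_6_invalid_tarball in the test diff below; revision ids are illustrative placeholders):

branches = {
    b'releases/0.0.2': {'target': b'<revision id>', 'target_type': 'revision'},
    # corrupted tarball: no revision was created, _target_from_version
    # returned None and the visit status is downgraded to 'partial'
    b'releases/0.0.3': None,
    b'releases/0.0.4': {'target': b'<revision id>', 'target_type': 'revision'},
    b'HEAD': {'target': b'releases/0.0.4', 'target_type': 'alias'},
}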
diff --git a/swh/loader/npm/tests/test_loader.py b/swh/loader/npm/tests/test_loader.py
index 7bb1216..cf8fa50 100644
--- a/swh/loader/npm/tests/test_loader.py
+++ b/swh/loader/npm/tests/test_loader.py
@@ -1,354 +1,384 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import unittest
import requests_mock
+from unittest.mock import patch
+
+from swh.core import tarball
from swh.loader.core.tests import BaseLoaderStorageTest
from swh.loader.npm.loader import NpmLoader
from swh.model.identifiers import snapshot_identifier
from .common import (
empty_package, package, package_non_utf8_encoding,
package_url, package_metadata_url, package_metadata_file,
init_test_data,
)
_LOADER_TESTS_CONFIG = {
'content_packet_size': 10000,
'content_packet_size_bytes': 104857600,
'content_size_limit': 104857600,
'debug': False,
'directory_packet_size': 25000,
'occurrence_packet_size': 100000,
'release_packet_size': 100000,
'revision_packet_size': 100000,
'send_contents': True,
'send_directories': True,
'send_releases': True,
'send_revisions': True,
'send_snapshot': True,
'storage': {'args': {}, 'cls': 'memory'},
'temp_directory': '/tmp/swh.loader.npm/'
}
_expected_new_contents_first_visit = [
'4ce3058e16ab3d7e077f65aabf855c34895bf17c',
'858c3ceee84c8311adc808f8cdb30d233ddc9d18',
'0fa33b4f5a4e0496da6843a38ff1af8b61541996',
'85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
'9163ac8025923d5a45aaac482262893955c9b37b',
'692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
'18c03aac6d3e910efb20039c15d70ab5e0297101',
'41265c42446aac17ca769e67d1704f99e5a1394d',
'783ff33f5882813dca9239452c4a7cadd4dba778',
'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
'112d1900b4c2e3e9351050d1b542c9744f9793f3',
'5439bbc4bd9a996f1a38244e6892b71850bc98fd',
'd83097a2f994b503185adf4e719d154123150159',
'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
'b3523a26f7147e4af40d9d462adaae6d49eda13e',
'cd065fb435d6fb204a8871bcd623d0d0e673088c',
'2854a40855ad839a54f4b08f5cff0cf52fca4399',
'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
'0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
'0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
'585fc5caab9ead178a327d3660d35851db713df1',
'e8cd41a48d79101977e3036a87aeb1aac730686f',
'5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
'9c3cc2763bf9e9e37067d3607302c4776502df98',
'3649a68410e354c83cd4a38b66bd314de4c8f5c9',
'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
'078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
'38de737da99514de6559ff163c988198bc91367a',
]
_expected_new_contents_second_visit = [
'135cb2000df4dfcfd8012d18ba23a54d6f89b105',
'1e8e0943ee08958ab0a710dbba110f88068cab74',
'25c8e3104daec559482ee1b480262be5da993e0e',
'51245e983ebf91468fc59a072fcdddb837676abb',
'55833e56224af0cf6fbbdca586c79d1e0e257b37',
'785e0e16f2753b7683dd5f9e1bd1b98287334e6a',
'876d655e927a95c7511853850c9c078be5f1a44b',
'a2b331450408a22d3026c0444990b3235017c7e1',
'a3f4f4d2055b21445defff5dada6cddb7c815f15',
'b3aeed7cf5be703bd8a029928b431eecf5d205af',
'b93d5e2006138f03e8ae84d0b72350fe6c37753a',
'd196b2fa26032df86c8470e9f47a45cdeb5e23a2',
'e3bae46f8f4f0347dab7ad567bf2f64bff3c1c53',
'f2746efa0b38dcd3bbe7591cc075ee4a618c5943'
]
_expected_new_directories_first_visit = [
'80579be563e2ef3e385226fe7a3f079b377f142c',
'3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
'bcad03ce58ac136f26f000990fc9064e559fe1c0',
'5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
'584b5b4b6cf7f038095e820b99386a9c232de931',
'184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
'bb5f4ee143c970367eb409f2e4c1104898048b9d',
'1b95491047add1103db0dfdfa84a9735dcb11e88',
'a00c6de13471a2d66e64aca140ddb21ef5521e62',
'5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
'202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
'775cc516543be86c15c1dc172f49c0d4e6e78235',
'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
]
_expected_new_directories_second_visit = [
'025bca14fcc9f84b7ebb09df4ec1b3fadd89a74c',
'14f88da1a1efe2efe1bde2da9245ea1346ed49a0',
'513965efeb9dc5832a8c69f354e57c0e1df4cb31',
'5281878409fa2ab0d35feeef2fe6463346f4418d',
'60b7c18bc5922a81060425edd7a623a4759ba657',
'8c81ff424af1c26ff913e16d340f06ea7da0171c',
'8c96171056490917a3b953c2a70cecace44f3606',
'8faa8fbcbba90c36ab7dd076fd8fda5a9c405f8a',
'b1224309f00536ea6f421af9f690bffab7bdb735',
'c2f820f60db474714853c59765b0f914feb0fcfe',
'e267845618e77ae0db8ca05428c0ee421df06a11',
'e5a783a68869f7bc2fb9126b9100d98f18ea747c'
]
_expected_new_revisions_first_visit = {
'969e0340155266e2a5b851e428e602152c08385f':
'3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
'c9b9ae8360ce8a1e22867226987f61163c12d4c4':
'5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
'47831123f42cea24d6023e5570825cb62c3c1898':
'5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
}
_expected_new_revisions_second_visit = {
'a4ffa8770a901c895a67bec7a501036e83aae256':
'8faa8fbcbba90c36ab7dd076fd8fda5a9c405f8a',
'e6e41e3deb8df2b183f2d45e8f2e49a991c069a9':
'b1224309f00536ea6f421af9f690bffab7bdb735',
'ca12202b8a0eee7364204687649146e73e19ed32':
'025bca14fcc9f84b7ebb09df4ec1b3fadd89a74c'
}
_expected_new_snapshot_first_visit = 'f2f59503de5a8aeabe7b68ce761d9e112713d996'
_expected_branches_first_visit = {
'HEAD': {
'target': 'releases/0.0.4',
'target_type': 'alias'
},
'releases/0.0.2': {
'target': '969e0340155266e2a5b851e428e602152c08385f',
'target_type': 'revision'
},
'releases/0.0.3': {
'target': 'c9b9ae8360ce8a1e22867226987f61163c12d4c4',
'target_type': 'revision'
},
'releases/0.0.4': {
'target': '47831123f42cea24d6023e5570825cb62c3c1898',
'target_type': 'revision'
}
}
_expected_new_snapshot_second_visit = '57957179a0ea016fcf9d02874b68547f2bd5698d' # noqa
_expected_branches_second_visit = {
'HEAD': {
'target': 'releases/0.2.0',
'target_type': 'alias'
},
'releases/0.0.2': {
'target': '969e0340155266e2a5b851e428e602152c08385f',
'target_type': 'revision'
},
'releases/0.0.3': {
'target': 'c9b9ae8360ce8a1e22867226987f61163c12d4c4',
'target_type': 'revision'
},
'releases/0.0.4': {
'target': '47831123f42cea24d6023e5570825cb62c3c1898',
'target_type': 'revision'
},
'releases/0.0.5': {
'target': 'a4ffa8770a901c895a67bec7a501036e83aae256',
'target_type': 'revision'
},
'releases/0.1.0': {
'target': 'e6e41e3deb8df2b183f2d45e8f2e49a991c069a9',
'target_type': 'revision'
},
'releases/0.2.0': {
'target': 'ca12202b8a0eee7364204687649146e73e19ed32',
'target_type': 'revision'
}
}
class NpmLoaderTest(NpmLoader):
def parse_config_file(self, *args, **kwargs):
return _LOADER_TESTS_CONFIG
@requests_mock.Mocker()
class TestNpmLoader(unittest.TestCase, BaseLoaderStorageTest):
@classmethod
def setUpClass(cls):
cls.reset_loader()
@classmethod
def reset_loader(cls):
cls.loader = NpmLoaderTest()
cls.storage = cls.loader.storage
def reset_loader_counters(self):
counters_reset = dict.fromkeys(self.loader.counters.keys(), 0)
self.loader.counters.update(counters_reset)
def test_npm_loader_1_empty_package(self, m):
init_test_data(m, package_metadata_file(empty_package),
package_metadata_url(empty_package))
self.loader.load(empty_package, package_url(empty_package),
package_metadata_url(empty_package))
self.assertCountContents(0)
self.assertCountDirectories(0)
self.assertCountRevisions(0)
self.assertCountReleases(0)
self.assertCountSnapshots(1)
expected_branches = {}
self.assertSnapshotEqual(
snapshot_identifier({'branches': expected_branches}),
expected_branches
)
self.assertEqual(self.loader.load_status(), {'status': 'uneventful'})
self.assertEqual(self.loader.visit_status(), 'full')
self.assertFalse(os.path.exists(self.loader.temp_directory))
def test_npm_loader_2_first_visit(self, m):
self.reset_loader()
init_test_data(m, package_metadata_file(package, visit=1),
package_metadata_url(package))
self.loader.load(package, package_url(package),
package_metadata_url(package))
self.assertCountContents(len(_expected_new_contents_first_visit))
self.assertCountDirectories(len(_expected_new_directories_first_visit))
self.assertCountRevisions(3, '3 releases so 3 revisions should be created') # noqa
self.assertCountReleases(0, 'No release is created by the npm loader')
self.assertCountSnapshots(1, 'Only 1 snapshot targeting all revisions')
self.assertContentsContain(_expected_new_contents_first_visit)
self.assertDirectoriesContain(_expected_new_directories_first_visit)
self.assertRevisionsContain(_expected_new_revisions_first_visit)
self.assertSnapshotEqual(_expected_new_snapshot_first_visit,
_expected_branches_first_visit)
self.assertEqual(self.loader.counters['contents'],
len(_expected_new_contents_first_visit))
self.assertEqual(self.loader.counters['directories'],
len(_expected_new_directories_first_visit))
self.assertEqual(self.loader.counters['revisions'],
len(_expected_new_revisions_first_visit))
self.assertEqual(self.loader.counters['releases'], 0)
self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
self.assertEqual(self.loader.visit_status(), 'full')
self.assertFalse(os.path.exists(self.loader.temp_directory))
def test_npm_loader_3_first_visit_again(self, m):
self.reset_loader_counters()
init_test_data(m, package_metadata_file(package, visit=1),
package_metadata_url(package))
self.loader.load(package, package_url(package),
package_metadata_url(package))
# previously loaded objects should still be here
self.assertCountContents(len(_expected_new_contents_first_visit))
self.assertCountDirectories(len(_expected_new_directories_first_visit))
self.assertCountRevisions(len(_expected_new_revisions_first_visit))
self.assertCountReleases(0)
self.assertCountSnapshots(1)
self.assertSnapshotEqual(_expected_new_snapshot_first_visit,
_expected_branches_first_visit)
# no objects should have been loaded in that visit
counters_reset = dict.fromkeys(self.loader.counters.keys(), 0)
self.assertEqual(self.loader.counters, counters_reset)
self.assertEqual(self.loader.load_status(), {'status': 'uneventful'})
self.assertEqual(self.loader.visit_status(), 'full')
self.assertFalse(os.path.exists(self.loader.temp_directory))
def test_npm_loader_4_second_visit(self, m):
self.reset_loader_counters()
init_test_data(m, package_metadata_file(package, visit=2),
package_metadata_url(package))
self.loader.load(package, package_url(package),
package_metadata_url(package))
expected_nb_contents = sum([len(_expected_new_contents_first_visit),
len(_expected_new_contents_second_visit)])
expected_nb_directories = sum([len(_expected_new_directories_first_visit), # noqa
len(_expected_new_directories_second_visit)]) # noqa
expected_nb_revisions = sum([len(_expected_new_revisions_first_visit),
len(_expected_new_revisions_second_visit)]) # noqa
self.assertCountContents(expected_nb_contents)
self.assertCountDirectories(expected_nb_directories)
self.assertCountRevisions(expected_nb_revisions)
self.assertCountReleases(0)
self.assertCountSnapshots(2)
self.assertContentsContain(_expected_new_contents_first_visit)
self.assertContentsContain(_expected_new_contents_second_visit)
self.assertDirectoriesContain(_expected_new_directories_first_visit)
self.assertDirectoriesContain(_expected_new_directories_second_visit)
self.assertRevisionsContain(_expected_new_revisions_first_visit)
self.assertRevisionsContain(_expected_new_revisions_second_visit)
self.assertSnapshotEqual(_expected_new_snapshot_first_visit,
_expected_branches_first_visit)
self.assertSnapshotEqual(_expected_new_snapshot_second_visit,
_expected_branches_second_visit)
self.assertEqual(self.loader.counters['contents'],
len(_expected_new_contents_second_visit))
self.assertEqual(self.loader.counters['directories'],
len(_expected_new_directories_second_visit))
self.assertEqual(self.loader.counters['revisions'],
len(_expected_new_revisions_second_visit))
self.assertEqual(self.loader.counters['releases'], 0)
self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
self.assertEqual(self.loader.visit_status(), 'full')
self.assertFalse(os.path.exists(self.loader.temp_directory))
def test_npm_loader_5_package_json_non_unicode_encoding(self, m):
init_test_data(m, package_metadata_file(package_non_utf8_encoding),
package_metadata_url(package_non_utf8_encoding))
self.loader.load(package_non_utf8_encoding,
package_url(package_non_utf8_encoding),
package_metadata_url(package_non_utf8_encoding))
self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
self.assertEqual(self.loader.visit_status(), 'full')
self.assertFalse(os.path.exists(self.loader.temp_directory))
+
+ @patch('swh.loader.npm.client.tarball')
+ def test_npm_loader_6_invalid_tarball(self, m, mock_tarball):
+
+ def _tarball_uncompress(filepath, path):
+ if filepath.endswith('0.0.3.tgz'):
+ raise Exception('Invalid tarball!')
+ else:
+ tarball.uncompress(filepath, path)
+
+ mock_tarball.uncompress.side_effect = _tarball_uncompress
+
+ self.reset_loader()
+ init_test_data(m, package_metadata_file(package, visit=1),
+ package_metadata_url(package))
+ self.loader.load(package, package_url(package),
+ package_metadata_url(package))
+
+ snapshot = self.loader.last_snapshot()
+ for branch, target in snapshot['branches'].items():
+ if branch == b'releases/0.0.3':
+ self.assertIsNone(target)
+ else:
+ self.assertIsNotNone(target)
+
+ self.assertEqual(self.loader.load_status(), {'status': 'eventful'})
+ self.assertEqual(self.loader.visit_status(), 'partial')