diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
index 2a48bfe..c1e5a01 100644
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,267 +1,269 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import logging
 import os

 from codecs import BOM_UTF8
 from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional

 import chardet
 import iso8601

 from urllib.parse import quote
 from swh.model.identifiers import normalize_timestamp
 from swh.loader.package.loader import PackageLoader
 from swh.loader.package.utils import (
     api_info, release_name, parse_author, swh_author
 )

 logger = logging.getLogger(__name__)


 class NpmLoader(PackageLoader):
     """Load an npm origin's artifact releases into the swh archive.

     """
     visit_type = 'npm'

     def __init__(self, url: str):
         """Constructor

         Args:
             url (str): origin url
                 (e.g. https://www.npmjs.com/package/<package-name>)

         """
         super().__init__(url=url)
         package_name = url.split('https://www.npmjs.com/package/')[1]
         safe_name = quote(package_name, safe='')
         self.provider_url = f'https://replicate.npmjs.com/{safe_name}/'
         self._info: Dict[str, Any] = {}
         self._versions = None

     @property
     def info(self) -> Dict[str, Any]:
         """Return the project metadata information (fetched from the npm
            registry)

         """
         if not self._info:
             self._info = api_info(self.provider_url)
         return self._info

     def get_versions(self) -> Sequence[str]:
         return sorted(list(self.info['versions'].keys()))

     def get_default_version(self) -> str:
         return self.info['dist-tags'].get('latest', '')

     def get_package_info(self, version: str) -> Generator[
             Tuple[str, Mapping[str, Any]], None, None]:
         meta = self.info['versions'][version]
         url = meta['dist']['tarball']
         p_info = {
             'url': url,
             'filename': os.path.basename(url),
             'raw': meta,
         }
         yield release_name(version), p_info

     def resolve_revision_from(
             self, known_artifacts: Dict, artifact_metadata: Dict) \
             -> Optional[bytes]:
         return artifact_to_revision_id(known_artifacts, artifact_metadata)

     def build_revision(
             self, a_metadata: Dict, uncompressed_path: str) -> Dict:
         i_metadata = extract_intrinsic_metadata(uncompressed_path)
+        if not i_metadata:
+            return {}
         # from intrinsic metadata
         author = extract_npm_package_author(i_metadata)
         message = i_metadata['version'].encode('ascii')

         # from extrinsic metadata
         # No date available in intrinsic metadata: retrieve it from the API
         # metadata, using the version number that the API claims this package
         # has.
         extrinsic_version = a_metadata['version']
         date = self.info['time'][extrinsic_version]
         date = iso8601.parse_date(date)
         date = normalize_timestamp(int(date.timestamp()))

         return {
             'type': 'tar',
             'message': message,
             'author': author,
             'date': date,
             'committer': author,
             'committer_date': date,
             'parents': [],
             'metadata': {
                 'intrinsic': {
                     'tool': 'package.json',
                     'raw': i_metadata,
                 },
                 'extrinsic': {
                     'provider': self.provider_url,
                     'when': self.visit_date.isoformat(),
                     'raw': a_metadata,
                 },
             },
         }


 def artifact_to_revision_id(
         known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]:
     """Given an artifact's metadata, resolve the associated revision id.

     The following code deals with two metadata formats:

     - old format sample::

         {
             'package_source': {
                 'sha1': '05181c12cd8c22035dd31155656826b85745da37',
             }
         }

     - new format sample::

         {
             'original_artifact': [{
                 'checksums': {
                     'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec',  # noqa
                     ...
                 },
             }],
             ...
         }

     """
     shasum = artifact_metadata['dist']['shasum']
     for rev_id, known_artifact in known_artifacts.items():
         known_original_artifact = known_artifact.get('original_artifact')
         if not known_original_artifact:
             # previous loader-npm version kept original artifact elsewhere
             known_original_artifact = known_artifact.get('package_source')
             if not known_original_artifact:
                 continue
             original_hash = known_original_artifact['sha1']
         else:
             assert isinstance(known_original_artifact, list)
             original_hash = known_original_artifact[0]['checksums']['sha1']
         if shasum == original_hash:
             return rev_id
     return None


 def extract_npm_package_author(package_json):
     """
     Extract package author from a ``package.json`` file content
     and return it in swh format.

     Args:
         package_json (dict): Dict holding the content of parsed
             ``package.json`` file

     Returns:
         dict: A dict with the following keys:
             * fullname
             * name
             * email

     """
     def _author_str(author_data):
         if type(author_data) is dict:
             author_str = ''
             if 'name' in author_data:
                 author_str += author_data['name']
             if 'email' in author_data:
                 author_str += ' <%s>' % author_data['email']
             return author_str
         elif type(author_data) is list:
             return _author_str(author_data[0]) if len(author_data) > 0 else ''
         else:
             return author_data

     author_data = {}
     for author_key in ('author', 'authors'):
         if author_key in package_json:
             author_str = _author_str(package_json[author_key])
             author_data = parse_author(author_str)

     return swh_author(author_data)


 def _lstrip_bom(s, bom=BOM_UTF8):
     if s.startswith(bom):
         return s[len(bom):]
     else:
         return s


 def load_json(json_bytes):
     """
     Try to load JSON from bytes and return a dictionary.

     First try to decode from utf-8. If the decoding failed,
     try to detect the encoding and decode again with replace
     error handling.

     If JSON is malformed, an empty dictionary will be returned.

     Args:
         json_bytes (bytes): binary content of a JSON file

     Returns:
         dict: JSON data loaded in a dictionary

     """
     json_data = {}
     json_str = ''  # stays empty (hence {}) if no encoding can be detected
     try:
         json_str = _lstrip_bom(json_bytes).decode('utf-8')
     except UnicodeDecodeError:
         encoding = chardet.detect(json_bytes)['encoding']
         if encoding:
             json_str = json_bytes.decode(encoding, 'replace')
     try:
         json_data = json.loads(json_str)
     except json.decoder.JSONDecodeError:
         pass
     return json_data


 def extract_intrinsic_metadata(dir_path: str) -> Dict:
     """Given an uncompressed path holding the package.json file, return its
        parsed content as a dict.

     The release artifact contains one folder at its root. For example:

     $ tar tvf zprint-0.0.6.tar.gz
     drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
     ...

     Args:
         dir_path (str): Path to the uncompressed directory
             representing a release artifact from npm.

     Returns:
         the package.json parsed structure as a dict, or an empty dict if
         no package.json was found.

     """
     # Retrieve the root folder of the archive
     if not os.path.exists(dir_path):
         return {}
     lst = os.listdir(dir_path)
     if len(lst) == 0:
         return {}
     project_dirname = lst[0]
     package_json_path = os.path.join(dir_path, project_dirname,
                                      'package.json')
     if not os.path.exists(package_json_path):
         return {}
     with open(package_json_path, 'rb') as package_json_file:
         package_json_bytes = package_json_file.read()
         return load_json(package_json_bytes)
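
Not part of the patch: a quick illustration of the helpers above and of the new empty-dict guard. A minimal sketch, assuming `swh.loader.package` is installed; the `pkg-1.0.0` directory name is made up:

```python
import os
import tempfile

from swh.loader.package.npm.loader import (
    extract_intrinsic_metadata, load_json
)

# load_json() degrades to {} instead of raising on bad input
assert load_json(b'\xef\xbb\xbf{"name": "pkg"}') == {'name': 'pkg'}  # BOM stripped
assert load_json(b'not json at all') == {}

# A tarball root folder without a package.json yields {} from
# extract_intrinsic_metadata(), which now makes build_revision() return {}
# early instead of crashing on i_metadata['version'].
with tempfile.TemporaryDirectory() as tmp_path:
    os.mkdir(os.path.join(tmp_path, 'pkg-1.0.0'))  # hypothetical tarball root
    assert extract_intrinsic_metadata(tmp_path) == {}
```
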
""" # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) == 0: return {} project_dirname = lst[0] package_json_path = os.path.join(dir_path, project_dirname, 'package.json') if not os.path.exists(package_json_path): return {} with open(package_json_path, 'rb') as package_json_file: package_json_bytes = package_json_file.read() return load_json(package_json_bytes) diff --git a/swh/loader/package/npm/tests/data/https_registry.npmjs.org/nativescript-telerik-analytics_-_nativescript-telerik-analytics-1.0.0.tgz b/swh/loader/package/npm/tests/data/https_registry.npmjs.org/nativescript-telerik-analytics_-_nativescript-telerik-analytics-1.0.0.tgz new file mode 100644 index 0000000..9a638c7 Binary files /dev/null and b/swh/loader/package/npm/tests/data/https_registry.npmjs.org/nativescript-telerik-analytics_-_nativescript-telerik-analytics-1.0.0.tgz differ diff --git a/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/nativescript-telerik-analytics b/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/nativescript-telerik-analytics new file mode 100644 index 0000000..da38627 --- /dev/null +++ b/swh/loader/package/npm/tests/data/https_replicate.npmjs.com/nativescript-telerik-analytics @@ -0,0 +1,87 @@ +{ + "_id": "nativescript-telerik-analytics", + "_rev": "1-f9b1d737db8a1828db1da3d2deb8cd07", + "name": "nativescript-telerik-analytics", + "description": "Telerik Analytics NativeScript SDK", + "dist-tags": { + "latest": "1.0.8" + }, + "versions": { + "1.0.0": { + "name": "nativescript-telerik-analytics", + "version": "1.0.0", + "description": "Telerik Analytics NativeScript SDK", + "main": "./nativescript/index.js", + "keywords": [ + "analytics", + "telerik", + "nativescript", + "plugin", + "sdk" + ], + "author": { + "name": "Telerik" + }, + "license": "See LICENSE file", + "nativescript": { + "platforms": { + "ios": "1.0.0", + "android": "1.1.0" + } + }, + "files": [ + "common", + "nativescript", + "platforms" + ], + "_id": "nativescript-telerik-analytics@1.0.0", + "_shasum": "7589d5254a84aee46c33f56f21845a983139f801", + "_resolved": "file:upload\\nativescript-telerik-analytics-1.0.0.tgz", + "_from": "upload\\nativescript-telerik-analytics-1.0.0.tgz", + "scripts": null, + "_npmVersion": "2.14.9", + "_nodeVersion": "0.12.9", + "_npmUser": { + "name": "telerik.analytics", + "email": "analytics@telerik.com" + }, + "dist": { + "shasum": "7589d5254a84aee46c33f56f21845a983139f801", + "tarball": "https://registry.npmjs.org/nativescript-telerik-analytics/-/nativescript-telerik-analytics-1.0.0.tgz" + }, + "maintainers": [ + { + "name": "telerik.analytics", + "email": "analytics@telerik.com" + } + ] + } + }, + "readme": "# Telerik Analytics Plugin for NativeScript\n\n- [Getting started](#getting-started)\n- [API Reference](#api)\n- [Troubleshooting](#troubleshooting)\n\n## Getting started\n\n1. Obtain an AppId\n\n [Create a new application](http://docs.telerik.com/platform/help/workspaces/workspace-management/create-workspace) in Telerik Platform by choosing the *Native* application type.\n\n2. Enable Analytics\n\n Select Analytics from the left navigation menu and click *Enable Analytics*.\n\n3. Create a new NativeScript application\n\n tns create MyApp\n\n or use an existing one.\n\n4. Add the Analytics plugin (from npm). This will install the nativescript-telerik-analytics plugin in node_modules in the root of the project. When adding a new platform (or using an existing one) the plugin will be added there as well. 
+  "readme": "# Telerik Analytics Plugin for NativeScript\n\n- [Getting started](#getting-started)\n- [API Reference](#api)\n- [Troubleshooting](#troubleshooting)\n\n## Getting started\n\n1. Obtain an AppId\n\n [Create a new application](http://docs.telerik.com/platform/help/workspaces/workspace-management/create-workspace) in Telerik Platform by choosing the *Native* application type.\n\n2. Enable Analytics\n\n Select Analytics from the left navigation menu and click *Enable Analytics*.\n\n3. Create a new NativeScript application\n\n tns create MyApp\n\n or use an existing one.\n\n4. Add the Analytics plugin (from npm). This will install the nativescript-telerik-analytics plugin in node_modules in the root of the project. When adding a new platform (or using an existing one) the plugin will be added there as well. Go to the application folder and add the Analytics plugin:\n\n tns plugin add nativescript-telerik-analytics\n\n5. Go to the application folder and add the Android (or iOS) platform to the application:\n\n tns platform add android\n\n6. Initialize the plugin and start a new session in the `onLaunch` event (app.js):\n\n ```javascript\n var application = require('application');\n application.mainModule = 'main-page';\n application.cssFile = './app.css';\n\n application.on(application.launchEvent, function(context) {\n var Analytics = require('nativescript-telerik-analytics');\n Analytics.init({ appId: 'oamq6lixk0rak4dl' });\n Analytics.start();\n });\n\n application.start();\n ```\n\n7. Track some events in your application:\n\n ```javascript\n var Analytics = require('nativescript-telerik-analytics'),\n timer = require('timer');\n\n Analytics.trackEvent('MyCategory.MyEvent');\n\n Analytics.trackValue('myvalue', 245);\n\n var timingScope = Analytics.trackTimingStart('mytiming');\n timer.setTimeout(function() {\n timingScope.stop(); // or timingScope.cancel(); if you want to ignore the timing\n }, 3500);\n\n Analytics.trackTimingRaw('myrawtiming', 1300); // track timing of 1300 ms\n\n try {\n throw new Error('error message');\n } catch (e) {\n Analytics.trackException(e, 'some error context');\n }\n ```\n\n8. Attach your phone to the PC, ensure `adb devices` command lists it and run the app on the phone:\n\n tns run android\n\n## API\n\nTo use the Analytics plugin you need to require the `nativescript-telerik-analytics` module:\n\n```javascript\nvar Analytics = require('nativescript-telerik-analytics');\n```\n\nAnd then call any of the available methods on it:\n\n- ***init(settings)*** - used to initialize the plugin with different configuration options. This method must be called before starting a new session or tracking events. It is the first method that needs to be called.\n\n ```javascript\n var settings = {\n appId: 'oamq6lixk0rak4dl', // Required identifier of the application obtained in Telerik Platform\n productVersion: '1.2.3.4', // Optional - the version of the monitored application\n location: { // optionally associate some geo location coordinates with the user \n latitude: 40.719618,\n longitude: -74.010282\n },\n clientIP: '193.42.34.123', // optionally override the IP of the user\n isInternalData: false, // Optional flag allowing to enable test mode for this session. This will mark all events tracked in this particular session as \"Internal\"\n autoTrackUnhandledExceptions: false, // Optionally turn off automatic exception handling. The default value is true. The plugin subscribes to the \"application.uncaughtErrorEvent\" and automatically tracks the exception\n logger: new MyLogger() // Optionally specify a custom logger. This should be an instance of a class with info(message, obj) and error(message, obj) functions.\n };\n Analytics.init(settings);\n ```\n\n- ***start()*** - starts a new Analytics session. The SDK needs to be initialized with the init method prior to calling this method. \n\n ```javascript\n Analytics.start();\n ```\n\n- ***trackEvent(name)*** - registers a feature usage. It is recommended that related features are grouped by using simple dot-notation in the name such as e.g. relating print to pdf and print to file by naming the features \"print.pdf\" and \"print.file\" respectively \n\n ```javascript\n Analytics.trackEvent('Printing.PDF');\n ```\n\n- ***trackValue(name, value)*** - register a value on a specific feature. While calls to `trackEvent` increments the use of a feature in the session a call to this methods will associate a given value with a named feature. Use this method to e.g. track the distribution of file sizes imported or the number of results registered. Tracking this distribution across all your application usage will give insights to what scenarios your applications are handling. The value parameter must be a valid integer. \n\n ```javascript\n Analytics.trackValue('FilesProcessed', 152);\n ```\n\n- ***trackException(e, context)*** - Call to track an exception that occurred in the application. An optional context string can be associated with the exception. \n\n ```javascript\n try {\n throw new Error('error message');\n } catch (e) {\n Analytics.trackException(e, 'some optional context');\n }\n ```\n\n- ***trackTimingStart(name)*** - Starts a named timer for measuring elapsed time on operation and returns a scope that can be used to stop or cancel the timing operation. \n\n ```javascript\n var timer = require('timer'),\n timingScope = Analytics.trackTimingStart('MyTiming');\n timer.setTimeout(function() {\n timingScope.stop(); // at this stage the timer will be stopped and the elapsed time submitted to Analytics in milliseconds. You can abort the timing operation by calling timingScope.cancel(); \n }, 1450);\n ```\n\n- ***trackTimingRaw(name, durationInMilliseconds)*** - Registers elapsed time measured by some other means. \n\n ```javascript\n Analytics.trackTimingRaw('MyTiming', 563);\n ```\n\n\n## Troubleshooting\n\nIn case the application doesn't work as expected, here are some things you can verify:\n\n- For Android ensure that the AndroindManifest.xml located at `platforms\\android` contains the following permission:\n\n ```xml\n \n ```\n\n- Enable logging to see if there are some information or error messages logged. You could enable logging by writing the following module (`mylogger.js`):\n\n ```javascript\n (function(global) {\n var MyLogger = function() {\n };\n\n exports = module.exports = MyLogger;\n\n MyLogger.prototype.info = function(message, obj) {\n console.log('INFO: ' + message + (obj ? ' : ' + JSON.stringify(obj) : ''));\n };\n\n MyLogger.prototype.error = function(message, obj) {\n if (obj instanceof Error) {\n console.log('ERROR: ' + message + (obj ? ' : ' + obj.message : ''));\n } else {\n console.log('ERROR: ' + message + (obj ? ' : ' + JSON.stringify(obj) : ''));\n }\n };\n }(this || global));\n ```\n\n and then set this logger when initializing the plugin:\n\n ```javascript\n var Analytics = require('nativescript-telerik-analytics'),\n MyLogger = require('./mylogger');\n\n Analytics.init({\n appId : 'oamq6lixk0rak4dl',\n logger: new MyLogger()\n });\n ```",
+  "maintainers": [
+    {
+      "name": "telerik.analytics",
+      "email": "analytics@telerik.com"
+    }
+  ],
+  "time": {
+    "modified": "2017-10-11T03:57:58.291Z",
+    "created": "2016-01-21T13:22:05.605Z",
+    "1.0.0": "2016-01-21T13:22:05.605Z"
+  },
+  "keywords": [
+    "analytics",
+    "telerik",
+    "nativescript",
+    "plugin",
+    "sdk"
+  ],
+  "author": {
+    "name": "Telerik"
+  },
+  "license": "See LICENSE file",
+  "readmeFilename": "README.md",
+  "users": {
+    "wenhsiaoyi": true
+  }
+}
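
Not part of the patch: for reference, the fixture's `author` entry (`{"name": "Telerik"}`, no email) should come out of `extract_npm_package_author` roughly as follows; a sketch, assuming `parse_author`/`swh_author` behave as exercised by the tests below:

```python
from swh.loader.package.npm.loader import extract_npm_package_author

# name-only author: fullname equals the name, email stays None
assert extract_npm_package_author({'author': {'name': 'Telerik'}}) == {
    'fullname': b'Telerik',
    'name': b'Telerik',
    'email': None,
}
```
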
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
index dc5438f..d11ffa4 100644
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -1,511 +1,535 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import os

 import pytest

 from swh.model.hashutil import hash_to_bytes
 from swh.loader.package.npm.loader import (
     NpmLoader, extract_npm_package_author, artifact_to_revision_id
 )
 from swh.loader.package.tests.common import (
     check_snapshot, check_metadata_paths, get_stats
 )


 def test_extract_npm_package_author(datadir):
     package_metadata_filepath = os.path.join(
         datadir, 'https_replicate.npmjs.com', 'org_visit1')

     with open(package_metadata_filepath) as json_file:
         package_metadata = json.load(json_file)

     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.2']) ==
         {
             'fullname': b'mooz <stillpedant@gmail.com>',
             'name': b'mooz',
             'email': b'stillpedant@gmail.com'
         }
     )

     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
         {
             'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
             'name': b'Masafumi Oyamada',
             'email': b'stillpedant@gmail.com'
         }
     )

     package_json = json.loads('''
         {
           "name": "highlightjs-line-numbers.js",
           "version": "2.7.0",
           "description": "Highlight.js line numbers plugin.",
           "main": "src/highlightjs-line-numbers.js",
           "dependencies": {},
           "devDependencies": {
             "gulp": "^4.0.0",
             "gulp-rename": "^1.4.0",
             "gulp-replace": "^0.6.1",
             "gulp-uglify": "^1.2.0"
           },
           "repository": {
             "type": "git",
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
           },
           "author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
           "license": "MIT",
           "bugs": {
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
           },
           "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
         }''')  # noqa

     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
             'name': b'Yauheni Pakala',
             'email': b'evgeniy.pakalo@gmail.com'
         }

     package_json = json.loads('''
         {
           "name": "3-way-diff",
           "version": "0.0.1",
           "description": "3-way diffing of JavaScript objects",
           "main": "index.js",
           "authors": [
             {
               "name": "Shawn Walsh",
               "url": "https://github.com/shawnpwalsh"
             },
             {
               "name": "Markham F Rollins IV",
               "url": "https://github.com/mrollinsiv"
             }
           ],
           "keywords": [
             "3-way diff",
             "3 way diff",
             "three-way diff",
             "three way diff"
           ],
           "devDependencies": {
             "babel-core": "^6.20.0",
             "babel-preset-es2015": "^6.18.0",
             "mocha": "^3.0.2"
           },
           "dependencies": {
             "lodash": "^4.15.0"
           }
         }''')

     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Shawn Walsh',
             'name': b'Shawn Walsh',
             'email': None
         }

     package_json = json.loads('''
         {
           "name": "yfe-ynpm",
           "version": "1.0.0",
           "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
           "repository": {
             "type": "git",
             "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
           },
           "author": [
             "fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
             "xufuzi (https://7993.org)"
           ],
           "license": "MIT"
         }''')

     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'fengmk2 <fengmk2@gmail.com>',
             'name': b'fengmk2',
             'email': b'fengmk2@gmail.com'
         }

     package_json = json.loads('''
         {
           "name": "umi-plugin-whale",
           "version": "0.0.8",
           "description": "Internal contract component",
           "authors": {
             "name": "xiaohuoni",
             "email": "448627663@qq.com"
           },
           "repository": "alitajs/whale",
           "devDependencies": {
             "np": "^3.0.4",
             "umi-tools": "*"
           },
           "license": "MIT"
         }''')

     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'xiaohuoni <448627663@qq.com>',
             'name': b'xiaohuoni',
             'email': b'448627663@qq.com'
         }


 def normalize_hashes(hashes):
     if isinstance(hashes, str):
         return hash_to_bytes(hashes)
     if isinstance(hashes, list):
         return [hash_to_bytes(x) for x in hashes]
     return {hash_to_bytes(k): hash_to_bytes(v) for k, v in hashes.items()}


 _expected_new_contents_first_visit = normalize_hashes([
     '4ce3058e16ab3d7e077f65aabf855c34895bf17c',
     '858c3ceee84c8311adc808f8cdb30d233ddc9d18',
     '0fa33b4f5a4e0496da6843a38ff1af8b61541996',
     '85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
     '9163ac8025923d5a45aaac482262893955c9b37b',
     '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
     '18c03aac6d3e910efb20039c15d70ab5e0297101',
     '41265c42446aac17ca769e67d1704f99e5a1394d',
     '783ff33f5882813dca9239452c4a7cadd4dba778',
     'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
     '112d1900b4c2e3e9351050d1b542c9744f9793f3',
     '5439bbc4bd9a996f1a38244e6892b71850bc98fd',
     'd83097a2f994b503185adf4e719d154123150159',
     'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
     'b3523a26f7147e4af40d9d462adaae6d49eda13e',
     'cd065fb435d6fb204a8871bcd623d0d0e673088c',
     '2854a40855ad839a54f4b08f5cff0cf52fca4399',
     'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
     '0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
     '0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
     '585fc5caab9ead178a327d3660d35851db713df1',
     'e8cd41a48d79101977e3036a87aeb1aac730686f',
     '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
     '9c3cc2763bf9e9e37067d3607302c4776502df98',
     '3649a68410e354c83cd4a38b66bd314de4c8f5c9',
     'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
     '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
     '38de737da99514de6559ff163c988198bc91367a',
 ])


 _expected_new_directories_first_visit = normalize_hashes([
     '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
     '80579be563e2ef3e385226fe7a3f079b377f142c',
     '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
     'bcad03ce58ac136f26f000990fc9064e559fe1c0',
     '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
     'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
     '584b5b4b6cf7f038095e820b99386a9c232de931',
     '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
     'bb5f4ee143c970367eb409f2e4c1104898048b9d',
     '1b95491047add1103db0dfdfa84a9735dcb11e88',
     'a00c6de13471a2d66e64aca140ddb21ef5521e62',
     '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
     'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
     '202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
     '775cc516543be86c15c1dc172f49c0d4e6e78235',
     'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
 ])


 _expected_new_revisions_first_visit = normalize_hashes({
     'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
         '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     '5f9eb78af37ffd12949f235e86fac04898f9f72a':
         '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
         'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'
 })


 def package_url(package):
     return 'https://www.npmjs.com/package/%s' % package


 def package_metadata_url(package):
     return 'https://replicate.npmjs.com/%s/' % package


 def test_revision_metadata_structure(swh_config, requests_mock_datadir):
     package = 'org'
     loader = NpmLoader(package_url(package))

     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
     assert actual_load_status['snapshot_id'] is not None

     expected_revision_id = hash_to_bytes(
         'd8a1c7474d2956ac598a19f0f27d52f7015f117e')
     revision = list(loader.storage.revision_get([expected_revision_id]))[0]
     assert revision is not None

     check_metadata_paths(revision['metadata'], paths=[
         ('intrinsic.tool', str),
         ('intrinsic.raw', dict),
         ('extrinsic.provider', str),
         ('extrinsic.when', str),
         ('extrinsic.raw', dict),
         ('original_artifact', list),
     ])

     for original_artifact in revision['metadata']['original_artifact']:
         check_metadata_paths(original_artifact, paths=[
             ('filename', str),
             ('length', int),
             ('checksums', dict),
         ])


 def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
     package = 'org'
     loader = NpmLoader(package_url(package))

     actual_load_status = loader.load()
     expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d'
     assert actual_load_status == {
         'status': 'eventful',
         'snapshot_id': expected_snapshot_id
     }

     stats = get_stats(loader.storage)

     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats

     assert len(list(loader.storage.content_get(
         _expected_new_contents_first_visit))) == len(
             _expected_new_contents_first_visit)

     assert list(loader.storage.directory_missing(
         _expected_new_directories_first_visit)) == []

     assert list(loader.storage.revision_missing(
         _expected_new_revisions_first_visit)) == []

     expected_snapshot = {
         'id': expected_snapshot_id,
         'branches': {
             'HEAD': {
                 'target': 'releases/0.0.4',
                 'target_type': 'alias'
             },
             'releases/0.0.2': {
                 'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
                 'target_type': 'revision'
             },
             'releases/0.0.3': {
                 'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
                 'target_type': 'revision'
             },
             'releases/0.0.4': {
                 'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
                 'target_type': 'revision'
             }
         }
     }
     check_snapshot(expected_snapshot, loader.storage)


 def test_npm_loader_incremental_visit(
         swh_config, requests_mock_datadir_visits):
     package = 'org'
     url = package_url(package)
     loader = NpmLoader(url)

     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
     assert actual_load_status['snapshot_id'] is not None
     origin_visit = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'npm'

     stats = get_stats(loader.storage)
     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats

     loader._info = None  # reset loader internal state
     actual_load_status2 = loader.load()
     assert actual_load_status2['status'] == 'eventful'
     snap_id2 = actual_load_status2['snapshot_id']
     assert snap_id2 is not None
     assert snap_id2 != actual_load_status['snapshot_id']

     origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit2['status'] == 'full'
     assert origin_visit2['type'] == 'npm'

     stats = get_stats(loader.storage)
     assert {  # 3 new release artifacts
         'content': len(_expected_new_contents_first_visit) + 14,
         'directory': len(_expected_new_directories_first_visit) + 15,
         'origin': 1,
         'origin_visit': 2,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit) + 3,
         'skipped_content': 0,
         'snapshot': 2,
     } == stats

     urls = [
         m.url for m in requests_mock_datadir_visits.request_history
         if m.url.startswith('https://registry.npmjs.org')
     ]
     # we visited each artifact once across the 2 visits
     assert len(urls) == len(set(urls))


 @pytest.mark.usefixtures('requests_mock_datadir')
 def test_npm_loader_version_divergence(swh_config):
     package = '@aller_shared'
     url = package_url(package)
     loader = NpmLoader(url)

     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
     assert actual_load_status['snapshot_id'] is not None
     origin_visit = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'npm'

     stats = get_stats(loader.storage)
     assert {  # 1 new release artifact
         'content': 534,
         'directory': 153,
         'origin': 1,
         'origin_visit': 1,
         'person': 1,
         'release': 0,
         'revision': 2,
         'skipped_content': 0,
         'snapshot': 1,
     } == stats

     expected_snapshot = {
         'id': 'b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92',
         'branches': {
             'HEAD': {
                 'target_type': 'alias',
                 'target': 'releases/0.1.0'
             },
             'releases/0.1.0': {
                 'target_type': 'revision',
                 'target': '845673bfe8cbd31b1eaf757745a964137e6f9116',
             },
             'releases/0.1.1-alpha.14': {
                 'target_type': 'revision',
                 'target': '05181c12cd8c22035dd31155656826b85745da37',
             },
         },
     }
     check_snapshot(expected_snapshot, loader.storage)


 def test_npm_artifact_to_revision_id_none():
     """Current loader version should stop soon if nothing can be found

     """
     artifact_metadata = {
         'dist': {
             'shasum': '05181c12cd8c22035dd31155656826b85745da37',
         },
     }
     known_artifacts = {
         'b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92': {},
     }
     assert artifact_to_revision_id(known_artifacts, artifact_metadata) is None


 def test_npm_artifact_to_revision_id_old_loader_version():
     """Current loader version should solve old metadata scheme

     """
     artifact_metadata = {
         'dist': {
             'shasum': '05181c12cd8c22035dd31155656826b85745da37',
         }
     }
     known_artifacts = {
         hash_to_bytes('b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92'): {
             'package_source': {
                 'sha1': 'something-wrong'
             }
         },
         hash_to_bytes('845673bfe8cbd31b1eaf757745a964137e6f9116'): {
             'package_source': {
                 'sha1': '05181c12cd8c22035dd31155656826b85745da37',
             }
         }
     }
     assert artifact_to_revision_id(known_artifacts, artifact_metadata) \
         == hash_to_bytes('845673bfe8cbd31b1eaf757745a964137e6f9116')


 def test_npm_artifact_to_revision_id_current_loader_version():
     """Current loader version should be able to solve current metadata scheme

     """
     artifact_metadata = {
         'dist': {
             'shasum': '05181c12cd8c22035dd31155656826b85745da37',
         }
     }
     known_artifacts = {
         hash_to_bytes('b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92'): {
             'original_artifact': [{
                 'checksums': {
                     'sha1': '05181c12cd8c22035dd31155656826b85745da37'
                 },
             }],
         },
         hash_to_bytes('845673bfe8cbd31b1eaf757745a964137e6f9116'): {
             'original_artifact': [{
                 'checksums': {
                     'sha1': 'something-wrong'
                 },
             }],
         },
     }
     assert artifact_to_revision_id(known_artifacts, artifact_metadata) \
         == hash_to_bytes('b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92')
+
+
+def test_npm_artifact_with_no_intrinsic_metadata(
+        swh_config, requests_mock_datadir):
+    """Skip artifact with no intrinsic metadata during ingestion
+
+    """
+    package = 'nativescript-telerik-analytics'
+    url = package_url(package)
+    loader = NpmLoader(url)
+
+    actual_load_status = loader.load()
+    assert actual_load_status['status'] == 'eventful'
+
+    # no branch as one artifact without any intrinsic metadata
+    expected_snapshot = {
+        'id': '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e',
+        'branches': {},
+    }
+    check_snapshot(expected_snapshot, loader.storage)
+
+    origin_visit = list(loader.storage.origin_visit_get(url))[-1]
+    assert origin_visit['status'] == 'full'
+    assert origin_visit['type'] == 'npm'
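
Not part of the patch: the new behavior end to end, as a minimal sketch. It assumes a configured storage and mocked registry responses (in the test suite, the `swh_config` and `requests_mock_datadir` fixtures provide both):

```python
from swh.loader.package.npm.loader import NpmLoader

# The 1.0.0 tarball in the test data ships no package.json, so
# build_revision() returns {} for it and the resulting snapshot has no
# branches; the visit itself still completes and is marked 'full'.
loader = NpmLoader(
    'https://www.npmjs.com/package/nativescript-telerik-analytics')
status = loader.load()
assert status['status'] == 'eventful'
```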