diff --git a/swh/loader/npm/tests/test_utils.py b/swh/loader/npm/tests/test_utils.py index 3e18a93..b63db1d 100644 --- a/swh/loader/npm/tests/test_utils.py +++ b/swh/loader/npm/tests/test_utils.py @@ -1,268 +1,319 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os import unittest from swh.loader.npm.utils import ( parse_npm_package_author, extract_npm_package_author ) from swh.loader.npm.tests.common import ( RESOURCES_PATH, package, package_metadata_file ) class TestNpmClient(unittest.TestCase): def _parse_author_string_test(self, author_str, expected_result): self.assertEqual( parse_npm_package_author(author_str), expected_result ) self.assertEqual( parse_npm_package_author(' %s' % author_str), expected_result ) self.assertEqual( parse_npm_package_author('%s ' % author_str), expected_result ) def test_parse_npm_package_author(self): self._parse_author_string_test( 'John Doe', { 'name': 'John Doe' } ) self._parse_author_string_test( '', { 'email': 'john.doe@foo.bar' } ) self._parse_author_string_test( '(https://john.doe)', { 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) self._parse_author_string_test( 'John Doe', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) self._parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'url': 'https://john.doe' } ) self._parse_author_string_test( ' (https://john.doe)', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test( '(https://john.doe) ', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe 
(https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe (https://john.doe) ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) self._parse_author_string_test('', {}) self._parse_author_string_test('<>', {}) self._parse_author_string_test(' <>', {}) self._parse_author_string_test('<>()', {}) self._parse_author_string_test('<> ()', {}) self._parse_author_string_test('()', {}) self._parse_author_string_test(' ()', {}) self._parse_author_string_test( 'John Doe <> ()', { 'name': 'John Doe' } ) self._parse_author_string_test( 'John Doe <>', { 'name': 'John Doe' } ) self._parse_author_string_test( 'John Doe ()', { 'name': 'John Doe' } ) def test_extract_npm_package_author(self): package_metadata_filepath = os.path.join( RESOURCES_PATH, package_metadata_file(package, visit=2)) with open(package_metadata_filepath) as json_file: package_metadata = json.load(json_file) self.assertEqual( extract_npm_package_author(package_metadata['versions']['0.0.2']), { 'fullname': b'mooz ', 'name': b'mooz', 'email': b'stillpedant@gmail.com' } ) self.assertEqual( extract_npm_package_author(package_metadata['versions']['0.0.3']), { 'fullname': b'Masafumi Oyamada ', 'name': b'Masafumi Oyamada', 'email': b'stillpedant@gmail.com' } ) package_json = json.loads(''' { "name": "highlightjs-line-numbers.js", "version": "2.7.0", "description": "Highlight.js line numbers plugin.", "main": "src/highlightjs-line-numbers.js", "dependencies": {}, "devDependencies": { "gulp": "^4.0.0", "gulp-rename": "^1.4.0", "gulp-replace": "^0.6.1", "gulp-uglify": "^1.2.0" }, "repository": { "type": "git", "url": 
"https://github.com/wcoder/highlightjs-line-numbers.js.git" }, "author": "Yauheni Pakala ", "license": "MIT", "bugs": { "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues" }, "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/" }''') # noqa self.assertEqual( extract_npm_package_author(package_json), { 'fullname': b'Yauheni Pakala ', 'name': b'Yauheni Pakala', 'email': b'evgeniy.pakalo@gmail.com' } ) package_json = json.loads(''' { "name": "3-way-diff", "version": "0.0.1", "description": "3-way diffing of JavaScript objects", "main": "index.js", "authors": [ { "name": "Shawn Walsh", "url": "https://github.com/shawnpwalsh" }, { "name": "Markham F Rollins IV", "url": "https://github.com/mrollinsiv" } ], "keywords": [ "3-way diff", "3 way diff", "three-way diff", "three way diff" ], "devDependencies": { "babel-core": "^6.20.0", "babel-preset-es2015": "^6.18.0", "mocha": "^3.0.2" }, "dependencies": { "lodash": "^4.15.0" } }''') self.assertEqual( extract_npm_package_author(package_json), { 'fullname': b'Shawn Walsh', 'name': b'Shawn Walsh', 'email': None } ) + + package_json = json.loads(''' + { + "name": "yfe-ynpm", + "version": "1.0.0", + "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm", + "repository": { + "type": "git", + "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git" + }, + "author": [ + "fengmk2 (https://fengmk2.com)", + "xufuzi (https://7993.org)" + ], + "license": "MIT" + }''') + + self.assertEqual( + extract_npm_package_author(package_json), + { + 'fullname': b'fengmk2 ', + 'name': b'fengmk2', + 'email': b'fengmk2@gmail.com' + } + ) + + package_json = json.loads(''' + { + "name": "umi-plugin-whale", + "version": "0.0.8", + "description": "Internal contract component", + "authors": { + "name": "xiaohuoni", + "email": "448627663@qq.com" + }, + "repository": "alitajs/whale", + "devDependencies": { + "np": "^3.0.4", + "umi-tools": "*" + }, + "license": "MIT" + }''') + + self.assertEqual( + extract_npm_package_author(package_json), + { 
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import re

# Value returned when no usable author name can be extracted. Always hand out
# a copy of it so that callers cannot mutate the shared module-level dict.
_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}

# https://github.com/jonschlinkert/author-regex
# Groups: 1 = name, 2 = email (between angle brackets), 3 = url (between
# parentheses). Every component is optional.
_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'


def parse_npm_package_author(author_str):
    """
    Parse npm package author string.

    Each of the three components is optional and whitespace between them is
    optional too, so the following shapes (among others) are accepted::

        name
        name <email>
        name <email> (url)
        name (url)
        <email>
        <email> (url)
        (url)

    Empty ``<>`` and ``()`` placeholders are removed before matching.

    Args:
        author_str (str): input author string

    Returns:
        dict: A dict that may contain the following keys:
            * name
            * email
            * url

        An empty dict is returned when nothing could be extracted or when
        ``author_str`` is not a string.

    """
    author = {}
    if not isinstance(author_str, str):
        # e.g. a JSON ``null`` author field: nothing to parse
        return author

    cleaned = author_str.replace('<>', '').replace('()', '')
    # findall scans the whole string, so a later match overrides the value
    # found by an earlier one for the same component.
    for name, email, url in re.findall(_author_regexp, cleaned, re.M):
        for key, value in (('name', name), ('email', email), ('url', url)):
            value = value.strip()
            if value:
                author[key] = value
    return author


def extract_npm_package_author(package_json):
    """
    Extract package author from a ``package.json`` file content and
    return it in swh format.

    The ``author`` field is looked up first; ``authors`` is only used when
    ``author`` is missing or does not provide an author name. Each field may
    hold a string, a dict with ``name`` / ``email`` keys, or a list of those
    (in which case the first entry is used).

    Args:
        package_json (dict): Dict holding the content of parsed
            ``package.json`` file

    Returns:
        dict: A dict with the following keys:
            * fullname
            * name
            * email

        Values are UTF-8 encoded bytes; ``email`` is ``None`` when unknown.
        When no author name can be found, a dict equal to
        ``{'fullname': b'', 'name': None, 'email': None}`` is returned.

    """

    def _author_str(author_data):
        # Normalize the supported field shapes to a single author string.
        if isinstance(author_data, str):
            return author_data
        if isinstance(author_data, dict):
            author_str = ''
            name = author_data.get('name')
            email = author_data.get('email')
            if isinstance(name, str):
                author_str += name
            if isinstance(email, str):
                author_str += ' <%s>' % email
            return author_str
        if isinstance(author_data, list):
            return _author_str(author_data[0]) if author_data else ''
        # null, numbers, booleans, ...: no author information
        return ''

    author_data = {}
    for author_key in ('author', 'authors'):
        if author_key not in package_json:
            continue
        author_data = parse_npm_package_author(
            _author_str(package_json[author_key]))
        if author_data.get('name'):
            # 'author' takes precedence: do not let 'authors' override it
            break

    name = author_data.get('name')
    email = author_data.get('email')

    if not name:
        # a fullname cannot be built without a name
        return dict(_EMPTY_AUTHOR)

    fullname = '%s <%s>' % (name, email) if email else name

    return {
        'fullname': fullname.encode('utf-8'),
        'name': name.encode('utf-8'),
        'email': email.encode('utf-8') if email else None,
    }