pypi converters: tolerate missing author fields

author() now reads 'author' and 'author_email' with dict.get(), so metadata
lacking either key no longer raises KeyError.  When no name can be derived,
a fresh copy of the module-level EMPTY_AUTHOR constant is returned, so a
caller that mutates the result cannot corrupt the shared constant.

Tests cover the empty case (including the copy guarantee) and the
name-only case.

Note: indentation and blank-line positions were reconstructed from a
whitespace-collapsed copy of this patch and are consistent with the hunk
line counts below.  The "index" hashes are the ones recorded in that copy.
Use `git apply --ignore-whitespace` if context whitespace differs.

diff --git a/swh/loader/pypi/converters.py b/swh/loader/pypi/converters.py
index 69411f9..8f7a4c7 100644
--- a/swh/loader/pypi/converters.py
+++ b/swh/loader/pypi/converters.py
@@ -1,70 +1,74 @@
 # Copyright (C) 2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
+EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
+
+
 def info(data):
     """Given a dict of a PyPI project information, returns a project
        subset.
 
     Args:
         data (dict): Representing either artifact information or
                      release information.
 
     Returns:
         A dict subset of project information.
 
     """
     _info = data['info']
     default = {
         'home_page': _info['home_page'],
         'description': _info['description'],
         'summary': _info['summary'],
         'license': _info['license'],
         'package_url': _info['package_url'],
         'project_url': _info['project_url'],
         'upstream': None,
     }
 
     project_urls = _info.get('project_urls')
     if project_urls:
         homepage = project_urls.get('Homepage')
         if homepage:
             default['upstream'] = homepage
 
     return default
 
 
 def author(data):
     """Given a dict of project/release artifact information (coming from
        PyPI), returns an author subset.
 
     Args:
         data (dict): Representing either artifact information or
                      release information.
 
     Returns:
         swh-model dict representing a person.
 
     """
-    name = data['author']
-    email = data['author_email']
+    name = data.get('author')
+    email = data.get('author_email')
+
     if email:
         fullname = '%s <%s>' % (name, email)
     else:
         fullname = name
 
     if not fullname:
-        return {'fullname': b'', 'name': None, 'email': None}
+        return dict(EMPTY_AUTHOR)  # copy: callers may mutate the result
 
     if fullname:
         fullname = fullname.encode('utf-8')
 
     if name:
         name = name.encode('utf-8')
 
     if email:
         email = email.encode('utf-8')
 
     return {'fullname': fullname, 'name': name, 'email': email}
diff --git a/swh/loader/pypi/tests/test_converters.py b/swh/loader/pypi/tests/test_converters.py
index cfced07..0e2f804 100644
--- a/swh/loader/pypi/tests/test_converters.py
+++ b/swh/loader/pypi/tests/test_converters.py
@@ -1,113 +1,132 @@
 # Copyright (C) 2018 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from unittest import TestCase
 from nose.tools import istest
 
-from swh.loader.pypi.converters import author, info
+from swh.loader.pypi.converters import author, EMPTY_AUTHOR
 
 from .common import WithProjectTest
 
 
 class Test(WithProjectTest):
     @istest
     def info(self):
         actual_info = self.project.info()
 
         expected_info = {
             'home_page': self.data['info']['home_page'],
             'description': self.data['info']['description'],
             'summary': self.data['info']['summary'],
             'license': self.data['info']['license'],
             'package_url': self.data['info']['package_url'],
             'project_url': self.data['info']['project_url'],
             'upstream': self.data['info']['project_urls']['Homepage'],
         }
         self.assertEqual(expected_info, actual_info)
 
     @istest
     def author(self):
         info = self.data['info']
         actual_author = author(info)
 
         name = info['author'].encode('utf-8')
         email = info['author_email'].encode('utf-8')
         expected_author = {
             'fullname': b'%s <%s>' % (name, email),
             'name': name,
             'email': email,
         }
 
         self.assertEqual(expected_author, actual_author)
 
+    @istest
+    def no_author(self):
+        actual_author = author({})
+
+        self.assertEqual(EMPTY_AUTHOR, actual_author)
+        # a copy is expected, so mutating the result cannot alter the constant
+        self.assertIsNot(EMPTY_AUTHOR, actual_author)
+
+    @istest
+    def partial_author(self):
+        actual_author = author({'author': 'someone'})
+        expected_author = {
+            'name': b'someone',
+            'fullname': b'someone',
+            'email': None,
+        }
+
+        self.assertEqual(expected_author, actual_author)
+
 
 class ParseAuthorTest(TestCase):
     @istest
     def author_basic(self):
         data = {
             'author': "i-am-groot",
             'author_email': 'iam@groot.org',
         }
 
         actual_author = author(data)
 
         expected_author = {
             'fullname': b'i-am-groot <iam@groot.org>',
             'name': b'i-am-groot',
             'email': b'iam@groot.org',
         }
 
         self.assertEquals(actual_author, expected_author)
 
     @istest
     def author_malformed(self):
         data = {
             'author': "['pierre', 'paul', 'jacques']",
             'author_email': None,
         }
 
         actual_author = author(data)
 
         expected_author = {
             'fullname': b"['pierre', 'paul', 'jacques']",
             'name': b"['pierre', 'paul', 'jacques']",
             'email': None,
         }
 
         self.assertEquals(actual_author, expected_author)
 
     @istest
     def author_malformed_2(self):
         data = {
             'author': '[marie, jeanne]',
             'author_email': '[marie@some, jeanne@thing]',
         }
 
         actual_author = author(data)
 
         expected_author = {
             'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
             'name': b'[marie, jeanne]',
             'email': b'[marie@some, jeanne@thing]',
         }
 
         self.assertEquals(actual_author, expected_author)
 
     @istest
     def author_malformed_3(self):
         data = {
             'author': '[marie, jeanne, pierre]',
             'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
         }
 
         actual_author = author(data)
 
         expected_author = {
             'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>',  # noqa
             'name': b'[marie, jeanne, pierre]',
             'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
         }
 
         self.assertEquals(actual_author, expected_author)