diff --git a/conftest.py b/conftest.py
index 21d6ebd..3375fe2 100644
--- a/conftest.py
+++ b/conftest.py
@@ -1,66 +1,67 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 import pytest
 import yaml
 
 from typing import Any, Dict
 
 from swh.storage.tests.conftest import * # noqa
 from swh.scheduler.tests.conftest import *  # noqa
 
 
 @pytest.fixture
 def swh_loader_config(swh_storage_postgresql) -> Dict[str, Any]:
     return {
         'storage': {
             'cls': 'local',
             'args': {
                 'db': swh_storage_postgresql.dsn,
                 'objstorage': {
                     'cls': 'memory',
                     'args': {}
                 },
             },
         },
         'deposit': {
             'url': 'https://deposit.softwareheritage.org/1/private',
             'auth': {
                 'username': 'user',
                 'password': 'pass',
             }
         },
     }
 
 
 @pytest.fixture
 def swh_config(swh_loader_config, monkeypatch, tmp_path):
     conffile = os.path.join(str(tmp_path), 'loader.yml')
     with open(conffile, 'w') as f:
         f.write(yaml.dump(swh_loader_config))
     monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile)
     return conffile
 
 
 @pytest.fixture(autouse=True, scope='session')
 def swh_proxy():
     """Automatically inject this fixture in all tests to ensure no outside
        connection takes place.
 
     """
     os.environ['http_proxy'] = 'http://localhost:999'
     os.environ['https_proxy'] = 'http://localhost:999'
 
 
 @pytest.fixture(scope='session')  # type: ignore  # expected redefinition
 def celery_includes():
     return [
         'swh.loader.package.archive.tasks',
+        'swh.loader.package.cran.tasks',
         'swh.loader.package.debian.tasks',
         'swh.loader.package.deposit.tasks',
         'swh.loader.package.npm.tasks',
         'swh.loader.package.pypi.tasks',
     ]
diff --git a/requirements.txt b/requirements.txt
index 35eb9f7..ade368b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,11 @@
 # Add here external Python modules dependencies, one per line. Module names
 # should match https://pypi.python.org/pypi names. For the full spec or
 # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
 vcversioner
 retrying
 psutil
 requests
 iso8601
 pkginfo
 python-debian
+python-dateutil
diff --git a/setup.py b/setup.py
index 2ad98e6..d491c79 100755
--- a/setup.py
+++ b/setup.py
@@ -1,75 +1,76 @@
 #!/usr/bin/env python3
 # Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from setuptools import setup, find_packages
 
 from os import path
 from io import open
 
 here = path.abspath(path.dirname(__file__))
 
 # Get the long description from the README file
 with open(path.join(here, 'README.md'), encoding='utf-8') as f:
     long_description = f.read()
 
 
 def parse_requirements(name=None):
     if name:
         reqf = 'requirements-%s.txt' % name
     else:
         reqf = 'requirements.txt'
 
     requirements = []
     if not path.exists(reqf):
         return requirements
 
     with open(reqf) as f:
         for line in f.readlines():
             line = line.strip()
             if not line or line.startswith('#'):
                 continue
             requirements.append(line)
     return requirements
 
 
 setup(
     name='swh.loader.core',
     description='Software Heritage Base Loader',
     long_description=long_description,
     long_description_content_type='text/markdown',
     author='Software Heritage developers',
     author_email='swh-devel@inria.fr',
     url='https://forge.softwareheritage.org/diffusion/DLDBASE',
     packages=find_packages(),  # packages's modules
     scripts=[],   # scripts to package
     install_requires=parse_requirements() + parse_requirements('swh'),
     setup_requires=['vcversioner'],
     extras_require={'testing': parse_requirements('test')},
     vcversioner={},
     include_package_data=True,
     entry_points='''
         [swh.workers]
         loader.archive=swh.loader.package.archive:register
+        loader.cran=swh.loader.package.cran:register
         loader.debian=swh.loader.package.debian:register
         loader.deposit=swh.loader.package.deposit:register
         loader.npm=swh.loader.package.npm:register
         loader.pypi=swh.loader.package.pypi:register
         [swh.cli.subcommands]
         loader=swh.loader.cli:loader
     ''',
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
         "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
         "Operating System :: OS Independent",
         "Development Status :: 5 - Production/Stable",
     ],
     project_urls={
         'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
         'Funding': 'https://www.softwareheritage.org/donate',
         'Source': 'https://forge.softwareheritage.org/source/swh-loader-core',
     },
 )
diff --git a/swh/loader/package/cran/__init__.py b/swh/loader/package/cran/__init__.py
new file mode 100644
index 0000000..cb8bd97
--- /dev/null
+++ b/swh/loader/package/cran/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2019  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+from typing import Any, Mapping
+
+
+def register() -> Mapping[str, Any]:
+    """Describe this worker module: the task modules to load and the
+    loader class to use.
+    """
+    from .loader import CRANLoader
+    task_modules = [f'{__name__}.tasks']
+    return {'task_modules': task_modules, 'loader': CRANLoader}
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
new file mode 100644
index 0000000..1ed24f8
--- /dev/null
+++ b/swh/loader/package/cran/loader.py
@@ -0,0 +1,160 @@
+# Copyright (C) 2019  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import dateutil.parser
+import datetime
+import os
+import logging
+import re
+
+from datetime import timezone
+from os import path
+from typing import Any, Generator, Dict, List, Mapping, Optional, Tuple
+
+from debian.deb822 import Deb822
+
+from swh.loader.package.loader import PackageLoader
+from swh.loader.package.utils import release_name, parse_author, swh_author
+from swh.model.identifiers import normalize_timestamp
+
+
+logger = logging.getLogger(__name__)
+
+
+DATE_PATTERN = re.compile(r'^(?P<year>\d{4})-(?P<month>\d{2})$')
+
+
+class CRANLoader(PackageLoader):
+    visit_type = 'cran'
+
+    def __init__(self, url: str, version: str):
+        """Loader constructor.
+
+        Args:
+            url: Origin url to retrieve cran artifact from
+            version: version of the cran artifact
+
+        """
+        super().__init__(url=url)
+        self.version = version
+        self.provider_url = url
+
+    def get_versions(self) -> List[str]:
+        # only 1 artifact
+        return [self.version]
+
+    def get_default_version(self) -> str:
+        return self.version
+
+    def get_package_info(self, version: str) -> Generator[
+            Tuple[str, Dict[str, Any]], None, None]:
+        p_info = {
+            'url': self.url,
+            'filename': path.split(self.url)[-1],
+            'raw': {}
+        }
+        yield release_name(version), p_info
+
+    def build_revision(
+            self, a_metadata: Mapping[str, Any],
+            uncompressed_path: str) -> Dict[str, Any]:
+        # a_metadata is empty, the DESCRIPTION file provides the metadata
+        metadata = extract_intrinsic_metadata(uncompressed_path)
+        normalized_date = normalize_timestamp(parse_date(metadata.get('Date')))
+        author = swh_author(parse_author(metadata.get('Maintainer', '')))
+        version = metadata.get('Version', self.version)
+        return {
+            'message': version.encode('utf-8'),
+            'type': 'tar',
+            'date': normalized_date,
+            'author': author,
+            'committer': author,
+            'committer_date': normalized_date,
+            'parents': [],
+            'metadata': {
+                'intrinsic': {
+                    'tool': 'DESCRIPTION',
+                    'raw': metadata,
+                },
+                'extrinsic': {
+                    'provider': self.provider_url,
+                    'when': self.visit_date.isoformat(),
+                    'raw': a_metadata,
+                },
+            },
+        }
+
+
+def parse_debian_control(filepath: str) -> Dict[str, Any]:
+    """Parse the debian control file at filepath into a single flat dict."""
+    metadata: Dict = {}
+    logger.debug('Debian control file %s', filepath)
+    with open(filepath) as f:  # close the file handle once parsing is done
+        for paragraph in Deb822.iter_paragraphs(f):
+            logger.debug('paragraph: %s', paragraph)
+            metadata.update(**paragraph)
+    logger.debug('metadata parsed: %s', metadata)
+    return metadata
+
+
+def extract_intrinsic_metadata(dir_path: str) -> Dict[str, Any]:
+    """Given an uncompressed path holding the DESCRIPTION file, returns a
+       DESCRIPTION parsed structure as a dict.
+
+    CRAN origins describe their intrinsic metadata within a DESCRIPTION file
+    at the root tree of a tarball. This DESCRIPTION uses a simple file format
+    called DCF, the Debian control format.
+
+    The release artifact contains at its root one folder. For example:
+    $ tar tvf zprint-0.0.6.tar.gz
+    drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
+    ...
+
+    Args:
+        dir_path (str): Path to the uncompressed directory
+                        representing a release artifact from CRAN.
+
+    Returns:
+        the DESCRIPTION parsed structure as a dict (or empty dict if missing)
+
+    """
+    # Retrieve the root folder of the archive (exactly one entry expected)
+    if not os.path.exists(dir_path):
+        return {}
+    lst = os.listdir(dir_path)
+    if len(lst) != 1:
+        return {}
+    project_dirname = lst[0]
+    description_path = os.path.join(dir_path, project_dirname, 'DESCRIPTION')
+    if not os.path.exists(description_path):
+        return {}
+    return parse_debian_control(description_path)
+
+
+def parse_date(date: Optional[str]) -> Optional[datetime.datetime]:
+    """Parse a date string into a timezone-aware datetime (naive results
+    are set to UTC). Returns None when date is empty or cannot be parsed.
+    """
+    assert not date or isinstance(date, str)
+    dt: Optional[datetime.datetime] = None
+    if not date:
+        return dt
+    try:
+        specific_date = DATE_PATTERN.match(date)
+        if specific_date:
+            year = int(specific_date.group('year'))
+            month = int(specific_date.group('month'))
+            dt = datetime.datetime(year, month, 1)
+        else:
+            dt = dateutil.parser.parse(date)
+
+        if not dt.tzinfo:
+            # up for discussion the timezone needs to be set or
+            # normalize_timestamp is not happy: ValueError: normalize_timestamp
+            # received datetime without timezone: 2001-06-08 00:00:00
+            dt = dt.replace(tzinfo=timezone.utc)
+    except Exception as e:
+        logger.warning('Fail to parse date %s. Reason: %s', date, e)
+    return dt
diff --git a/swh/loader/package/cran/tasks.py b/swh/loader/package/cran/tasks.py
new file mode 100644
index 0000000..64ba6e5
--- /dev/null
+++ b/swh/loader/package/cran/tasks.py
@@ -0,0 +1,14 @@
+# Copyright (C) 2019  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from celery import shared_task
+
+from swh.loader.package.cran.loader import CRANLoader
+
+
+@shared_task(name=__name__ + '.LoadCran')
+def load_cran(url=None, version=None):
+    """Load a CRAN artifact given its url and version"""
+    return CRANLoader(url, version).load()
diff --git a/swh/loader/package/cran/tests/__init__.py b/swh/loader/package/cran/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/loader/package/cran/tests/data/https_cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz b/swh/loader/package/cran/tests/data/https_cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz
new file mode 100644
index 0000000..52d1037
Binary files /dev/null and b/swh/loader/package/cran/tests/data/https_cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz differ
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
new file mode 100644
index 0000000..fabd1c4
--- /dev/null
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -0,0 +1,198 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import pytest
+
+from datetime import datetime, timezone
+from dateutil.tz import tzlocal
+
+from os import path
+
+from swh.loader.package.cran.loader import (
+    extract_intrinsic_metadata, CRANLoader, parse_date
+)
+from swh.core.tarball import uncompress
+
+from swh.loader.package.tests.common import (
+    check_snapshot, get_stats
+)
+
+
+def test_cran_parse_date():
+    data = [
+        # parsable, some have debatable results though (e.g. the PST, KST and
+        # EDT abbreviations are expected to be dropped in favor of UTC)
+        ('2001-June-08',
+         datetime(2001, 6, 8, 0, 0, tzinfo=timezone.utc)),
+        ('Tue Dec 27 15:06:08 PST 2011',
+         datetime(2011, 12, 27, 15, 6, 8, tzinfo=timezone.utc)),
+        ('8-14-2013',
+         datetime(2013, 8, 14, 0, 0, tzinfo=timezone.utc)),
+        ('2011-01',
+         datetime(2011, 1, 1, 0, 0, tzinfo=timezone.utc)),
+        ('201109',
+         datetime(2009, 11, 20, 0, 0, tzinfo=timezone.utc)),
+        ('04-12-2014',
+         datetime(2014, 4, 12, 0, 0, tzinfo=timezone.utc)),
+        ('2018-08-24, 10:40:10',
+         datetime(2018, 8, 24, 10, 40, 10, tzinfo=timezone.utc)),
+        ('2013-October-16',
+         datetime(2013, 10, 16, 0, 0, tzinfo=timezone.utc)),
+        ('Aug 23, 2013',
+         datetime(2013, 8, 23, 0, 0, tzinfo=timezone.utc)),
+        ('27-11-2014',
+         datetime(2014, 11, 27, 0, 0, tzinfo=timezone.utc)),
+        ('2019-09-26,',
+         datetime(2019, 9, 26, 0, 0, tzinfo=timezone.utc)),
+        ('9/25/2014',
+         datetime(2014, 9, 25, 0, 0, tzinfo=timezone.utc)),
+        ('Fri Jun 27 17:23:53 2014',
+         datetime(2014, 6, 27, 17, 23, 53, tzinfo=timezone.utc)),
+        ('28-04-2014',
+         datetime(2014, 4, 28, 0, 0, tzinfo=timezone.utc)),
+        ('04-14-2014',
+         datetime(2014, 4, 14, 0, 0, tzinfo=timezone.utc)),
+        ('2019-05-08 14:17:31 UTC',
+         datetime(2019, 5, 8, 14, 17, 31, tzinfo=timezone.utc)),
+        ('Wed May 21 13:50:39 CEST 2014',
+         datetime(2014, 5, 21, 13, 50, 39, tzinfo=tzlocal())),
+        ('2018-04-10 00:01:04 KST',
+         datetime(2018, 4, 10, 0, 1, 4, tzinfo=timezone.utc)),
+        ('2019-08-25 10:45',
+         datetime(2019, 8, 25, 10, 45, tzinfo=timezone.utc)),
+        ('March 9, 2015',
+         datetime(2015, 3, 9, 0, 0, tzinfo=timezone.utc)),
+        ('Aug. 18, 2012',
+         datetime(2012, 8, 18, 0, 0, tzinfo=timezone.utc)),
+        ('2014-Dec-17',
+         datetime(2014, 12, 17, 0, 0, tzinfo=timezone.utc)),
+        ('March 01, 2013',
+         datetime(2013, 3, 1, 0, 0, tzinfo=timezone.utc)),
+        ('2017-04-08.',
+         datetime(2017, 4, 8, 0, 0, tzinfo=timezone.utc)),
+        ('2014-Apr-22',
+         datetime(2014, 4, 22, 0, 0, tzinfo=timezone.utc)),
+        ('Mon Jan 12 19:54:04 2015',
+         datetime(2015, 1, 12, 19, 54, 4, tzinfo=timezone.utc)),
+        ('May 22, 2014',
+         datetime(2014, 5, 22, 0, 0, tzinfo=timezone.utc)),
+        ('2014-08-12 09:55:10 EDT',
+         datetime(2014, 8, 12, 9, 55, 10, tzinfo=timezone.utc)),
+        # unparsable
+        ('Fabruary 21, 2012', None),
+        ('2019-05-28"', None),
+        ('2017-03-01 today', None),
+        ('2016-11-0110.1093/icesjms/fsw182', None),
+        ('2019-07-010', None),
+        ('2015-02.23', None),
+        ('20013-12-30', None),
+        ('2016-08-017', None),
+        ('2019-02-07l', None),
+        ('2018-05-010', None),
+        ('2019-09-27 KST', None),
+        ('$Date$', None),
+        ('2019-06-22 $Date$', None),
+        ('$Date: 2013-01-18 12:49:03 -0600 (Fri, 18 Jan 2013) $', None),
+        ('2015-7-013', None),
+        ('2018-05-023', None),
+        ("Check NEWS file for changes: news(package='simSummary')", None)
+    ]
+    for date, expected_date in data:
+        actual_date = parse_date(date)
+        assert actual_date == expected_date, f'input date to parse {date}'
+
+
+@pytest.mark.fs
+def test_extract_intrinsic_metadata(tmp_path, datadir):
+    """Parsing existing archive's DESCRIPTION should yield results"""
+    uncompressed_archive_path = str(tmp_path)
+    # sample url
+    # https://cran.r-project.org/src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz  # noqa
+    archive_path = path.join(
+        datadir, 'https_cran.r-project.org',
+        'src_contrib_1.4.0_Recommended_KernSmooth_2.22-6.tar.gz')
+    uncompress(archive_path, dest=uncompressed_archive_path)
+
+    actual_metadata = extract_intrinsic_metadata(uncompressed_archive_path)
+
+    expected_metadata = {
+        'Package': 'KernSmooth',
+        'Priority': 'recommended',
+        'Version': '2.22-6',
+        'Date': '2001-June-08',
+        'Title': 'Functions for kernel smoothing for Wand & Jones (1995)',
+        'Author': 'S original by Matt Wand.\n\tR port by  Brian Ripley <ripley@stats.ox.ac.uk>.',  # noqa
+        'Maintainer': 'Brian Ripley <ripley@stats.ox.ac.uk>',
+        'Description': 'functions for kernel smoothing (and density estimation)\n  corresponding to the book: \n  Wand, M.P. and Jones, M.C. (1995) "Kernel Smoothing".',  # noqa
+        'License': 'Unlimited use and distribution (see LICENCE).',
+        'URL': 'http://www.biostat.harvard.edu/~mwand'
+    }
+
+    assert actual_metadata == expected_metadata
+
+
+@pytest.mark.fs
+def test_extract_intrinsic_metadata_failures(tmp_path):
+    """Parsing inexistent path/archive/DESCRIPTION yields an empty dict"""
+    # inexistent first level path
+    assert extract_intrinsic_metadata('/something-inexistent') == {}
+    # inexistent second level path (as expected by cran archives)
+    assert extract_intrinsic_metadata(tmp_path) == {}
+    # inexistent DESCRIPTION within second level path
+    existing_path_no_pkginfo = str(tmp_path / 'something')
+    os.mkdir(existing_path_no_pkginfo)
+    assert extract_intrinsic_metadata(tmp_path) == {}
+
+
+def test_cran_one_visit(swh_config, requests_mock_datadir):
+    version = '2.22-6'
+    base_url = 'https://cran.r-project.org'
+    url = f'{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz'  # noqa
+    loader = CRANLoader(url, version=version)
+
+    actual_load_status = loader.load()
+
+    expected_snapshot_id = '920adcccc78aaeedd3cfa4459dd900d8c3431a21'
+    assert actual_load_status == {
+        'status': 'eventful',
+        'snapshot_id': expected_snapshot_id
+    }
+
+    expected_snapshot = {
+        'id': expected_snapshot_id,
+        'branches': {
+            'HEAD': {'target': f'releases/{version}', 'target_type': 'alias'},
+            f'releases/{version}': {
+                'target': '42bdb16facd5140424359c8ce89a28ecfa1ce603',
+                'target_type': 'revision'
+            }
+        }
+    }
+    check_snapshot(expected_snapshot, loader.storage)
+
+    origin_visit = next(loader.storage.origin_visit_get(url))
+    assert origin_visit['status'] == 'full'
+    assert origin_visit['type'] == 'cran'
+
+    visit_stats = get_stats(loader.storage)
+    assert {
+        'content': 33,
+        'directory': 7,
+        'origin': 1,
+        'origin_visit': 1,
+        'person': 1,
+        'release': 0,
+        'revision': 1,
+        'skipped_content': 0,
+        'snapshot': 1
+    } == visit_stats
+
+    urls = [
+        m.url for m in requests_mock_datadir.request_history
+        if m.url.startswith(base_url)
+    ]
+    # exactly one request hit the artifact host during this single visit
+    assert len(urls) == 1
diff --git a/swh/loader/package/cran/tests/test_tasks.py b/swh/loader/package/cran/tests/test_tasks.py
new file mode 100644
index 0000000..4406375
--- /dev/null
+++ b/swh/loader/package/cran/tests/test_tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2019  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def test_cran_loader(mocker, swh_app, celery_session_worker, swh_config):
+    """The LoadCran task returns what the (mocked) CRANLoader.load returns."""
+    expected_status = {'status': 'eventful'}
+    mocker.patch(
+        'swh.loader.package.cran.loader.CRANLoader.load',
+        return_value=expected_status)
+    task_kwargs = dict(url='some-url', version='1.2.3')
+    async_result = swh_app.send_task(
+        'swh.loader.package.cran.tasks.LoadCran', (), task_kwargs)
+    assert async_result
+    async_result.wait()
+    assert async_result.successful()
+    assert async_result.result == expected_status
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
index 2569946..6ae7669 100644
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,344 +1,267 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import json
 import logging
 import os
-import re
 
 from codecs import BOM_UTF8
 from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional
 
 import chardet
 import iso8601
 
 from urllib.parse import quote
 from swh.model.identifiers import normalize_timestamp
 from swh.loader.package.loader import PackageLoader
-from swh.loader.package.utils import api_info, release_name
+from swh.loader.package.utils import (
+    api_info, release_name, parse_author, swh_author
+)
 
 
 logger = logging.getLogger(__name__)
 
 
-_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
-
-# https://github.com/jonschlinkert/author-regex
-_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
-
-
 class NpmLoader(PackageLoader):
     """Load npm origin's artifact releases into swh archive.
 
     """
     visit_type = 'npm'
 
     def __init__(self, url: str):
         """Constructor
 
         Args
             str: origin url (e.g. https://www.npmjs.com/package/<package-name>)
         """
         super().__init__(url=url)
         package_name = url.split('https://www.npmjs.com/package/')[1]
         safe_name = quote(package_name, safe='')
         self.provider_url = f'https://replicate.npmjs.com/{safe_name}/'
         self._info: Dict[str, Any] = {}
         self._versions = None
 
     @property
     def info(self) -> Dict[str, Any]:
         """Return the project metadata information (fetched from npm registry)
 
         """
         if not self._info:
             self._info = api_info(self.provider_url)
         return self._info
 
     def get_versions(self) -> Sequence[str]:
         return sorted(list(self.info['versions'].keys()))
 
     def get_default_version(self) -> str:
         return self.info['dist-tags'].get('latest', '')
 
     def get_package_info(self, version: str) -> Generator[
             Tuple[str, Mapping[str, Any]], None, None]:
         meta = self.info['versions'][version]
         url = meta['dist']['tarball']
         p_info = {
             'url': url,
             'filename': os.path.basename(url),
             'raw': meta,
         }
         yield release_name(version), p_info
 
     def resolve_revision_from(
             self, known_artifacts: Dict, artifact_metadata: Dict) \
             -> Optional[bytes]:
         return artifact_to_revision_id(known_artifacts, artifact_metadata)
 
     def build_revision(
             self, a_metadata: Dict, uncompressed_path: str) -> Dict:
         i_metadata = extract_intrinsic_metadata(uncompressed_path)
         # from intrinsic metadata
         author = extract_npm_package_author(i_metadata)
         message = i_metadata['version'].encode('ascii')
 
         # from extrinsic metadata
 
         # No date available in intrinsic metadata: retrieve it from the API
         # metadata, using the version number that the API claims this package
         # has.
         extrinsic_version = a_metadata['version']
         date = self.info['time'][extrinsic_version]
         date = iso8601.parse_date(date)
         date = normalize_timestamp(int(date.timestamp()))
 
         return {
             'type': 'tar',
             'message': message,
             'author': author,
             'date': date,
             'committer': author,
             'committer_date': date,
             'parents': [],
             'metadata': {
                 'intrinsic': {
                     'tool': 'package.json',
                     'raw': i_metadata,
                 },
                 'extrinsic': {
                     'provider': self.provider_url,
                     'when': self.visit_date.isoformat(),
                     'raw': a_metadata,
                 },
             },
         }
 
 
 def artifact_to_revision_id(
         known_artifacts: Dict, artifact_metadata: Dict) -> Optional[bytes]:
     """Given metadata artifact, solves the associated revision id.
 
     The following code allows to deal with 2 metadata formats:
 
     - old format sample:
 
         {
             'package_source': {
                 'sha1': '05181c12cd8c22035dd31155656826b85745da37',
             }
         }
 
     - new format sample:
 
         {
             'original_artifact': [{
                 'checksums': {
                     'sha256': "6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec",  # noqa
                     ...
                 },
             }],
             ...
         }
 
     """
     shasum = artifact_metadata['dist']['shasum']
     for rev_id, known_artifact in known_artifacts.items():
         known_original_artifact = known_artifact.get('original_artifact')
         if not known_original_artifact:
             # previous loader-npm version kept original artifact elsewhere
             known_original_artifact = known_artifact.get('package_source')
             if not known_original_artifact:
                 continue
             original_hash = known_original_artifact['sha1']
         else:
             assert isinstance(known_original_artifact, list)
             original_hash = known_original_artifact[0]['checksums']['sha1']
         if shasum == original_hash:
             return rev_id
     return None
 
 
-def parse_npm_package_author(author_str):
-    """
-    Parse npm package author string.
-
-    It works with a flexible range of formats, as detailed below::
-
-        name
-        name <email> (url)
-        name <email>(url)
-        name<email> (url)
-        name<email>(url)
-        name (url) <email>
-        name (url)<email>
-        name(url) <email>
-        name(url)<email>
-        name (url)
-        name(url)
-        name <email>
-        name<email>
-        <email> (url)
-        <email>(url)
-        (url) <email>
-        (url)<email>
-        <email>
-        (url)
-
-    Args:
-        author_str (str): input author string
-
-    Returns:
-        dict: A dict that may contain the following keys:
-            * name
-            * email
-            * url
-
-    """
-    author = {}
-    matches = re.findall(_author_regexp,
-                         author_str.replace('<>', '').replace('()', ''),
-                         re.M)
-    for match in matches:
-        if match[0].strip():
-            author['name'] = match[0].strip()
-        if match[1].strip():
-            author['email'] = match[1].strip()
-        if match[2].strip():
-            author['url'] = match[2].strip()
-    return author
-
-
 def extract_npm_package_author(package_json):
     """
     Extract package author from a ``package.json`` file content and
     return it in swh format.
 
     Args:
         package_json (dict): Dict holding the content of parsed
             ``package.json`` file
 
     Returns:
         dict: A dict with the following keys:
             * fullname
             * name
             * email
 
     """
 
     def _author_str(author_data):
         if type(author_data) is dict:
             author_str = ''
             if 'name' in author_data:
                 author_str += author_data['name']
             if 'email' in author_data:
                 author_str += ' <%s>' % author_data['email']
             return author_str
         elif type(author_data) is list:
             return _author_str(author_data[0]) if len(author_data) > 0 else ''
         else:
             return author_data
 
     author_data = {}
     for author_key in ('author', 'authors'):
         if author_key in package_json:
             author_str = _author_str(package_json[author_key])
-            author_data = parse_npm_package_author(author_str)
-
-    name = author_data.get('name')
-    email = author_data.get('email')
-
-    fullname = None
-
-    if name and email:
-        fullname = '%s <%s>' % (name, email)
-    elif name:
-        fullname = name
-
-    if not fullname:
-        return _EMPTY_AUTHOR
-
-    if fullname:
-        fullname = fullname.encode('utf-8')
-
-    if name:
-        name = name.encode('utf-8')
-
-    if email:
-        email = email.encode('utf-8')
+            author_data = parse_author(author_str)
 
-    return {'fullname': fullname, 'name': name, 'email': email}
+    return swh_author(author_data)
 
 
 def _lstrip_bom(s, bom=BOM_UTF8):
     if s.startswith(bom):
         return s[len(bom):]
     else:
         return s
 
 
 def load_json(json_bytes):
     """
     Try to load JSON from bytes and return a dictionary.
 
     First try to decode from utf-8. If the decoding failed,
     try to detect the encoding and decode again with replace
     error handling.
 
     If JSON is malformed, an empty dictionary will be returned.
 
     Args:
         json_bytes (bytes): binary content of a JSON file
 
     Returns:
         dict: JSON data loaded in a dictionary
     """
     json_data = {}
     try:
         json_str = _lstrip_bom(json_bytes).decode('utf-8')
     except UnicodeDecodeError:
         encoding = chardet.detect(json_bytes)['encoding']
         if encoding:
             json_str = json_bytes.decode(encoding, 'replace')
     try:
         json_data = json.loads(json_str)
     except json.decoder.JSONDecodeError:
         pass
     return json_data
 
 
 def extract_intrinsic_metadata(dir_path: str) -> Dict:
     """Given an uncompressed path holding the pkginfo file, returns a
        pkginfo parsed structure as a dict.
 
        The release artifact contains at their root one folder. For example:
        $ tar tvf zprint-0.0.6.tar.gz
        drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
        ...
 
     Args:
 
         dir_path (str): Path to the uncompressed directory
                         representing a release artifact from npm.
 
     Returns:
         the pkginfo parsed structure as a dict if any or None if
         none was present.
 
     """
     # Retrieve the root folder of the archive
     if not os.path.exists(dir_path):
         return {}
     lst = os.listdir(dir_path)
     if len(lst) == 0:
         return {}
     project_dirname = lst[0]
     package_json_path = os.path.join(dir_path, project_dirname, 'package.json')
     if not os.path.exists(package_json_path):
         return {}
     with open(package_json_path, 'rb') as package_json_file:
         package_json_bytes = package_json_file.read()
         return load_json(package_json_bytes)
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
index b3bb0dd..dc5438f 100644
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -1,653 +1,511 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import json
 import os
 import pytest
 
 from swh.model.hashutil import hash_to_bytes
 
 from swh.loader.package.npm.loader import (
-    NpmLoader, parse_npm_package_author, extract_npm_package_author,
+    NpmLoader, extract_npm_package_author,
     artifact_to_revision_id
 )
 from swh.loader.package.tests.common import (
     check_snapshot, check_metadata_paths, get_stats
 )
 
 
-def _parse_author_string_test(author_str, expected_result):
-    assert parse_npm_package_author(author_str) == expected_result
-    assert parse_npm_package_author(' %s' % author_str) == expected_result
-    assert parse_npm_package_author('%s ' % author_str) == expected_result
-
-
-def test_parse_npm_package_author():
-    _parse_author_string_test(
-        'John Doe',
-        {
-            'name': 'John Doe'
-        }
-    )
-
-    _parse_author_string_test(
-        '<john.doe@foo.bar>',
-        {
-            'email': 'john.doe@foo.bar'
-        }
-    )
-
-    _parse_author_string_test(
-        '(https://john.doe)',
-        {
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe <john.doe@foo.bar>',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe<john.doe@foo.bar>',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe (https://john.doe)',
-        {
-            'name': 'John Doe',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe(https://john.doe)',
-        {
-            'name': 'John Doe',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        '<john.doe@foo.bar> (https://john.doe)',
-        {
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        '(https://john.doe) <john.doe@foo.bar>',
-        {
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe <john.doe@foo.bar> (https://john.doe)',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe (https://john.doe) <john.doe@foo.bar>',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe<john.doe@foo.bar> (https://john.doe)',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe<john.doe@foo.bar>(https://john.doe)',
-        {
-            'name': 'John Doe',
-            'email': 'john.doe@foo.bar',
-            'url': 'https://john.doe'
-        }
-    )
-
-    _parse_author_string_test('', {})
-    _parse_author_string_test('<>', {})
-    _parse_author_string_test(' <>', {})
-    _parse_author_string_test('<>()', {})
-    _parse_author_string_test('<> ()', {})
-    _parse_author_string_test('()', {})
-    _parse_author_string_test(' ()', {})
-
-    _parse_author_string_test(
-        'John Doe <> ()',
-        {
-            'name': 'John Doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe <>',
-        {
-            'name': 'John Doe'
-        }
-    )
-
-    _parse_author_string_test(
-        'John Doe ()',
-        {
-            'name': 'John Doe'
-        }
-    )
-
-
 def test_extract_npm_package_author(datadir):
     """extract_npm_package_author handles the various npm author layouts.
 
     Two versions are read from the 'org_visit1' metadata file of the data
     directory; the inline package.json samples cover an ``author`` string,
     an ``authors`` list of dicts, an ``author`` list of strings and an
     ``authors`` dict.
 
     """
     package_metadata_filepath = os.path.join(
         datadir, 'https_replicate.npmjs.com', 'org_visit1')
 
     with open(package_metadata_filepath) as json_file:
         package_metadata = json.load(json_file)
 
     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.2']) ==
         {
             'fullname': b'mooz <stillpedant@gmail.com>',
             'name': b'mooz',
             'email': b'stillpedant@gmail.com'
         }
     )
 
     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
         {
             'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
             'name': b'Masafumi Oyamada',
             'email': b'stillpedant@gmail.com'
         }
     )
 
     # "author" given as a single "name <email>" string
     package_json = json.loads('''
     {
         "name": "highlightjs-line-numbers.js",
         "version": "2.7.0",
         "description": "Highlight.js line numbers plugin.",
         "main": "src/highlightjs-line-numbers.js",
         "dependencies": {},
         "devDependencies": {
             "gulp": "^4.0.0",
             "gulp-rename": "^1.4.0",
             "gulp-replace": "^0.6.1",
             "gulp-uglify": "^1.2.0"
         },
         "repository": {
             "type": "git",
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
         },
         "author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
         "license": "MIT",
         "bugs": {
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
         },
         "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
     }''') # noqa
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
             'name': b'Yauheni Pakala',
             'email': b'evgeniy.pakalo@gmail.com'
         }
 
     # "authors" given as a list of dicts: the first entry is expected
     package_json = json.loads('''
     {
         "name": "3-way-diff",
         "version": "0.0.1",
         "description": "3-way diffing of JavaScript objects",
         "main": "index.js",
         "authors": [
             {
                 "name": "Shawn Walsh",
                 "url": "https://github.com/shawnpwalsh"
             },
             {
                 "name": "Markham F Rollins IV",
                 "url": "https://github.com/mrollinsiv"
             }
         ],
         "keywords": [
             "3-way diff",
             "3 way diff",
             "three-way diff",
             "three way diff"
         ],
         "devDependencies": {
             "babel-core": "^6.20.0",
             "babel-preset-es2015": "^6.18.0",
             "mocha": "^3.0.2"
         },
         "dependencies": {
             "lodash": "^4.15.0"
         }
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Shawn Walsh',
             'name': b'Shawn Walsh',
             'email': None
         }
 
     # "author" given as a list of strings: the first entry is expected
     package_json = json.loads('''
     {
         "name": "yfe-ynpm",
         "version": "1.0.0",
         "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
         "repository": {
             "type": "git",
             "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
         },
         "author": [
             "fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
             "xufuzi <xufuzi@ywwl.com> (https://7993.org)"
         ],
         "license": "MIT"
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'fengmk2 <fengmk2@gmail.com>',
             'name': b'fengmk2',
             'email': b'fengmk2@gmail.com'
         }
 
     # "authors" given as a single dict
     package_json = json.loads('''
     {
         "name": "umi-plugin-whale",
         "version": "0.0.8",
         "description": "Internal contract component",
         "authors": {
             "name": "xiaohuoni",
             "email": "448627663@qq.com"
         },
         "repository": "alitajs/whale",
         "devDependencies": {
             "np": "^3.0.4",
             "umi-tools": "*"
         },
         "license": "MIT"
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'xiaohuoni <448627663@qq.com>',
             'name': b'xiaohuoni',
             'email': b'448627663@qq.com'
         }
 
 
 def normalize_hashes(hashes):
     """Convert hash strings to bytes with hash_to_bytes, keeping the shape.
 
     A str gives a single value, a list gives a list, and anything else is
     treated as a mapping whose keys and values are both converted.
 
     """
     if isinstance(hashes, str):
         return hash_to_bytes(hashes)
     if not isinstance(hashes, list):
         return {
             hash_to_bytes(key): hash_to_bytes(value)
             for key, value in hashes.items()
         }
     return [hash_to_bytes(item) for item in hashes]
 
 
 # Content ids looked up in storage by test_npm_loader_first_visit; the length
 # of this list is also the expected 'content' counter after a first visit.
 _expected_new_contents_first_visit = normalize_hashes([
     '4ce3058e16ab3d7e077f65aabf855c34895bf17c',
     '858c3ceee84c8311adc808f8cdb30d233ddc9d18',
     '0fa33b4f5a4e0496da6843a38ff1af8b61541996',
     '85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
     '9163ac8025923d5a45aaac482262893955c9b37b',
     '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
     '18c03aac6d3e910efb20039c15d70ab5e0297101',
     '41265c42446aac17ca769e67d1704f99e5a1394d',
     '783ff33f5882813dca9239452c4a7cadd4dba778',
     'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
     '112d1900b4c2e3e9351050d1b542c9744f9793f3',
     '5439bbc4bd9a996f1a38244e6892b71850bc98fd',
     'd83097a2f994b503185adf4e719d154123150159',
     'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
     'b3523a26f7147e4af40d9d462adaae6d49eda13e',
     'cd065fb435d6fb204a8871bcd623d0d0e673088c',
     '2854a40855ad839a54f4b08f5cff0cf52fca4399',
     'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
     '0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
     '0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
     '585fc5caab9ead178a327d3660d35851db713df1',
     'e8cd41a48d79101977e3036a87aeb1aac730686f',
     '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
     '9c3cc2763bf9e9e37067d3607302c4776502df98',
     '3649a68410e354c83cd4a38b66bd314de4c8f5c9',
     'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
     '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
     '38de737da99514de6559ff163c988198bc91367a',
 ])
 
 # Directory ids that must not be reported missing after a first visit.
 _expected_new_directories_first_visit = normalize_hashes([
     '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
     '80579be563e2ef3e385226fe7a3f079b377f142c',
     '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
     'bcad03ce58ac136f26f000990fc9064e559fe1c0',
     '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
     'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
     '584b5b4b6cf7f038095e820b99386a9c232de931',
     '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
     'bb5f4ee143c970367eb409f2e4c1104898048b9d',
     '1b95491047add1103db0dfdfa84a9735dcb11e88',
     'a00c6de13471a2d66e64aca140ddb21ef5521e62',
     '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
     'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
     '202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
     '775cc516543be86c15c1dc172f49c0d4e6e78235',
     'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
 ])
 
 # Mapping whose keys are the revision ids targeted by the expected snapshot
 # branches and whose values also appear in the directory list above
 # (presumably each revision's root directory -- confirm against the loader).
 _expected_new_revisions_first_visit = normalize_hashes({
     'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
     '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     '5f9eb78af37ffd12949f235e86fac04898f9f72a':
     '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
     'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'}
 )
 
 
 def package_url(package):
     """Return the www.npmjs.com page url of the given package name."""
     url_template = 'https://www.npmjs.com/package/%s'
     return url_template % package
 
 
 def package_metadata_url(package):
     """Return the replicate.npmjs.com metadata url of the given package."""
     url_template = 'https://replicate.npmjs.com/%s/'
     return url_template % package
 
 
 def test_revision_metadata_structure(swh_config, requests_mock_datadir):
     """A loaded revision exposes the expected metadata paths and types."""
     loader = NpmLoader(package_url('org'))
 
     load_status = loader.load()
     assert load_status['status'] == 'eventful'
     assert load_status['snapshot_id'] is not None
 
     revision_id = hash_to_bytes('d8a1c7474d2956ac598a19f0f27d52f7015f117e')
     revisions = list(loader.storage.revision_get([revision_id]))
     revision = revisions[0]
 
     assert revision is not None
 
     metadata = revision['metadata']
     check_metadata_paths(metadata, paths=[
         ('intrinsic.tool', str),
         ('intrinsic.raw', dict),
         ('extrinsic.provider', str),
         ('extrinsic.when', str),
         ('extrinsic.raw', dict),
         ('original_artifact', list),
     ])
 
     # every recorded artifact must describe its file name, size and checksums
     for artifact in metadata['original_artifact']:
         check_metadata_paths(artifact, paths=[
             ('filename', str),
             ('length', int),
             ('checksums', dict),
         ])
 
 
 def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
     """A first load of the 'org' package is eventful and fills the storage.
 
     Checks the load status, the storage counters, the presence of the
     expected contents, directories and revisions, and the snapshot.
 
     """
 
     package = 'org'
     loader = NpmLoader(package_url(package))
 
     actual_load_status = loader.load()
     expected_snapshot_id = 'd0587e1195aed5a8800411a008f2f2d627f18e2d'
     assert actual_load_status == {
         'status': 'eventful',
         'snapshot_id': expected_snapshot_id
     }
 
     stats = get_stats(loader.storage)
 
     # counters are derived from the module-level expected hash collections
     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats
 
     # one result per requested content id
     assert len(list(loader.storage.content_get(
         _expected_new_contents_first_visit))) == len(
             _expected_new_contents_first_visit)
 
     # no expected directory nor revision may be reported as missing
     assert list(loader.storage.directory_missing(
         _expected_new_directories_first_visit)) == []
 
     assert list(loader.storage.revision_missing(
         _expected_new_revisions_first_visit)) == []
 
     # one branch per release, HEAD being an alias of release 0.0.4
     expected_snapshot = {
         'id': expected_snapshot_id,
         'branches': {
             'HEAD': {
                 'target': 'releases/0.0.4',
                 'target_type': 'alias'
             },
             'releases/0.0.2': {
                 'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
                 'target_type': 'revision'
             },
             'releases/0.0.3': {
                 'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
                 'target_type': 'revision'
             },
             'releases/0.0.4': {
                 'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
                 'target_type': 'revision'
             }
         }
     }
     check_snapshot(expected_snapshot, loader.storage)
 
 
 def test_npm_loader_incremental_visit(
         swh_config, requests_mock_datadir_visits):
     """Two successive loads of the 'org' package are both eventful.
 
     The second load must yield a different snapshot and add new objects,
     and no registry url may be requested more than once over both loads.
 
     """
     package = 'org'
     url = package_url(package)
     loader = NpmLoader(url)
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
     # a snapshot must have been produced (it is compared after the 2nd load)
     assert actual_load_status['snapshot_id'] is not None
     origin_visit = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'npm'
 
     stats = get_stats(loader.storage)
 
     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats
 
     loader._info = None  # reset loader internal state
     actual_load_status2 = loader.load()
     assert actual_load_status2['status'] == 'eventful'
     snap_id2 = actual_load_status2['snapshot_id']
     assert snap_id2 is not None
     assert snap_id2 != actual_load_status['snapshot_id']
 
     origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit2['status'] == 'full'
     assert origin_visit2['type'] == 'npm'
 
     stats = get_stats(loader.storage)
 
     assert {  # 3 new releases artifacts
         'content': len(_expected_new_contents_first_visit) + 14,
         'directory': len(_expected_new_directories_first_visit) + 15,
         'origin': 1,
         'origin_visit': 2,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit) + 3,
         'skipped_content': 0,
         'snapshot': 2,
     } == stats
 
     urls = [
         m.url for m in requests_mock_datadir_visits.request_history
         if m.url.startswith('https://registry.npmjs.org')
     ]
     # each registry url was requested only once across both visits
     assert len(urls) == len(set(urls))
 
 
 @pytest.mark.usefixtures('requests_mock_datadir')
 def test_npm_loader_version_divergence(swh_config):
     """Loading '@aller_shared' is eventful and yields the expected snapshot.
 
     The expected snapshot holds two release branches (0.1.0 and
     0.1.1-alpha.14), HEAD being an alias of release 0.1.0.
 
     """
     package = '@aller_shared'
     url = package_url(package)
     loader = NpmLoader(url)
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
     # an eventful load must also report the snapshot it produced
     assert actual_load_status['snapshot_id'] is not None
     origin_visit = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit['status'] == 'full'
     assert origin_visit['type'] == 'npm'
 
     stats = get_stats(loader.storage)
 
     assert {  # 1 new releases artifacts
         'content': 534,
         'directory': 153,
         'origin': 1,
         'origin_visit': 1,
         'person': 1,
         'release': 0,
         'revision': 2,
         'skipped_content': 0,
         'snapshot': 1,
     } == stats
 
     expected_snapshot = {
         'id': 'b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92',
         'branches': {
             'HEAD': {
                 'target_type': 'alias',
                 'target': 'releases/0.1.0'
             },
             'releases/0.1.0': {
                 'target_type': 'revision',
                 'target': '845673bfe8cbd31b1eaf757745a964137e6f9116',
             },
             'releases/0.1.1-alpha.14': {
                 'target_type': 'revision',
                 'target': '05181c12cd8c22035dd31155656826b85745da37',
             },
         },
     }
     check_snapshot(expected_snapshot, loader.storage)
 
 
 def test_npm_artifact_to_revision_id_none():
     """No revision id is resolved when no known artifact matches
 
     """
     shasum = '05181c12cd8c22035dd31155656826b85745da37'
 
     # the only known artifact carries no metadata at all
     known_artifacts = {'b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92': {}}
     artifact_metadata = {'dist': {'shasum': shasum}}
 
     revision_id = artifact_to_revision_id(known_artifacts, artifact_metadata)
     assert revision_id is None
 
 
 def test_npm_artifact_to_revision_id_old_loader_version():
     """Artifacts recorded under the old 'package_source' metadata scheme
        are still resolved
 
     """
     shasum = '05181c12cd8c22035dd31155656826b85745da37'
     other_id = hash_to_bytes('b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92')
     matching_id = hash_to_bytes('845673bfe8cbd31b1eaf757745a964137e6f9116')
 
     known_artifacts = {
         other_id: {'package_source': {'sha1': "something-wrong"}},
         matching_id: {'package_source': {'sha1': shasum}},
     }
     artifact_metadata = {'dist': {'shasum': shasum}}
 
     revision_id = artifact_to_revision_id(known_artifacts, artifact_metadata)
     assert revision_id == matching_id
 
 
 def test_npm_artifact_to_revision_id_current_loader_version():
     """Artifacts recorded under the current 'original_artifact' metadata
        scheme are resolved through their sha1 checksum
 
     """
     shasum = "05181c12cd8c22035dd31155656826b85745da37"
     matching_id = hash_to_bytes('b11ebac8c9d0c9e5063a2df693a18e3aba4b2f92')
     other_id = hash_to_bytes('845673bfe8cbd31b1eaf757745a964137e6f9116')
 
     known_artifacts = {
         matching_id: {
             'original_artifact': [{'checksums': {'sha1': shasum}}],
         },
         other_id: {
             'original_artifact': [{'checksums': {'sha1': 'something-wrong'}}],
         },
     }
     artifact_metadata = {'dist': {'shasum': shasum}}
 
     revision_id = artifact_to_revision_id(known_artifacts, artifact_metadata)
     assert revision_id == matching_id
diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py
index 549defc..63024b8 100644
--- a/swh/loader/package/tests/test_utils.py
+++ b/swh/loader/package/tests/test_utils.py
@@ -1,157 +1,307 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 import os
 import pytest
 
 
 import swh.loader.package
-from swh.loader.package.utils import download, api_info, release_name
+from swh.loader.package.utils import (
+    download, api_info, release_name, parse_author
+)
 
 
 def test_version_generation():
     """The package version must differ from 'devel'."""
     version = swh.loader.package.__version__
     assert version != 'devel', \
         "Make sure swh.loader.core is installed (e.g. pip install -e .)"
 
 
 @pytest.mark.fs
 def test_download_fail_to_download(tmp_path, requests_mock):
     """A 404 answer should make download raise a ValueError"""
     url = 'https://pypi.org/pypi/arrow/json'
     status_code = 404
     requests_mock.get(url, status_code=status_code)
 
     with pytest.raises(ValueError) as exc_info:
         download(url, tmp_path)
 
     expected_msg = "Fail to query '%s'. Reason: %s" % (url, status_code)
     assert exc_info.value.args[0] == expected_msg
 
 
 @pytest.mark.fs
 def test_download_ok(tmp_path, requests_mock):
     """A download answering with a content-length header should provide
        the filename, length and checksums
 
     """
     filename = 'requests-0.0.1.tar.gz'
     url = 'https://pypi.org/pypi/requests/%s' % filename
     data = 'this is something'
     headers = {'content-length': str(len(data))}
     requests_mock.get(url, text=data, headers=headers)
 
     actual_filepath, actual_hashes = download(url, dest=str(tmp_path))
 
     assert os.path.basename(actual_filepath) == filename
     assert actual_hashes['length'] == len(data)
     checksums = actual_hashes['checksums']
     assert checksums['sha1'] == 'fdd1ce606a904b08c816ba84f3125f2af44d92b2'
     assert (checksums['sha256'] ==
             '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5')
 
 
 @pytest.mark.fs
 def test_download_ok_no_header(tmp_path, requests_mock):
     """A download answering without any header should still provide
        the filename, length and checksums
 
     """
     filename = 'requests-0.0.1.tar.gz'
     url = 'https://pypi.org/pypi/requests/%s' % filename
     data = 'this is something'
     requests_mock.get(url, text=data)  # no header information
 
     actual_filepath, actual_hashes = download(url, dest=str(tmp_path))
 
     assert os.path.basename(actual_filepath) == filename
     assert actual_hashes['length'] == len(data)
     checksums = actual_hashes['checksums']
     assert checksums['sha1'] == 'fdd1ce606a904b08c816ba84f3125f2af44d92b2'
     assert (checksums['sha256'] ==
             '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5')
 
 
 @pytest.mark.fs
 def test_download_ok_with_hashes(tmp_path, requests_mock):
     """A download given the matching expected hashes should provide
        the filename, length and those same checksums
 
     """
     filename = 'requests-0.0.1.tar.gz'
     url = 'https://pypi.org/pypi/requests/%s' % filename
     data = 'this is something'
     headers = {'content-length': str(len(data))}
     requests_mock.get(url, text=data, headers=headers)
 
     # good hashes for such file
     good = {
         'sha1': 'fdd1ce606a904b08c816ba84f3125f2af44d92b2',
         'sha256': '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5',  # noqa
     }
 
     actual_filepath, actual_hashes = download(
         url, dest=str(tmp_path), hashes=good)
 
     assert os.path.basename(actual_filepath) == filename
     assert actual_hashes['length'] == len(data)
     checksums = actual_hashes['checksums']
     assert checksums['sha1'] == good['sha1']
     assert checksums['sha256'] == good['sha256']
 
 
 @pytest.mark.fs
 def test_download_fail_hashes_mismatch(tmp_path, requests_mock):
     """Mismatch hash after download should raise
 
     Each expected hash is corrupted in turn and the download must fail
     with a message naming the wrong and the actual checksum.
 
     """
     import re  # local import: only used to escape the expected message
 
     filename = 'requests-0.0.1.tar.gz'
     url = 'https://pypi.org/pypi/requests/%s' % filename
     data = 'this is something'
     requests_mock.get(url, text=data, headers={
         'content-length': str(len(data))
     })
 
     # good hashes for such file
     good = {
         'sha1': 'fdd1ce606a904b08c816ba84f3125f2af44d92b2',
         'sha256': '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5',  # noqa
     }
 
     for hash_algo, good_hash in good.items():
         wrong_hash = good_hash.replace('1', '0')
         expected_hashes = good.copy()
         expected_hashes[hash_algo] = wrong_hash  # set the wrong hash
 
         expected_msg = ("Failure when fetching %s. "
                         "Checksum mismatched: %s != %s" % (
                             url, wrong_hash, good_hash
                         ))
 
         # ``match`` is a regular expression: escape the literal message so
         # that characters such as '.' in the url only match themselves
         with pytest.raises(ValueError, match=re.escape(expected_msg)):
             download(url, dest=str(tmp_path), hashes=expected_hashes)
 
 
 def test_api_info_failure(requests_mock):
     """A 400 answer should make api_info raise a ValueError"""
     url = 'https://pypi.org/pypi/requests/json'
     status_code = 400
     requests_mock.get(url, status_code=status_code)
 
     with pytest.raises(ValueError) as exc_info:
         api_info(url)
 
     expected_msg = "Fail to query '%s'. Reason: %s" % (url, status_code)
     assert exc_info.value.args[0] == expected_msg
 
 
 def test_api_info(requests_mock):
     """api_info should return the decoded json answer as a dict"""
     url = 'https://pypi.org/pypi/requests/json'
     requests_mock.get(url, text='{"version": "0.0.1"}')
 
     expected_info = {'version': '0.0.1'}
     assert api_info(url) == expected_info
 
 
 def test_release_name():
     """release_name should prefix the version (and filename if any) with
        'releases/'
 
     """
     cases = [
         ('0.0.1', None, 'releases/0.0.1'),
         ('0.0.2', 'something', 'releases/0.0.2/something'),
     ]
     for version, filename, expected_release in cases:
         assert release_name(version, filename) == expected_release
+
+
+def _parse_author_string_test(author_str, expected_result):
+    """Check parse_author on author_str as is, then with one leading space,
+       then with one trailing space; all must give expected_result.
+
+    """
+    variants = (author_str, ' %s' % author_str, '%s ' % author_str)
+    for variant in variants:
+        assert parse_author(variant) == expected_result
+
+
+def test_parse_author():
+    """parse_author should split 'name <email> (url)' strings, whatever
+       parts are present, missing or empty
+
+    """
+    cases = [
+        # a single part
+        ('John Doe',
+         {'name': 'John Doe'}),
+        ('<john.doe@foo.bar>',
+         {'email': 'john.doe@foo.bar'}),
+        ('(https://john.doe)',
+         {'url': 'https://john.doe'}),
+        # two parts, with or without separating space
+        ('John Doe <john.doe@foo.bar>',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar'}),
+        ('John Doe<john.doe@foo.bar>',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar'}),
+        ('John Doe (https://john.doe)',
+         {'name': 'John Doe', 'url': 'https://john.doe'}),
+        ('John Doe(https://john.doe)',
+         {'name': 'John Doe', 'url': 'https://john.doe'}),
+        ('<john.doe@foo.bar> (https://john.doe)',
+         {'email': 'john.doe@foo.bar', 'url': 'https://john.doe'}),
+        ('(https://john.doe) <john.doe@foo.bar>',
+         {'email': 'john.doe@foo.bar', 'url': 'https://john.doe'}),
+        # all three parts, in either order
+        ('John Doe <john.doe@foo.bar> (https://john.doe)',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar',
+          'url': 'https://john.doe'}),
+        ('John Doe (https://john.doe) <john.doe@foo.bar>',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar',
+          'url': 'https://john.doe'}),
+        ('John Doe<john.doe@foo.bar> (https://john.doe)',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar',
+          'url': 'https://john.doe'}),
+        ('John Doe<john.doe@foo.bar>(https://john.doe)',
+         {'name': 'John Doe', 'email': 'john.doe@foo.bar',
+          'url': 'https://john.doe'}),
+        # nothing but empty delimiters
+        ('', {}),
+        ('<>', {}),
+        (' <>', {}),
+        ('<>()', {}),
+        ('<> ()', {}),
+        ('()', {}),
+        (' ()', {}),
+        # a name followed by empty delimiters
+        ('John Doe <> ()',
+         {'name': 'John Doe'}),
+        ('John Doe <>',
+         {'name': 'John Doe'}),
+        ('John Doe ()',
+         {'name': 'John Doe'}),
+    ]
+    for author_str, expected_result in cases:
+        _parse_author_string_test(author_str, expected_result)
+
+
+# TODO: write test_swh_author; only this unfinished skeleton exists so far:
+# def test_swh_author():
+#     for author, expected_author in [
+#             ({}, )
+#     ]:
diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
index 14330dd..9f78a35 100644
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -1,113 +1,197 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import copy
 import logging
 import os
 import requests
+import re
 
 from typing import Dict, Optional, Tuple
 
 from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE
 from swh.loader.package import DEFAULT_PARAMS
 
 
 logger = logging.getLogger(__name__)
 
 
 DOWNLOAD_HASHES = set(['sha1', 'sha256', 'length'])
 
 
+# Captures: name, email, url -- https://github.com/jonschlinkert/author-regex
+_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
+
+# Author swh_author() falls back to when no fullname can be built.
+_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
+
+
 def api_info(url: str) -> Dict:
     """Basic api client to retrieve information on project. This deals with
        fetching json metadata about pypi projects.
 
     Args:
         url (str): The api url (e.g PyPI, npm, etc...)
 
     Raises:
         ValueError in case of query failures (for some reasons: 404, ...)
 
     Returns:
         The associated response's information dict
 
     """
     response = requests.get(url, **DEFAULT_PARAMS)
     if response.status_code != 200:
         raise ValueError("Fail to query '%s'. Reason: %s" % (
             url, response.status_code))
     return response.json()
 
 
 def download(url: str, dest: str, hashes: Dict = {},
              filename: Optional[str] = None,
              auth: Optional[Tuple[str, str]] = None) -> Tuple[str, Dict]:
     """Download a remote tarball from url, uncompresses and computes swh hashes
        on it.
 
     Args:
         url: Artifact uri to fetch, uncompress and hash
         dest: Directory to write the archive to
         hashes: Dict of expected hashes (key is the hash algo) for the artifact
             to download (those hashes are expected to be hex string)
         auth: Optional tuple of login/password (for http authentication
             service, e.g. deposit)
 
     Raises:
         ValueError in case of any error when fetching/computing (length,
         checksums mismatched...)
 
     Returns:
         Tuple of local (filepath, hashes of filepath)
 
     """
     params = copy.deepcopy(DEFAULT_PARAMS)
     if auth is not None:
         params['auth'] = auth
     response = requests.get(url, **params, stream=True)
     if response.status_code != 200:
         raise ValueError("Fail to query '%s'. Reason: %s" % (
             url, response.status_code))
 
     filename = filename if filename else os.path.basename(url)
     logger.debug('filename: %s', filename)
     filepath = os.path.join(dest, filename)
     logger.debug('filepath: %s', filepath)
 
     h = MultiHash(hash_names=DOWNLOAD_HASHES)
     with open(filepath, 'wb') as f:
         for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
             h.update(chunk)
             f.write(chunk)
 
     # Also check the expected hashes if provided
     if hashes:
         actual_hashes = h.hexdigest()
         for algo_hash in hashes.keys():
             actual_digest = actual_hashes[algo_hash]
             expected_digest = hashes[algo_hash]
             if actual_digest != expected_digest:
                 raise ValueError(
                     'Failure when fetching %s. '
                     'Checksum mismatched: %s != %s' % (
                         url, expected_digest, actual_digest))
 
     computed_hashes = h.hexdigest()
     length = computed_hashes.pop('length')
     extrinsic_metadata = {
         'length': length,
         'filename': filename,
         'checksums': computed_hashes,
     }
 
-    logger.debug('extrinsic_metadata', extrinsic_metadata)
+    logger.debug('extrinsic_metadata: %s', extrinsic_metadata)
 
     return filepath, extrinsic_metadata
 
 
 def release_name(version: str, filename: Optional[str] = None) -> str:
     if filename:
         return 'releases/%s/%s' % (version, filename)
     return 'releases/%s' % version
+
+
+def parse_author(author_str: str) -> Dict[str, str]:
+    """Extract name, email and url from an npm package author string.
+
+    Every part is optional, and email and url may appear in either order,
+    with or without separating spaces. The recognized layouts are::
+
+        name
+        name <email> (url)
+        name <email>(url)
+        name<email> (url)
+        name<email>(url)
+        name (url) <email>
+        name (url)<email>
+        name(url) <email>
+        name(url)<email>
+        name (url)
+        name(url)
+        name <email>
+        name<email>
+        <email> (url)
+        <email>(url)
+        (url) <email>
+        (url)<email>
+        <email>
+        (url)
+
+    Args:
+        author_str: raw author string to parse
+
+    Returns:
+        A dict with only the non-blank parts that were found, stored under
+        the keys ``name``, ``email`` and ``url``.
+
+    """
+    # Empty '<>' and '()' placeholders carry no data: drop them up front.
+    cleaned = author_str.replace('<>', '').replace('()', '')
+    fields = ('name', 'email', 'url')
+
+    author = {}
+    # findall yields one (name, email, url) tuple per match; a non-blank
+    # group from a later match replaces the value kept from an earlier one.
+    for groups in re.findall(_author_regexp, cleaned, re.M):
+        for key, raw in zip(fields, groups):
+            value = raw.strip()
+            if value:
+                author[key] = value
+
+    return author
+
+
+def swh_author(author: Dict[str, str]) -> Dict[str, Optional[bytes]]:
+    """Convert a parsed author dict into the swh author dict (bytes values).
+
+    Args:
+        author: dict whose optional ``name`` and ``email`` keys are read
+
+    Returns:
+        Dict with utf-8 encoded ``fullname``, ``name`` and ``email``; when
+        there is no name, a fresh copy of the empty author (email ignored).
+
+    """
+    name = author.get('name')
+    email = author.get('email')
+    if not name:
+        # Copy: handing out the module-level dict itself would let a caller
+        # mutating its result corrupt every later "empty" author.
+        return dict(_EMPTY_AUTHOR)
+
+    fullname = '%s <%s>' % (name, email) if email else name
+    return {
+        'fullname': fullname.encode('utf-8'),
+        'name': name.encode('utf-8'),
+        'email': email.encode('utf-8') if email else None,
+    }
diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py
index f29ef9e..3fb4bd5 100644
--- a/swh/loader/tests/test_cli.py
+++ b/swh/loader/tests/test_cli.py
@@ -1,112 +1,112 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import pytest
 
 
 from swh.loader.cli import run, list, get_loader, SUPPORTED_LOADERS
 from swh.loader.package.loader import PackageLoader
 
 from click.testing import CliRunner
 
 
 def test_get_loader_wrong_input(swh_config):
     """Unsupported loader should raise
 
     """
     loader_type = 'unknown'
     assert loader_type not in SUPPORTED_LOADERS
     with pytest.raises(ValueError, match='Invalid loader'):
         get_loader(loader_type, url='db-url')
 
 
 def test_get_loader(swh_config):
     """Instantiating a supported loader should be ok
 
     """
     loader_input = {
         'archive': {
             'url': 'some-url',
             'artifacts': [],
         },
         'debian': {
             'url': 'some-url',
             'date': 'something',
             'packages': [],
         },
         'deposit': {
             'url': 'some-url',
             'deposit_id': 1,
         },
         'npm': {
             'url': 'https://www.npmjs.com/package/onepackage',
         },
         'pypi': {
             'url': 'some-url',
         },
     }
     for loader_type, kwargs in loader_input.items():
         loader = get_loader(loader_type, **kwargs)
         assert isinstance(loader, PackageLoader)
 
 
 def test_run_help(swh_config):
     """Help message should be ok
 
     """
     runner = CliRunner()
     result = runner.invoke(run, ['-h'])
 
     assert result.exit_code == 0
-    expected_help_msg = """Usage: run [OPTIONS] [archive|debian|deposit|npm|pypi] URL [OPTIONS]...
+    expected_help_msg = """Usage: run [OPTIONS] [archive|cran|debian|deposit|npm|pypi] URL [OPTIONS]...
 
   Ingest with loader <type> the origin located at <url>
 
 Options:
   -h, --help  Show this message and exit.
 """  # noqa
 
     assert result.output.startswith(expected_help_msg)
 
 
 def test_run_pypi(mocker, swh_config):
     """Triggering a load should be ok
 
     """
     mock_loader = mocker.patch('swh.loader.package.pypi.loader.PyPILoader')
     runner = CliRunner()
     result = runner.invoke(run, ['pypi', 'https://some-url'])
     assert result.exit_code == 0
     mock_loader.assert_called_once_with(url='https://some-url')  # constructor
 
 
 def test_list_help(mocker, swh_config):
     """Triggering a load should be ok
 
     """
     runner = CliRunner()
     result = runner.invoke(list, ['--help'])
     assert result.exit_code == 0
-    expected_help_msg = """Usage: list [OPTIONS] [[all|archive|debian|deposit|npm|pypi]]
+    expected_help_msg = """Usage: list [OPTIONS] [[all|archive|cran|debian|deposit|npm|pypi]]
 
   List supported loaders and optionally their arguments
 
 Options:
   -h, --help  Show this message and exit.
 """  # noqa
     assert result.output.startswith(expected_help_msg)
 
 
 def test_list_help_npm(mocker, swh_config):
     """Triggering a load should be ok
 
     """
     runner = CliRunner()
     result = runner.invoke(list, ['npm'])
     assert result.exit_code == 0
     expected_help_msg = '''Loader: Load npm origin's artifact releases into swh archive.
 signature: (url: str)
 '''  # noqa
     assert result.output.startswith(expected_help_msg)