diff --git a/README.md b/README.md
index 181e7aa..527f11b 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,84 @@
swh-loader-npm
==============
-Software Heritage loader to ingest npm packages into the archive.
\ No newline at end of file
+Software Heritage loader to ingest [`npm`](https://www.npmjs.com/) packages into the archive.
+
+# What does the loader do?
+
+The npm loader visits and loads an npm package [1].
+
+Each visit will result in:
+- 1 snapshot (targeting n revisions, one per package release version)
+- n revisions (each targeting 1 directory holding the uncompressed content of a package release version)
+
+[1] https://docs.npmjs.com/about-packages-and-modules
+
+## First visit
+
+Given an npm package (origin), the loader, on a first visit:
+
+- retrieves information about the given package (notably its released versions)
+- then, for each released version:
+  - retrieves the associated tarball (with checksum verification)
+  - uncompresses the archive locally
+  - computes the hashes of the uncompressed directory
+  - creates a revision (using the ``package.json`` metadata file) targeting that directory
+- finally, creates a snapshot targeting all seen revisions (the uncompressed package release versions and their metadata), as sketched below.
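+
+For illustration, here is a minimal sketch of the snapshot produced by such a visit. It follows the branch naming scheme used by the loader (`releases/<version>` branches plus a `HEAD` alias to the latest release); the revision ids are hypothetical placeholders:
+
+```lang=python
+snapshot = {
+    'branches': {
+        # one branch per package release version
+        b'releases/1.0.0': {
+            'target_type': 'revision',
+            'target': b'<revision id for version 1.0.0>',
+        },
+        b'releases/1.1.0': {
+            'target_type': 'revision',
+            'target': b'<revision id for version 1.1.0>',
+        },
+        # alias to the latest released version
+        b'HEAD': {
+            'target_type': 'alias',
+            'target': b'releases/1.1.0',
+        },
+    },
+}
+```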
+
+## Next visit
+
+The loader first checks whether anything changed since the last visit. If nothing did, the previous snapshot is left unchanged and the new visit simply targets that same snapshot.
+
+If something changed, the already seen package release versions are skipped and only the new ones are loaded. In the end, the loader creates a new snapshot based on the previous one; thus, the new snapshot targets both the old and the new package release versions. This incremental filtering is sketched below.
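+
+A minimal sketch of this filtering, with hypothetical values (the actual implementation lives in `NpmLoader._known_versions` and `NpmClient.package_versions`):
+
+```lang=python
+# mapping built from the previous snapshot:
+# (version, tarball sha1) -> revision id
+known_versions = {
+    ('1.0.0', '15afe58f0ac0a5b107e5df4ae6f7895e6b471e19'): b'<revision id>',
+}
+
+# a version listed in the registry metadata is only loaded if its
+# (version, sha1) key has not been seen in a previous visit
+key = ('1.1.0', 'b6f8b9b3be1d5bd049b9b13cd79b6dc0042f865e')
+if key not in known_versions:
+    pass  # download, uncompress and load this release version
+```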
+
+# Development
+
+## Configuration file
+
+### Location
+
+Either:
+- `/etc/softwareheritage/loader/npm.yml`
+- `~/.config/swh/loader/npm.yml`
+
+### Configuration sample
+
+```lang=yaml
+storage:
+  cls: remote
+  args:
+    url: http://localhost:5002/
+
+debug: false
+```
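+
+For local testing, the storage backend can be swapped out. For instance, assuming the in-memory storage backend is available in your `swh.storage` version, a configuration like the following avoids the need for a running storage server; setting `debug` to `true` additionally keeps the temporary directories around for inspection:
+
+```lang=yaml
+storage:
+  cls: memory
+  args: {}
+
+debug: true
+```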
+
+## Local run
+
+The built-in command line runs the loader for a given npm package.
+
+For instance, to load `jquery`:
+```lang=bash
+$ python3 -m swh.loader.npm.loader jquery
+```
+
+If you need more control, you can use the loader directly. It expects
+three arguments:
+- `package_name` (required): an npm package name
+- `package_url` (optional): URL of the npm package description (human-readable HTML page), used as the associated origin URL in the archive
+- `package_metadata_url` (optional): URL of the npm package metadata (machine-parsable JSON document)
+
+```lang=python
+import logging
+
+from urllib.parse import quote
+
+from swh.loader.npm.loader import NpmLoader
+
+logging.basicConfig(level=logging.DEBUG)
+
+package_name = 'webpack'
+
+NpmLoader().load(package_name,
+                 'https://www.npmjs.com/package/%s/' % package_name,
+                 'https://replicate.npmjs.com/%s/' % quote(package_name, safe=''))
+```
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 5891dcd..7176c21 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,18 +1,12 @@
-.. _swh-py-template:
+.. _swh-loader-npm:
Software Heritage - npm loader
==============================
Loader for `npm <https://www.npmjs.com/>`_ packages.
.. toctree::
   :maxdepth: 2
   :caption: Contents:
-
-Indices and tables
-==================
-
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+   /apidoc/swh.loader.npm
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 9e518b1..9197768 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
-swh.core
+swh.core >= 0.0.57
swh.model >= 0.0.28
-swh.storage >= 0.0.108
+swh.storage >= 0.0.131
swh.scheduler
-swh.loader.core >= 0.0.35
+swh.loader.core >= 0.0.40
diff --git a/requirements.txt b/requirements.txt
index ae22f85..0ff2a23 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,4 @@
+python-dateutil
+requests
setuptools
vcversioner
diff --git a/setup.py b/setup.py
index 484e064..dff7ecf 100755
--- a/setup.py
+++ b/setup.py
@@ -1,66 +1,67 @@
#!/usr/bin/env python3
-# Copyright (C) 2015-2018 The Software Heritage developers
+# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from setuptools import setup, find_packages
from os import path
from io import open
here = path.abspath(path.dirname(__file__))
# Get the long description from the README file
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
long_description = f.read()
def parse_requirements(name=None):
if name:
reqf = 'requirements-%s.txt' % name
else:
reqf = 'requirements.txt'
requirements = []
if not path.exists(reqf):
return requirements
with open(reqf) as f:
for line in f.readlines():
line = line.strip()
if not line or line.startswith('#'):
continue
requirements.append(line)
return requirements
setup(
name='swh.loader.npm',
description='Software Heritage loader for npm packages',
long_description=long_description,
long_description_content_type='text/markdown',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/source/swh-loader-npm.git',
packages=find_packages(),
+ scripts=[],
install_requires=parse_requirements() + parse_requirements('swh'),
tests_require=parse_requirements('test'),
setup_requires=['vcversioner'],
extras_require={'testing': parse_requirements('test')},
vcversioner={},
include_package_data=True,
entry_points={},
classifiers=[
"Programming Language :: Python :: 3",
"Intended Audience :: Developers",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
"Development Status :: 3 - Alpha",
],
project_urls={
'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
'Funding': 'https://www.softwareheritage.org/donate',
'Source': 'https://forge.softwareheritage.org/source/swh-loader-npm',
},
)
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
new file mode 100644
index 0000000..69e3be5
--- /dev/null
+++ b/swh/loader/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/loader/npm/client.py b/swh/loader/npm/client.py
new file mode 100644
index 0000000..49be475
--- /dev/null
+++ b/swh/loader/npm/client.py
@@ -0,0 +1,209 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+import os
+
+import requests
+
+from swh.core import tarball
+from swh.model import hashutil
+
+from swh.loader.npm.utils import extract_npm_package_author
+
+
+class NpmClient:
+ """
+ Helper class internally used by the npm loader to fetch
+ metadata for a specific package hosted on the npm registry.
+
+ Args:
+ temp_dir (str): Path to the temporary disk location used
+ to uncompress the package tarballs
+ """
+ def __init__(self, temp_dir, log=None):
+ self.root_temp_dir = temp_dir
+ self.session = requests.session()
+ self.params = {
+ 'headers': {
+ 'User-Agent': 'Software Heritage npm loader'
+ }
+ }
+ self.log = log or logging
+
+ def fetch_package_metadata(self, package_metadata_url):
+ """
+ Fetch metadata for a given package and make it the focused one.
+        This must be called prior to any other operations performed
+        by the other methods below.
+
+ Args:
+ package_metadata_url: the package metadata url provided
+ by the npm loader
+ """
+ self.package_metadata_url = package_metadata_url
+ self.package_metadata = self._request(self.package_metadata_url).json()
+ self.package = self.package_metadata['name']
+ self.temp_dir = os.path.join(self.root_temp_dir, self.package)
+
+ def latest_package_version(self):
+ """
+ Return the last released version of the focused package.
+
+ Returns:
+            str: the latest released package version
+ """
+ return self.package_metadata['dist-tags']['latest']
+
+ def package_versions(self, known_versions=None):
+ """
+ Return the available versions for the focused package.
+
+ Args:
+            known_versions (dict): optionally provided by the loader to
+                filter out versions already ingested in the archive.
+
+ Returns:
+ dict: A dict whose keys are Tuple[version, tarball_sha1] and
+            whose values are dicts with the following entries:
+
+ * **name**: the package name
+ * **version**: the package version
+ * **filename**: the package source tarball filename
+ * **sha1**: the package source tarball sha1 checksum
+ * **date**: the package release date
+ * **url**: the package source tarball download url
+ """
+ versions = {}
+ if 'versions' in self.package_metadata:
+ for version, data in self.package_metadata['versions'].items():
+ sha1 = data['dist']['shasum']
+ key = (version, sha1)
+ if known_versions and key in known_versions:
+ continue
+ tarball_url = data['dist']['tarball']
+ filename = os.path.basename(tarball_url)
+ date = self.package_metadata['time'][version]
+ versions[key] = {
+ 'name': self.package,
+ 'version': version,
+ 'filename': filename,
+ 'sha1': sha1,
+ 'date': date,
+ 'url': tarball_url
+ }
+ return versions
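+
+    # For illustration, a single entry of the dict returned by
+    # package_versions() might look like this (hypothetical values):
+    #
+    #   ('1.0.0', '15afe58f0ac0a5b107e5df4ae6f7895e6b471e19'): {
+    #       'name': 'jquery',
+    #       'version': '1.0.0',
+    #       'filename': 'jquery-1.0.0.tgz',
+    #       'sha1': '15afe58f0ac0a5b107e5df4ae6f7895e6b471e19',
+    #       'date': '2011-04-19T07:19:56.392Z',
+    #       'url': 'https://registry.npmjs.org/jquery/-/jquery-1.0.0.tgz',
+    #   }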
+
+ def prepare_package_versions(self, known_versions=None):
+ """
+        Instantiate a generator that processes a specific package release
+        version at each iteration step. The following operations are
+        performed:
+
+ 1. Create a temporary directory to download and extract the
+ release tarball
+ 2. Download the tarball
+ 3. Check downloaded tarball integrity
+ 4. Uncompress the tarball
+        5. Parse the ``package.json`` file associated with the package version
+        6. Extract the author from the parsed ``package.json`` file
+
+ Args:
+            known_versions (dict): optionally provided by the loader to
+                filter out versions already ingested in the archive.
+
+ Yields:
+ Tuple[dict, dict, dict, str]: tuples containing the following
+ members:
+
+ * a dict holding the parsed ``package.json`` file
+ * a dict holding package author information
+ * a dict holding package tarball information
+ * a string holding the path of the uncompressed package to
+ load into the archive
+ """
+ new_versions = self.package_versions(known_versions)
+ for version, package_source_data in sorted(new_versions.items()):
+            # filter out versions with missing tarballs (such cases exist);
+            # the package visit will be marked as partial at the end of
+            # the loading process
+ tarball_url = package_source_data['url']
+ tarball_request = self._request(tarball_url,
+ throw_error=False)
+ if tarball_request.status_code == 404:
+ self.log.debug('Tarball url %s returns a 404 error.' %
+ tarball_url)
+ self.log.debug(('Version %s of %s package will be missing and '
+ 'the visit will be marked as partial.') %
+ (version[0], self.package))
+ continue
+ version_data = self.package_metadata['versions'][version[0]]
+ yield self._prepare_package_version(package_source_data,
+ version_data)
+
+ def _prepare_package_version(self, package_source_data, version_data):
+ version = version_data['version']
+ self.log.debug('Processing version %s for npm package %s' %
+ (version, self.package))
+
+ # create temp dir to download and extract package tarball
+ path = os.path.join(self.temp_dir, version)
+ os.makedirs(path, exist_ok=True)
+ filepath = os.path.join(path, package_source_data['filename'])
+ self.log.debug('Package local path: %s' % filepath)
+
+ # download tarball
+ url = package_source_data['url']
+ response = self._request(url)
+ hash_names = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'}
+ h = hashutil.MultiHash(hash_names=hash_names)
+ with open(filepath, 'wb') as f:
+ for chunk in response.iter_content(chunk_size=None):
+ h.update(chunk)
+ f.write(chunk)
+
+ # check tarball integrity
+ hashes = h.hexdigest()
+ expected_digest = package_source_data['sha1']
+ actual_digest = hashes['sha1']
+ if actual_digest != expected_digest:
+            raise ValueError(
+                '%s %s: Checksum mismatch: %s != %s' % (
+                    self.package, version, expected_digest, actual_digest))
+
+ # uncompress tarball
+ tarball.uncompress(filepath, path)
+
+ # do not archive useless tarball root directory
+ package_path = os.path.join(path, 'package')
+ # some old packages use their name as root directory
+ if not os.path.exists(package_path):
+ ver_pos = package_source_data['filename'].rfind(version)
+ package_name = package_source_data['filename'][:ver_pos-1]
+ package_path = os.path.join(path, package_name)
+ # fallback: archive root tarball directory
+ if not os.path.exists(package_path):
+ package_path = path
+
+ package_source_data.update(hashes)
+
+ # parse package.json file to add its content to revision metadata
+ package_json_path = os.path.join(package_path, 'package.json')
+ package_json = {}
+ with open(package_json_path, "r") as package_json_file:
+ package_json = json.load(package_json_file)
+
+ # extract author from package.json
+ author = extract_npm_package_author(package_json)
+
+ return (package_json, author, package_source_data, package_path)
+
+ def _request(self, url, throw_error=True):
+ response = self.session.get(url, **self.params, stream=True)
+ if response.status_code != 200 and throw_error:
+ raise ValueError("Fail to query '%s'. Reason: %s" % (
+ url, response.status_code))
+ return response
diff --git a/swh/loader/npm/loader.py b/swh/loader/npm/loader.py
new file mode 100644
index 0000000..cd97048
--- /dev/null
+++ b/swh/loader/npm/loader.py
@@ -0,0 +1,315 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import shutil
+from tempfile import mkdtemp
+from urllib.parse import quote
+
+from dateutil import parser as date_parser
+
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import BufferedLoader
+from swh.model.from_disk import Directory
+from swh.model.identifiers import (
+ revision_identifier, snapshot_identifier,
+ identifier_to_bytes, normalize_timestamp
+)
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+
+from swh.loader.npm.client import NpmClient
+
+
+TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.npm.'
+
+
+class NpmLoader(BufferedLoader):
+ """
+ Loader for ingesting source packages from the npm registry
+ into the Software Heritage archive.
+ """
+
+ CONFIG_BASE_FILENAME = 'loader/npm'
+ ADDITIONAL_CONFIG = {
+ 'temp_directory': ('str', '/tmp/swh.loader.npm/'),
+ 'debug': ('bool', False)
+ }
+
+ def __init__(self):
+ super().__init__(logging_class='swh.loader.npm.NpmLoader')
+ self.origin_id = None
+ temp_directory = self.config['temp_directory']
+ os.makedirs(temp_directory, exist_ok=True)
+ self.temp_directory = mkdtemp(suffix='-%s' % os.getpid(),
+ prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+ dir=temp_directory)
+ self.debug = self.config['debug']
+ self.done = False
+ self.npm_client = NpmClient(self.temp_directory, self.log)
+
+ def pre_cleanup(self):
+ """
+        To prevent disk explosion if some other workers were killed
+        mid-air (e.g. OOM killed), we try to clean up dangling files.
+ """
+ if self.debug:
+ self.log.warning('DEBUG: will not pre-clean up temp dir %s' %
+ self.temp_directory)
+ return
+ clean_dangling_folders(self.config['temp_directory'],
+ pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
+ log=self.log)
+
+ def cleanup(self):
+ """
+ Clean up temporary disk use after downloading and extracting
+ npm source package tarballs.
+ """
+ if self.debug:
+ self.log.warning('DEBUG: will not clean up temp dir %s' %
+ self.temp_directory)
+ return
+ if os.path.exists(self.temp_directory):
+ self.log.debug('Clean up %s' % self.temp_directory)
+ shutil.rmtree(self.temp_directory)
+
+ def load(self, package_name, package_url=None,
+ package_metadata_url=None):
+ """
+        Loader entrypoint to ingest source tarballs for an npm package.
+
+ Args:
+ package_name (str): the name of the npm package
+            package_url (str): the url of the package description,
+                if not provided the following one will be used:
+                https://www.npmjs.com/package/<package_name>
+            package_metadata_url (str): the url for the package JSON metadata,
+                if not provided the following one will be used:
+                https://replicate.npmjs.com/<package_name>/
+ """
+ if package_url is None:
+ package_url = 'https://www.npmjs.com/package/%s' % package_name
+ if package_metadata_url is None:
+ package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
+ quote(package_name, safe='')
+ return super().load(package_name, package_url, package_metadata_url)
+
+ def prepare_origin_visit(self, package_name, package_url,
+ package_metadata_url):
+ """
+ Prepare npm package visit.
+
+ Args:
+ package_name (str): the name of the npm package
+ package_url (str): the url of the package description
+ package_metadata_url (str): the url for the package JSON metadata
+
+ """
+ # reset statuses
+ self._load_status = 'uneventful'
+ self._visit_status = 'full'
+ self.done = False
+ # fetch the npm package metadata from the registry
+ self.npm_client.fetch_package_metadata(package_metadata_url)
+ self.origin = {
+ 'url': package_url,
+ 'type': 'npm',
+ }
+ self.visit_date = None # loader core will populate it
+
+ def _known_versions(self, last_snapshot):
+ """
+ Retrieve the known release versions for the npm package
+ (i.e. those already ingested into the archive).
+
+        Args:
+ last_snapshot (dict): Last snapshot for the visit
+
+ Returns:
+            dict: Dict whose keys are Tuple[version, sha1] and values
+            are revision ids.
+
+ """
+ if not last_snapshot or 'branches' not in last_snapshot:
+ return {}
+
+ revs = [rev['target']
+ for rev in last_snapshot['branches'].values()
+ if rev and rev['target_type'] == 'revision']
+
+ known_revisions = self.storage.revision_get(revs)
+ ret = {}
+ for revision in known_revisions:
+ if not revision:
+ continue
+ if 'package_source' in revision['metadata']:
+ package = revision['metadata']['package_source']
+ ret[(package['version'], package['sha1'])] = revision['id']
+ return ret
+
+ def _last_snapshot(self):
+ """
+ Retrieve the last snapshot of the npm package if any.
+ """
+ snapshot = self.storage.snapshot_get_latest(self.origin_id)
+ if snapshot and snapshot.pop('next_branch', None):
+ snapshot = snapshot_get_all_branches(self.storage, snapshot['id'])
+ return snapshot
+
+ def prepare(self, package_name, package_url, package_metadata_url):
+ """
+        Prepare the effective loading of source tarballs for an npm
+        package.
+
+ Args:
+ package_name (str): the name of the npm package
+ package_url (str): the url of the package description
+ package_metadata_url (str): the url for the package JSON metadata
+ """
+ self.package_name = package_name
+ self.origin_url = package_url
+ self.package_contents = []
+ self.package_directories = []
+ self.package_revisions = []
+ self.package_load_status = 'uneventful'
+ self.package_visit_status = 'full'
+
+ last_snapshot = self._last_snapshot()
+ self.known_versions = self._known_versions(last_snapshot)
+
+ self.new_versions = \
+ self.npm_client.prepare_package_versions(self.known_versions)
+
+ def fetch_data(self):
+ """
+ Called once per package release version to process.
+
+        At each call, this will:
+ - download a tarball associated to a package release version
+ - uncompress it and compute the necessary information
+ - compute the swh objects
+
+ Returns:
+            True as long as there is data to fetch
+
+ """
+ data = None
+ if self.done:
+ return False
+
+ try:
+ data = next(self.new_versions)
+ self.package_load_status = 'eventful'
+ except StopIteration:
+ self.done = True
+ return False
+
+ package_metadata, author, package_source_data, dir_path = data
+
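+        # Directory.from_disk expects a bytes path (hence the encoding below)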
+ dir_path = dir_path.encode('utf-8')
+ directory = Directory.from_disk(path=dir_path, data=True)
+ objects = directory.collect()
+
+ self.package_contents = objects['content'].values()
+ self.package_directories = objects['directory'].values()
+
+ date = date_parser.parse(package_source_data['date'])
+
+ date = normalize_timestamp(int(date.timestamp()))
+
+ message = package_source_data['version'].encode('ascii')
+
+ revision = {
+ 'synthetic': True,
+ 'metadata': {
+ 'package_source': package_source_data,
+ 'package': package_metadata,
+ },
+ 'author': author,
+ 'date': date,
+ 'committer': author,
+ 'committer_date': date,
+ 'message': message,
+ 'directory': directory.hash,
+ 'parents': [],
+ 'type': 'tar',
+ }
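+        # compute the revision's intrinsic identifier from its attributes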
+ revision['id'] = identifier_to_bytes(revision_identifier(revision))
+
+ self.package_revisions.append(revision)
+
+ package_key = (package_source_data['version'],
+ package_source_data['sha1'])
+ self.known_versions[package_key] = revision['id']
+
+ return not self.done
+
+ def _target_from_version(self, version, sha1):
+ """
+ Return revision information if any for a given package version.
+ """
+ target = self.known_versions.get((version, sha1))
+ return {
+ 'target': target,
+ 'target_type': 'revision',
+ } if target else None
+
+ def _generate_and_load_snapshot(self):
+ """
+        Generate and load the snapshot for the npm package visit.
+ """
+ branches = {}
+ latest_version = self.npm_client.latest_package_version()
+ for version_data in self.npm_client.package_versions().values():
+ version = version_data['version']
+ sha1 = version_data['sha1']
+ branch_name = ('releases/%s' % version).encode('ascii')
+ target = self._target_from_version(version, sha1)
+ branches[branch_name] = target
+ if version == latest_version:
+ branches[b'HEAD'] = {
+ 'target_type': 'alias',
+ 'target': branch_name,
+ }
+ if not target:
+ self.package_visit_status = 'partial'
+ snapshot = {
+ 'branches': branches,
+ }
+ snapshot['id'] = identifier_to_bytes(snapshot_identifier(snapshot))
+
+ self.maybe_load_snapshot(snapshot)
+
+ def store_data(self):
+ """
+ Send collected objects to storage.
+ """
+ self.maybe_load_contents(self.package_contents)
+ self.maybe_load_directories(self.package_directories)
+ self.maybe_load_revisions(self.package_revisions)
+
+ if self.done:
+ self._generate_and_load_snapshot()
+ self.flush()
+
+ def load_status(self):
+ return {
+ 'status': self.package_load_status,
+ }
+
+ def visit_status(self):
+ return self.package_visit_status
+
+
+if __name__ == '__main__':
+ import logging
+ import sys
+ logging.basicConfig(level=logging.DEBUG)
+ if len(sys.argv) != 2:
+        logging.error('Usage: %s <package_name>' % sys.argv[0])
+ sys.exit(1)
+ package_name = sys.argv[1]
+ loader = NpmLoader()
+ loader.load(package_name)
diff --git a/swh/loader/npm/utils.py b/swh/loader/npm/utils.py
new file mode 100644
index 0000000..f09edd3
--- /dev/null
+++ b/swh/loader/npm/utils.py
@@ -0,0 +1,116 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+
+_EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
+
+# https://github.com/jonschlinkert/author-regex
+_author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
+
+
+def parse_npm_package_author(author_str):
+ """
+ Parse npm package author string.
+
+ It works with a flexible range of formats, as detailed below::
+
+        name
+        name <email>
+        name <email> (url)
+        name <email>(url)
+        name<email> (url)
+        name<email>(url)
+        name (url)
+        name(url)
+        <email>
+        <email> (url)
+        <email>(url)
+        (url)
+
+ Args:
+ author_str (str): input author string
+
+ Returns:
+ dict: A dict that may contain the following keys:
+ * name
+ * email
+ * url
+
+ """
+ author = {}
+ matches = re.findall(_author_regexp,
+ author_str.replace('<>', '').replace('()', ''),
+ re.M)
+ for match in matches:
+ if match[0].strip():
+ author['name'] = match[0].strip()
+ if match[1].strip():
+ author['email'] = match[1].strip()
+ if match[2].strip():
+ author['url'] = match[2].strip()
+ return author
+
+
+def extract_npm_package_author(package_json):
+ """
+ Extract package author from a ``package.json`` file content and
+ return it in swh format.
+
+ Args:
+ package_json (dict): Dict holding the content of parsed
+ ``package.json`` file
+
+ Returns:
+ dict: A dict with the following keys:
+ * fullname
+ * name
+ * email
+
+ """
+ author_data = {}
+ if 'author' in package_json:
+ if type(package_json['author']) is str:
+ author_data = parse_npm_package_author(package_json['author'])
+ elif type(package_json['author']) is dict:
+ author_str = ''
+ if 'name' in package_json['author']:
+ author_str += package_json['author']['name']
+ if 'email' in package_json['author']:
+ author_str += ' <%s>' % package_json['author']['email']
+ author_data = parse_npm_package_author(author_str)
+ elif 'authors' in package_json and len(package_json['authors']) > 0:
+ author_data = parse_npm_package_author(package_json['authors'][0])
+
+ name = author_data.get('name')
+ email = author_data.get('email')
+
+ fullname = None
+
+ if name and email:
+ fullname = '%s <%s>' % (name, email)
+ elif name:
+ fullname = name
+
+    if not fullname:
+        return _EMPTY_AUTHOR
+
+    fullname = fullname.encode('utf-8')
+
+ if name:
+ name = name.encode('utf-8')
+
+ if email:
+ email = email.encode('utf-8')
+
+ return {'fullname': fullname, 'name': name, 'email': email}
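+
+
+# A minimal illustration of the helpers above (hypothetical inputs and outputs):
+#
+#   parse_npm_package_author('Jane Doe <jane@example.org> (https://example.org)')
+#   -> {'name': 'Jane Doe', 'email': 'jane@example.org',
+#       'url': 'https://example.org'}
+#
+#   extract_npm_package_author({'author': 'Jane Doe <jane@example.org>'})
+#   -> {'fullname': b'Jane Doe <jane@example.org>', 'name': b'Jane Doe',
+#       'email': b'jane@example.org'}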
diff --git a/tox.ini b/tox.ini
index 335f4ed..0fb07c6 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,23 +1,16 @@
[tox]
-envlist=check-manifest,flake8,py3
+envlist=flake8,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
commands =
pytest --cov=swh --cov-branch {posargs}
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8
-
-[testenv:check-manifest]
-skip_install = true
-deps =
- check-manifest
-commands =
- {envpython} -m check_manifest {toxinidir}