diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
index 2c5a083..76e115d 100644
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -1,384 +1,385 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import logging
 import tempfile
 import os
 
 from typing import (
     Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple
 )
 
 from swh.core.tarball import uncompress
 from swh.core.config import SWHConfig
 from swh.model.from_disk import Directory
 from swh.model.identifiers import (
     revision_identifier, snapshot_identifier, identifier_to_bytes
 )
 from swh.storage import get_storage
 from swh.storage.algos.snapshot import snapshot_get_all_branches
 from swh.loader.core.converters import content_for_storage
 from swh.loader.package.utils import download
 
 
 logger = logging.getLogger(__name__)
 
 
 # Not implemented yet:
 # - clean up disk routines from previous killed workers (when OOMkilled)
 # -> separation of concern would like this to be abstracted from the code
 # -> experience tells us it's complicated to do as such (T903, T964, T982,
 #    etc...)
 #
 # - model: swh.model.merkle.from_disk should output swh.model.model.* objects
 #          to avoid this layer's conversion routine call
 # -> Take this up within swh.model's current implementation
 
 
 class PackageLoader:
     # Origin visit type (str) set by the loader
     visit_type = ''
 
     def __init__(self, url):
         """Set up the loader for one origin.
 
         The configuration is parsed and validated first (see
         :meth:`_check_configuration`, which raises when the minimal
         configuration is missing); only then is the storage instantiated.
 
         Args:
             url (str): Origin url to load data from
 
         """
         # The configuration file location is expected to be provided through
         # the SWH_CONFIG_FILENAME environment variable
         self.config = SWHConfig.parse_config_file()
         self._check_configuration()
         storage_config = self.config['storage']
         self.storage = get_storage(**storage_config)
         self.url = url
         utc = datetime.timezone.utc
         self.visit_date = datetime.datetime.now(tz=utc)
 
     def _check_configuration(self):
         """Checks the minimal configuration required is set for the loader.
 
         If some required configuration is missing, exception detailing the
         issue is raised.
 
         """
         if 'storage' not in self.config:
             raise ValueError(
                 'Misconfiguration, at least the storage key should be set')
 
     def get_versions(self) -> Sequence[str]:
         """Return the list of all published package versions.
 
         This base implementation knows no version and returns an empty
         list. :meth:`load` iterates over the returned versions and asks
         :meth:`get_package_info` about each of them.
 
         Returns:
             Sequence of published versions
 
         """
         return []
 
     def get_package_info(self, version: str) -> Generator[
             Tuple[str, Mapping[str, Any]], None, None]:
         """Given a release version of a package, retrieve the associated
            package information for such version.
 
         This base implementation yields nothing. :meth:`load` reads the
         ``raw`` key of each yielded package information, and the default
         :meth:`download_package` reads its ``url`` and ``filename`` keys.
 
         Args:
             version: Package version
 
         Yields:
             (branch name, package metadata)
 
         """
         yield from {}
 
     def build_revision(
             self, a_metadata: Dict, i_metadata: Dict) -> Dict:
         """Build the revision dict from the archive metadata (extrinsic
         artifact metadata) and the intrinsic metadata.
 
         This base implementation returns an empty dict.
 
         NOTE(review): :meth:`load` calls this with the ``raw`` package
         information as first argument and the *path of the uncompressed
         artifact* (a str) as second argument, which does not match the
         ``i_metadata: Dict`` annotation -- to be confirmed and aligned.
 
         Args:
             a_metadata: Extrinsic (archive) metadata of the artifact
             i_metadata: Intrinsic metadata (see the note above)
 
         Returns:
             SWH data dict
 
         """
         return {}
 
     def get_default_release(self) -> str:
         """Retrieve the latest release version
 
         This base implementation returns an empty string.
 
         Returns:
             Latest version
 
         """
         return ''
 
     def last_snapshot(self) -> Optional[Dict]:
         """Retrieve the last snapshot
 
         """
         visit = self.storage.origin_visit_get_latest(
             self.url, require_snapshot=True)
         if visit:
             return snapshot_get_all_branches(
                 self.storage, visit['snapshot']['id'])
 
     def known_artifacts(self, snapshot: Dict) -> [Dict]:
         """Retrieve the known releases/artifact for the origin.
 
         Args
             snapshot: snapshot for the visit
 
         Returns:
             Dict of keys revision id (bytes), values a metadata Dict.
 
         """
         if not snapshot or 'branches' not in snapshot:
             return {}
 
         # retrieve only revisions (e.g the alias we do not want here)
         revs = [rev['target']
                 for rev in snapshot['branches'].values()
                 if rev and rev['target_type'] == 'revision']
         known_revisions = self.storage.revision_get(revs)
 
         ret = {}
         for revision in known_revisions:
             if not revision:  # revision_get can return None
                 continue
             ret[revision['id']] = revision['metadata']
 
         return ret
 
     def resolve_revision_from(
             self, known_artifacts: Dict, artifact_metadata: Dict) \
             -> Optional[bytes]:
         """Resolve the revision from the known artifacts and an artifact
         metadata dict.
 
         If the artifact has already been downloaded, this will return the
         existing revision targeting that uncompressed artifact directory.
         Otherwise, this returns None.
 
         This base implementation never resolves anything: it always returns
         None, so :meth:`load` downloads every artifact.
 
         Args:
             known_artifacts: Dict of revision id to revision metadata, as
                 returned by :meth:`known_artifacts`
             artifact_metadata: Information dict
 
         Returns:
             None or revision identifier
 
         """
         return None
 
     def download_package(self, p_info: Mapping[str, Any],
                          tmpdir: str) -> List[Tuple[str, Dict]]:
         """Download artifacts for a specific package. All downloads happen
         in the tmpdir folder.
 
         Default implementation expects the package info to be about one
         artifact per package.
 
         Note that most implementation have 1 artifact per package. But some
         implementation have multiple artifacts per package (debian), some have
         none, the package is the artifact (gnu).
 
         Args:
             p_info: Information on the package artifact to download. The
                 ``url`` key is required, the ``filename`` key is optional.
             tmpdir: Location to retrieve such artifacts
 
         Returns:
             List of (path, computed hashes)
 
         """
         a_uri = p_info['url']
         filename = p_info.get('filename')
         return [download(a_uri, dest=tmpdir, filename=filename)]
 
     def uncompress(self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]],
                    dest: str) -> str:
         """Uncompress the artifact(s) in the destination folder dest.
 
         Optionally, this could need to use the p_info dict for some more
         information (debian).
 
         """
         uncompressed_path = os.path.join(dest, 'src')
         for a_path, _ in dl_artifacts:
             uncompress(a_path, dest=uncompressed_path)
         return uncompressed_path
 
     def load(self) -> Dict:
         """Load for a specific origin the associated contents.
 
         for each package version of the origin
 
         1. Fetch the files for one package version By default, this can be
            implemented as a simple HTTP request. Loaders with more specific
            requirements can override this, e.g.: the PyPI loader checks the
            integrity of the downloaded files; the Debian loader has to download
            and check several files for one package version.
 
         2. Extract the downloaded files By default, this would be a universal
            archive/tarball extraction.
 
            Loaders for specific formats can override this method (for instance,
            the Debian loader uses dpkg-source -x).
 
         3. Convert the extracted directory to a set of Software Heritage
            objects Using swh.model.from_disk.
 
         4. Extract the metadata from the unpacked directories This would only
            be applicable for "smart" loaders like npm (parsing the
            package.json), PyPI (parsing the PKG-INFO file) or Debian (parsing
            debian/changelog and debian/control).
 
            On "minimal-metadata" sources such as the GNU archive, the lister
            should provide the minimal set of metadata needed to populate the
            revision/release objects (authors, dates) as an argument to the
            task.
 
         5. Generate the revision/release objects for the given version. From
            the data generated at steps 3 and 4.
 
         end for each
 
         6. Generate and load the snapshot for the visit
 
         Using the revisions/releases collected at step 5., and the branch
         information from step 0., generate a snapshot and load it into the
         Software Heritage archive
 
         """
         status_load = 'uneventful'  # either: eventful, uneventful, failed
         status_visit = 'full'       # either: partial, full
         tmp_revisions = {}  # type: Dict[str, List]
         snapshot = None
 
         try:
             # Prepare origin and origin_visit
             origin = {'url': self.url}
             self.storage.origin_add([origin])
             visit_id = self.storage.origin_visit_add(
                 origin=self.url,
                 date=self.visit_date,
                 type=self.visit_type)['visit']
             last_snapshot = self.last_snapshot()
             logger.debug('last snapshot: %s', last_snapshot)
             known_artifacts = self.known_artifacts(last_snapshot)
             logger.debug('known artifacts: %s', known_artifacts)
 
             # Retrieve the default release (the "latest" one)
             default_release = self.get_default_release()
             logger.debug('default release: %s', default_release)
 
             for version in self.get_versions():  # for each
                 logger.debug('version: %s', version)
                 tmp_revisions[version] = []
                 # `p_` stands for `package_`
                 for branch_name, p_info in self.get_package_info(version):
                     logger.debug('package_info: %s', p_info)
                     revision_id = self.resolve_revision_from(
                         known_artifacts, p_info['raw'])
                     if revision_id is None:
                         with tempfile.TemporaryDirectory() as tmpdir:
                             try:
                                 dl_artifacts = self.download_package(
                                     p_info, tmpdir)
                             except Exception:
                                 logger.exception('Unable to retrieve %s',
                                                  p_info)
                                 status_visit = 'partial'
                                 continue
 
                             uncompressed_path = self.uncompress(
                                 dl_artifacts, dest=tmpdir)
                             logger.debug('uncompressed_path: %s',
                                          uncompressed_path)
 
                             directory = Directory.from_disk(
                                 path=uncompressed_path.encode('utf-8'),
                                 data=True)  # noqa
                             # FIXME: Try not to load the full raw content in
                             # memory
                             objects = directory.collect()
 
                             contents = objects['content'].values()
                             logger.debug('Number of contents: %s',
                                          len(contents))
 
                             self.storage.content_add(
                                 map(content_for_storage, contents))
 
                             status_load = 'eventful'
                             directories = objects['directory'].values()
 
                             logger.debug('Number of directories: %s',
                                          len(directories))
 
                             self.storage.directory_add(directories)
 
                             # FIXME: This should be release. cf. D409
                             revision = self.build_revision(
                                 p_info['raw'], uncompressed_path)
                             revision.update({
                                 'synthetic': True,
                                 'directory': directory.hash,
                             })
 
                         revision['metadata'].update({
                             'original_artifact': [
                                 hashes for _, hashes in dl_artifacts
                             ],
                         })
 
                         revision['id'] = revision_id = identifier_to_bytes(
                             revision_identifier(revision))
 
                         logger.debug('Revision: %s', revision)
 
                         self.storage.revision_add([revision])
 
                     tmp_revisions[version].append((branch_name, revision_id))
 
+            logger.debug('tmp_revisions: %s', tmp_revisions)
             # Build and load the snapshot
             branches = {}
             for version, branch_name_revisions in tmp_revisions.items():
                 if len(branch_name_revisions) == 1:
                     branch_name, target = branch_name_revisions[0]
                     if branch_name != 'HEAD':
                         branches[b'HEAD'] = {
                             'target_type': 'alias',
                             'target': branch_name.encode('utf-8'),
                         }
 
                 for branch_name, target in branch_name_revisions:
                     branch_name = branch_name.encode('utf-8')
                     branches[branch_name] = {
                         'target_type': 'revision',
                         'target': target,
                     }
 
             snapshot = {
                 'branches': branches
             }
             logger.debug('snapshot: %s', snapshot)
 
             snapshot['id'] = identifier_to_bytes(
                 snapshot_identifier(snapshot))
 
             logger.debug('snapshot: %s', snapshot)
             self.storage.snapshot_add([snapshot])
             if hasattr(self.storage, 'flush'):
                 self.storage.flush()
         except Exception:
             logger.exception('Fail to load %s' % self.url)
             status_visit = 'partial'
             status_load = 'failed'
         finally:
             self.storage.origin_visit_update(
                 origin=self.url, visit_id=visit_id, status=status_visit,
                 snapshot=snapshot)
         result = {
             'status': status_load,
         }
         if snapshot:
             result['snapshot_id'] = snapshot['id']
         return result
diff --git a/swh/loader/package/npm.py b/swh/loader/package/npm.py
index 09cdfdd..980a300 100644
--- a/swh/loader/package/npm.py
+++ b/swh/loader/package/npm.py
@@ -1,295 +1,295 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import json
 import logging
 import os
 import re
 
 from codecs import BOM_UTF8
 from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional
 
 import chardet
 import iso8601
 
 from swh.model.identifiers import normalize_timestamp
 from swh.loader.package.loader import PackageLoader
 from swh.loader.package.utils import api_info
 
 
 logger = logging.getLogger(__name__)
 
 
 _EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
 
 # https://github.com/jonschlinkert/author-regex
 _author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
 
 
 def parse_npm_package_author(author_str):
     """
     Split an npm package author string into its name, email and url parts.
 
     Empty ``<>`` and ``()`` groups are dropped before parsing. The parts may
     come in any of the following layouts::
 
         name
         name <email> (url)
         name <email>(url)
         name<email> (url)
         name<email>(url)
         name (url) <email>
         name (url)<email>
         name(url) <email>
         name(url)<email>
         name (url)
         name(url)
         name <email>
         name<email>
         <email> (url)
         <email>(url)
         (url) <email>
         (url)<email>
         <email>
         (url)
 
     Args:
         author_str (str): input author string
 
     Returns:
         dict: A dict holding only the parts found, among the keys:
             * name
             * email
             * url
 
     """
     cleaned = author_str.replace('<>', '').replace('()', '')
     parsed = {}
     # Each match is a (name, email, url) tuple, with '' for absent parts;
     # a part found by a later match overrides the one of an earlier match
     for name, email, url in re.findall(_author_regexp, cleaned, re.M):
         for key, value in (('name', name), ('email', email), ('url', url)):
             value = value.strip()
             if value:
                 parsed[key] = value
     return parsed
 
 
 def extract_npm_package_author(package_json):
     """
     Extract package author from a ``package.json`` file content and
     return it in swh format.
 
     The ``author`` then ``authors`` entries are looked up; an entry can be a
     string, a dict (keys ``name`` and ``email``) or a list (whose first item
     is used). An entry from which nothing can be parsed is ignored, so that
     it does not discard what a previous entry provided. Values of any other
     type are ignored instead of making the extraction fail.
 
     Args:
         package_json (dict): Dict holding the content of parsed
             ``package.json`` file
 
     Returns:
         dict: A dict with the following keys:
             * fullname
             * name
             * email
 
         Values are utf-8 encoded bytes. When no author name is found, a
         fresh copy of the empty author is returned.
 
     """
 
     def _author_str(author_data):
         # Normalize the supported shapes to one author string
         if isinstance(author_data, dict):
             author_str = ''
             name = author_data.get('name')
             if isinstance(name, str):
                 author_str += name
             email = author_data.get('email')
             if isinstance(email, str):
                 author_str += ' <%s>' % email
             return author_str
         if isinstance(author_data, list):
             return _author_str(author_data[0]) if author_data else ''
         if isinstance(author_data, str):
             return author_data
         # null, number, boolean...: nothing to parse
         return ''
 
     author_data = {}
     for author_key in ('author', 'authors'):
         if author_key in package_json:
             parsed = parse_npm_package_author(
                 _author_str(package_json[author_key]))
             if parsed:
                 author_data = parsed
 
     name = author_data.get('name')
     email = author_data.get('email')
 
     if not name:
         # An email alone is not enough to build a fullname. Return a copy
         # so that callers cannot alter the module-level empty author.
         return dict(_EMPTY_AUTHOR)
 
     fullname = '%s <%s>' % (name, email) if email else name
 
     return {
         'fullname': fullname.encode('utf-8'),
         'name': name.encode('utf-8'),
         'email': email.encode('utf-8') if email else None,
     }
 
 
 def _lstrip_bom(s, bom=BOM_UTF8):
     if s.startswith(bom):
         return s[len(bom):]
     else:
         return s
 
 
 def load_json(json_bytes):
     """
     Try to load JSON from bytes and return a dictionary.
 
     First try to decode from utf-8 (ignoring a leading BOM). If the decoding
     failed, try to detect the encoding and decode again with replace
     error handling.
 
     If the encoding cannot be detected or if JSON is malformed, an empty
     dictionary will be returned.
 
     Args:
         json_bytes (bytes): binary content of a JSON file
 
     Returns:
         dict: JSON data loaded in a dictionary
     """
     try:
         json_str = _lstrip_bom(json_bytes).decode('utf-8')
     except UnicodeDecodeError:
         encoding = chardet.detect(json_bytes)['encoding']
         if not encoding:
             # No encoding detected: there is nothing to decode, hence
             # nothing to parse
             return {}
         json_str = json_bytes.decode(encoding, 'replace')
     try:
         return json.loads(json_str)
     except json.decoder.JSONDecodeError:
         return {}
 
 
 def extract_intrinsic_metadata(dir_path: str) -> Dict:
     """Given an uncompressed path holding an npm release artifact, return
        its parsed ``package.json`` as a dict.
 
        The release artifact contains at their root one folder. For example:
        $ tar tvf zprint-0.0.6.tar.gz
        drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
        ...
 
        The ``package.json`` file is looked up in the first entry listed
        below ``dir_path``.
 
     Args:
 
         dir_path (str): Path to the uncompressed directory
                         representing a release artifact from npm.
 
     Returns:
         the parsed ``package.json`` content, or an empty dict when the
         directory does not exist, is empty or holds no such file.
 
     """
     if not os.path.exists(dir_path):
         return {}
     entries = os.listdir(dir_path)
     if not entries:
         return {}
     # The root folder of the archive
     root_dirname = entries[0]
     manifest_path = os.path.join(dir_path, root_dirname, 'package.json')
     if not os.path.exists(manifest_path):
         return {}
     with open(manifest_path, 'rb') as manifest:
         return load_json(manifest.read())
 
 
 class NpmLoader(PackageLoader):
     visit_type = 'npm'
 
     def __init__(self, package_name, package_url, package_metadata_url):
         """Initialize the npm loader.
 
         Args:
             package_name (str): Name of the npm package. NOTE(review): not
                 used by this constructor at the moment (see the disabled
                 defaults at the end of it).
             package_url (str): Origin url of the package
             package_metadata_url (str): Url of the package metadata, fetched
                 lazily through the :attr:`info` property
 
         """
         super().__init__(url=package_url)
         self.provider_url = package_metadata_url
 
         # Cache of the package metadata, filled on first access to `info`
         self._info = None
         self._versions = None
 
         # if package_url is None:
         #     package_url = 'https://www.npmjs.com/package/%s' % package_name
         # if package_metadata_url is None:
         #     package_metadata_url = 'https://replicate.npmjs.com/%s/' %\
         #                             quote(package_name, safe='')
 
     @property
     def info(self) -> Dict:
         """Project metadata information, as returned by ``api_info`` for the
         provider url.
 
         The metadata is fetched on first access, then served from the cache
         (as long as what got fetched is not empty).
 
         """
         cached = self._info
         if not cached:
             cached = self._info = api_info(self.provider_url)
         return cached
 
     def get_versions(self) -> Sequence[str]:
-        return sorted(self.info['versions'].keys())
+        return sorted(list(self.info['versions'].keys()))
 
     def get_default_release(self) -> str:
         """Return the version tagged ``latest`` in the package metadata.
 
         Returns:
             The ``latest`` entry of the ``dist-tags`` metadata, or an empty
             string when there is no such entry
 
         """
         return self.info['dist-tags'].get('latest', '')
 
     def get_package_info(self, version: str) -> Generator[
             Tuple[str, Mapping[str, Any]], None, None]:
         """Yield the single artifact (the tarball) of a package version.
 
         Args:
             version: Package version, a key of the ``versions`` metadata
 
         Yields:
             (branch name, package information), the branch being named
             ``releases/<version>`` and the package information holding the
             tarball ``url``, its ``filename`` and the ``raw`` metadata of
             the version
 
         """
         version_meta = self.info['versions'][version]
         tarball_url = version_meta['dist']['tarball']
         package_info = {
             'url': tarball_url,
             'filename': os.path.basename(tarball_url),
             'raw': version_meta,
         }
         branch_name = 'releases/%s' % version
         yield branch_name, package_info
 
     def resolve_revision_from(
             self, known_artifacts: Dict, artifact_metadata: Dict) \
             -> Optional[bytes]:
         shasum = artifact_metadata['dist']['shasum']
         for rev_id, known_artifact in known_artifacts.items():
             original_artifact = known_artifact['original_artifact'][0]
             if shasum == original_artifact['checksums']['sha1']:
                 return rev_id
 
     def build_revision(
             self, a_metadata: Dict, uncompressed_path: str) -> Dict:
         """Build the revision of a package version out of its metadata.
 
         The author and the version come from the intrinsic metadata (the
         ``package.json`` found below ``uncompressed_path``), the date from
         the ``time`` entry of the package metadata for that version.
 
         Args:
             a_metadata: Extrinsic metadata of the artifact, stored as is
             uncompressed_path: Path to the uncompressed artifact
 
         Returns:
             The revision dict, the author being also used as committer and
             the date as committer date
 
         """
         intrinsic = extract_intrinsic_metadata(uncompressed_path)
         # from intrinsic metadata
         author = extract_npm_package_author(intrinsic)
         version = intrinsic['version']
         # from extrinsic metadata
         released_at = iso8601.parse_date(self.info['time'][version])
         date = normalize_timestamp(int(released_at.timestamp()))
         message = version.encode('ascii')
 
         metadata = {
             'intrinsic': {
                 'tool': 'package.json',
                 'raw': intrinsic,
             },
             'extrinsic': {
                 'provider': self.provider_url,
                 'when': self.visit_date.isoformat(),
                 'raw': a_metadata,
             },
         }
         return {
             'type': 'tar',
             'message': message,
             'author': author,
             'date': date,
             'committer': author,
             'committer_date': date,
             'parents': [],
             'metadata': metadata,
         }
diff --git a/swh/loader/package/tests/common.py b/swh/loader/package/tests/common.py
index a46cd58..1c2a9c1 100644
--- a/swh/loader/package/tests/common.py
+++ b/swh/loader/package/tests/common.py
@@ -1,101 +1,106 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from os import path
 
 import logging
 
 from typing import Dict, List, Tuple
 
 from swh.model.hashutil import hash_to_bytes, hash_to_hex
 
 
 logger = logging.getLogger(__file__)
 
 
 DATADIR = path.join(path.abspath(path.dirname(__file__)), 'resources')
 
 
 def decode_target(target):
     """Test helper turning a branch target into a readable form.
 
     Alias targets are decoded as utf-8 strings, any other target is
     converted to its hexadecimal form. A falsy target is returned as is.
 
     """
     if not target:
         return target
     kind = target['target_type']
     raw = target['target']
     decoded = raw.decode('utf-8') if kind == 'alias' else hash_to_hex(raw)
     return {'target': decoded, 'target_type': kind}
 
 
 def check_snapshot(expected_snapshot, storage):
     """Check for snapshot match.
 
     Provide the hashes as hexadecimal, the conversion is done
     within the method.
 
     Args:
         expected_snapshot (dict): full snapshot with hex ids
         storage (Storage): expected storage
 
     """
     expected_snapshot_id = expected_snapshot['id']
     expected_branches = expected_snapshot['branches']
     snap = storage.snapshot_get(hash_to_bytes(expected_snapshot_id))
-    assert snap is not None
+    if snap is None:
+        # display known snapshots instead
+        from pprint import pprint
+        for snap_id, (_snap, branches) in storage._snapshots.items():
+            pprint(_snap.to_dict())
+        raise AssertionError('Snapshot is not found')
 
     branches = {
         branch.decode('utf-8'): decode_target(target)
         for branch, target in snap['branches'].items()
     }
     assert expected_branches == branches
 
 
 def check_metadata(metadata: Dict, key_path: str, raw_type: type):
     """Given a metadata dict, ensure the associated key_path value is of type
        raw_type.
 
     Args:
         metadata: Dict to check
         key_path: Dot-separated path of keys to follow within the dict
         raw_type: Type (or tuple of types, as for ``isinstance``) the value
             found at the path must be an instance of
 
     Raises:
         Assertion error in case of mismatch or when the path cannot be
         followed
 
     """
     data = metadata
     for key in key_path.split('.'):
         try:
             data = data[key]
         except (TypeError, KeyError) as e:
             # KeyError: because path too long
             # TypeError: data is not a dict
             raise AssertionError(
                 'Unable to follow path %r (at key %r): %r' % (
                     key_path, key, e)) from e
     assert isinstance(data, raw_type), (
         'Value at path %r is of type %s, expected %s' % (
             key_path, type(data), raw_type))
 
 
 def check_metadata_paths(metadata: Dict, paths: List[Tuple[str, type]]):
     """Given a metadata dict, ensure the keys are of expected types
 
     Args:
         metadata: Dict to check
         paths: List of (key path, type) pairs, each of them being checked
             through ``check_metadata``
 
     Raises:
         Assertion error in case of mismatch
 
     """
     for key_path, raw_type in paths:
         check_metadata(metadata, key_path, raw_type)
diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py
index 654c472..3bb776f 100644
--- a/swh/loader/package/tests/test_npm.py
+++ b/swh/loader/package/tests/test_npm.py
@@ -1,531 +1,526 @@
 # Copyright (C) 2019  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 import json
 import os
 
 from swh.model.hashutil import hash_to_bytes
 
 from swh.loader.package.npm import (
     parse_npm_package_author, extract_npm_package_author
 )
 from swh.loader.package.tests.common import (
     check_snapshot, check_metadata_paths
 )
 
 from swh.loader.package.npm import NpmLoader
 
 
 def _parse_author_string_test(author_str, expected_result):
     """Check that author_str is parsed to expected_result, as is and when
        padded with one leading or one trailing space.
 
     """
     variants = (author_str, ' %s' % author_str, '%s ' % author_str)
     for variant in variants:
         assert parse_npm_package_author(variant) == expected_result
 
 
 def test_parse_npm_package_author():
     """Author strings combining a name, an email within <> and an url
        within () are parsed to a dict holding only the non-empty parts.
 
     """
     name = {'name': 'John Doe'}
     email = {'email': 'john.doe@foo.bar'}
     url = {'url': 'https://john.doe'}
     name_email = {**name, **email}
     name_url = {**name, **url}
     email_url = {**email, **url}
     full = {**name, **email, **url}
 
     cases = [
         # one part only
         ('John Doe', name),
         ('<john.doe@foo.bar>', email),
         ('(https://john.doe)', url),
         # two parts, with or without separating space
         ('John Doe <john.doe@foo.bar>', name_email),
         ('John Doe<john.doe@foo.bar>', name_email),
         ('John Doe (https://john.doe)', name_url),
         ('John Doe(https://john.doe)', name_url),
         ('<john.doe@foo.bar> (https://john.doe)', email_url),
         ('(https://john.doe) <john.doe@foo.bar>', email_url),
         # all three parts
         ('John Doe <john.doe@foo.bar> (https://john.doe)', full),
         ('John Doe (https://john.doe) <john.doe@foo.bar>', full),
         ('John Doe<john.doe@foo.bar> (https://john.doe)', full),
         ('John Doe<john.doe@foo.bar>(https://john.doe)', full),
         # nothing but empty parts
         ('', {}),
         ('<>', {}),
         (' <>', {}),
         ('<>()', {}),
         ('<> ()', {}),
         ('()', {}),
         (' ()', {}),
         # a name followed by empty parts
         ('John Doe <> ()', name),
         ('John Doe <>', name),
         ('John Doe ()', name),
     ]
 
     for author_str, expected_result in cases:
         _parse_author_string_test(author_str, expected_result)
 
 
 def test_extract_npm_package_author(datadir):
     """The author is extracted from package metadata whether it is declared
        through an "author" or an "authors" key, as a string, a dict or a
        list (in which case the first entry is the one expected).
 
     """
     package_metadata_filepath = os.path.join(
         datadir, 'https_replicate.npmjs.com', 'org_visit1')
 
     with open(package_metadata_filepath) as json_file:
         package_metadata = json.load(json_file)
 
     # This comparison used to lack its `assert`: the result was computed and
     # thrown away, so version 0.0.2 was never actually checked.
     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.2']) ==
         {
             'fullname': b'mooz <stillpedant@gmail.com>',
             'name': b'mooz',
             'email': b'stillpedant@gmail.com'
         }
     )
 
     assert (
         extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
         {
             'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
             'name': b'Masafumi Oyamada',
             'email': b'stillpedant@gmail.com'
         }
     )
 
     # "author" declared as a single string
     package_json = json.loads('''
     {
         "name": "highlightjs-line-numbers.js",
         "version": "2.7.0",
         "description": "Highlight.js line numbers plugin.",
         "main": "src/highlightjs-line-numbers.js",
         "dependencies": {},
         "devDependencies": {
             "gulp": "^4.0.0",
             "gulp-rename": "^1.4.0",
             "gulp-replace": "^0.6.1",
             "gulp-uglify": "^1.2.0"
         },
         "repository": {
             "type": "git",
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
         },
         "author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
         "license": "MIT",
         "bugs": {
             "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
         },
         "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
     }''') # noqa
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
             'name': b'Yauheni Pakala',
             'email': b'evgeniy.pakalo@gmail.com'
         }
 
     # "authors" declared as a list of dicts
     package_json = json.loads('''
     {
         "name": "3-way-diff",
         "version": "0.0.1",
         "description": "3-way diffing of JavaScript objects",
         "main": "index.js",
         "authors": [
             {
                 "name": "Shawn Walsh",
                 "url": "https://github.com/shawnpwalsh"
             },
             {
                 "name": "Markham F Rollins IV",
                 "url": "https://github.com/mrollinsiv"
             }
         ],
         "keywords": [
             "3-way diff",
             "3 way diff",
             "three-way diff",
             "three way diff"
         ],
         "devDependencies": {
             "babel-core": "^6.20.0",
             "babel-preset-es2015": "^6.18.0",
             "mocha": "^3.0.2"
         },
         "dependencies": {
             "lodash": "^4.15.0"
         }
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'Shawn Walsh',
             'name': b'Shawn Walsh',
             'email': None
         }
 
     # "author" declared as a list of strings
     package_json = json.loads('''
     {
         "name": "yfe-ynpm",
         "version": "1.0.0",
         "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
         "repository": {
             "type": "git",
             "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
         },
         "author": [
             "fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
             "xufuzi <xufuzi@ywwl.com> (https://7993.org)"
         ],
         "license": "MIT"
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'fengmk2 <fengmk2@gmail.com>',
             'name': b'fengmk2',
             'email': b'fengmk2@gmail.com'
         }
 
     # "authors" declared as a single dict
     package_json = json.loads('''
     {
         "name": "umi-plugin-whale",
         "version": "0.0.8",
         "description": "Internal contract component",
         "authors": {
             "name": "xiaohuoni",
             "email": "448627663@qq.com"
         },
         "repository": "alitajs/whale",
         "devDependencies": {
             "np": "^3.0.4",
             "umi-tools": "*"
         },
         "license": "MIT"
     }''')
 
     assert extract_npm_package_author(package_json) == \
         {
             'fullname': b'xiaohuoni <448627663@qq.com>',
             'name': b'xiaohuoni',
             'email': b'448627663@qq.com'
         }
 
 
 def normalize_hashes(hashes):
     """Convert hashes through hash_to_bytes, keeping the input's shape.
 
     Args:
         hashes: either a single str, a list of str, or any other object
             exposing `items()` (e.g. a dict) whose keys and values are
             both converted
 
     Returns:
         The converted value for a str, a list of converted values for a
         list, a dict of converted keys to converted values otherwise
 
     """
     if isinstance(hashes, str):
         return hash_to_bytes(hashes)
     if isinstance(hashes, list):
         return list(map(hash_to_bytes, hashes))
     return {
         hash_to_bytes(key): hash_to_bytes(value)
         for key, value in hashes.items()
     }
 
 
 # Content hashes looked up in storage by test_npm_loader_first_visit; the
 # length of this list is also the expected 'content' counter after the first
 # visit of the 'org' package.
 _expected_new_contents_first_visit = normalize_hashes([
     '4ce3058e16ab3d7e077f65aabf855c34895bf17c',
     '858c3ceee84c8311adc808f8cdb30d233ddc9d18',
     '0fa33b4f5a4e0496da6843a38ff1af8b61541996',
     '85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
     '9163ac8025923d5a45aaac482262893955c9b37b',
     '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
     '18c03aac6d3e910efb20039c15d70ab5e0297101',
     '41265c42446aac17ca769e67d1704f99e5a1394d',
     '783ff33f5882813dca9239452c4a7cadd4dba778',
     'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
     '112d1900b4c2e3e9351050d1b542c9744f9793f3',
     '5439bbc4bd9a996f1a38244e6892b71850bc98fd',
     'd83097a2f994b503185adf4e719d154123150159',
     'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
     'b3523a26f7147e4af40d9d462adaae6d49eda13e',
     'cd065fb435d6fb204a8871bcd623d0d0e673088c',
     '2854a40855ad839a54f4b08f5cff0cf52fca4399',
     'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
     '0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
     '0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
     '585fc5caab9ead178a327d3660d35851db713df1',
     'e8cd41a48d79101977e3036a87aeb1aac730686f',
     '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
     '9c3cc2763bf9e9e37067d3607302c4776502df98',
     '3649a68410e354c83cd4a38b66bd314de4c8f5c9',
     'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
     '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
     '38de737da99514de6559ff163c988198bc91367a',
 ])
 
 # Directory hashes that must not be reported missing by storage after the
 # first visit; the length of this list is also the expected 'directory'
 # counter.
 _expected_new_directories_first_visit = normalize_hashes([
     '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
     '80579be563e2ef3e385226fe7a3f079b377f142c',
     '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
     'bcad03ce58ac136f26f000990fc9064e559fe1c0',
     '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
     'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
     '584b5b4b6cf7f038095e820b99386a9c232de931',
     '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
     'bb5f4ee143c970367eb409f2e4c1104898048b9d',
     '1b95491047add1103db0dfdfa84a9735dcb11e88',
     'a00c6de13471a2d66e64aca140ddb21ef5521e62',
     '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
     'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
     '202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
     '775cc516543be86c15c1dc172f49c0d4e6e78235',
     'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
 ])
 
 # Mapping whose keys are the revision hashes that must not be reported
 # missing after the first visit. Each value is one of the directory hashes
 # listed above -- presumably the root directory of the revision (to confirm).
 _expected_new_revisions_first_visit = normalize_hashes({
     'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
     '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
     '5f9eb78af37ffd12949f235e86fac04898f9f72a':
     '3370d20d6f96dc1c9e50f083e2134881db110f4f',
     'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
     'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'}
 )
 
-_expected_new_snapshot_first_visit_id = normalize_hashes(
-    'd0587e1195aed5a8800411a008f2f2d627f18e2d')
-
-_expected_branches_first_visit = {
-    'HEAD': {
-        'target': 'releases/0.0.4',
-        'target_type': 'alias'
-    },
-    'releases/0.0.2': {
-        'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
-        'target_type': 'revision'
-    },
-    'releases/0.0.3': {
-        'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
-        'target_type': 'revision'
-    },
-    'releases/0.0.4': {
-        'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
-        'target_type': 'revision'
-    }
-}
-
 
 def package_url(package):
     """Build the www.npmjs.com package page url of the given package name."""
     url_template = 'https://www.npmjs.com/package/%s'
     return url_template % package
 
 
 def package_metadata_url(package):
     """Build the replicate.npmjs.com metadata url of the given package name
        (with a trailing slash).
 
     """
     url_template = 'https://replicate.npmjs.com/%s/'
     return url_template % package
 
 
 def test_revision_metadata_structure(swh_config, requests_mock_datadir):
     """Load the 'org' package, then check the types found in the metadata
        of one of its revisions and in each of its original artifacts.
 
     """
     package = 'org'
     loader = NpmLoader(
         package, package_url(package), package_metadata_url(package))
 
     assert loader.load()['status'] == 'eventful'
 
     revision_id = hash_to_bytes('d8a1c7474d2956ac598a19f0f27d52f7015f117e')
     revisions = list(loader.storage.revision_get([revision_id]))
     revision = revisions[0]
 
     assert revision is not None
 
     revision_metadata = revision['metadata']
     revision_paths = [
         ('intrinsic.tool', str),
         ('intrinsic.raw', dict),
         ('extrinsic.provider', str),
         ('extrinsic.when', str),
         ('extrinsic.raw', dict),
         ('original_artifact', list),
     ]
     check_metadata_paths(revision_metadata, paths=revision_paths)
 
     artifact_paths = [
         ('filename', str),
         ('length', int),
         ('checksums', dict),
     ]
     for artifact in revision_metadata['original_artifact']:
         check_metadata_paths(artifact, paths=artifact_paths)
 
 
 def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
     """Load the 'org' package once, then check the storage counters, the
        presence of the expected contents, directories and revisions, and
        the resulting snapshot.
 
     """
 
     package = 'org'
     loader = NpmLoader(package,
                        package_url(package),
                        package_metadata_url(package))
 
     actual_load_status = loader.load()
     assert actual_load_status['status'] == 'eventful'
 
     stats = loader.storage.stat_counters()
 
     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats
 
     assert len(list(loader.storage.content_get(
         _expected_new_contents_first_visit))) == len(
             _expected_new_contents_first_visit)
 
     assert list(loader.storage.directory_missing(
         _expected_new_directories_first_visit)) == []
 
     assert list(loader.storage.revision_missing(
         _expected_new_revisions_first_visit)) == []
 
     expected_snapshot = {
-        'id': _expected_new_snapshot_first_visit_id,
-        'branches': _expected_branches_first_visit,
+        'id': 'd0587e1195aed5a8800411a008f2f2d627f18e2d',
+        'branches': {
+            'HEAD': {
+                'target': 'releases/0.0.4',
+                'target_type': 'alias'
+            },
+            'releases/0.0.2': {
+                'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
+                'target_type': 'revision'
+            },
+            'releases/0.0.3': {
+                'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
+                'target_type': 'revision'
+            },
+            'releases/0.0.4': {
+                'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
+                'target_type': 'revision'
+            }
+        }
     }
     check_snapshot(expected_snapshot, loader.storage)
 
 
 def test_npm_loader_incremental_visit(
         swh_config, requests_mock_datadir_visits):
     package = 'org'
     url = package_url(package)
     metadata_url = package_metadata_url(package)
     loader = NpmLoader(package, url, metadata_url)
 
     actual_load_status = loader.load()
 
     assert actual_load_status['status'] == 'eventful'
     origin_visit = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit['status'] == 'full'
 
     stats = loader.storage.stat_counters()
 
     assert {
         'content': len(_expected_new_contents_first_visit),
         'directory': len(_expected_new_directories_first_visit),
         'origin': 1,
         'origin_visit': 1,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit),
         'skipped_content': 0,
         'snapshot': 1,
     } == stats
 
     loader._info = None  # reset loader internal state
     actual_load_status2 = loader.load()
 
     assert actual_load_status2['status'] == 'eventful'
     origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
     assert origin_visit2['status'] == 'full'
 
     stats = loader.storage.stat_counters()
 
     assert {  # 3 new releases artifacts
         'content': len(_expected_new_contents_first_visit) + 14,
         'directory': len(_expected_new_directories_first_visit) + 15,
         'origin': 1,
         'origin_visit': 2,
         'person': 2,
         'release': 0,
         'revision': len(_expected_new_revisions_first_visit) + 3,
         'skipped_content': 0,
         'snapshot': 2,
     } == stats
 
     urls = [
         m.url for m in requests_mock_datadir_visits.request_history
         if m.url.startswith('https://registry.npmjs.org')
     ]
     assert len(urls) == len(set(urls))  # we visited each artifact once across