diff --git a/swh/loader/package/debian.py b/swh/loader/package/debian.py index 0396c8d..eb2f74c 100644 --- a/swh/loader/package/debian.py +++ b/swh/loader/package/debian.py @@ -1,342 +1,356 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import email.utils import iso8601 import logging import re import subprocess from dateutil.parser import parse as parse_date from debian.changelog import Changelog from debian.deb822 import Dsc from os import path -from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple +from typing import ( + Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple +) from swh.loader.package.loader import PackageLoader from swh.loader.package.utils import download logger = logging.getLogger(__name__) UPLOADERS_SPLIT = re.compile(r'(?<=\>)\s*,\s*') class DebianLoader(PackageLoader): """Load debian origins into swh archive. """ visit_type = 'debian' def __init__(self, url: str, date: str, packages: Mapping[str, Any]): super().__init__(url=url) self.packages = packages def get_versions(self) -> Sequence[str]: """Returns the keys of the packages input (e.g. stretch/contrib/0.7.2-3, etc...) """ return self.packages.keys() def get_default_release(self) -> str: """Take the first version as default release """ return list(self.packages.keys())[0] - def get_artifacts(self, version: str) -> Generator[ - Tuple[Mapping[str, Any], Dict], None, None]: - a_metadata = self.packages[version] - artifacts_package_info = a_metadata.copy() - artifacts_package_info['filename'] = version - yield artifacts_package_info, a_metadata + def get_package_info(self, version: str) -> Generator[ + Tuple[str, Mapping[str, Any]], None, None]: + meta = self.packages[version] + p_info = meta.copy() + p_info['raw'] = meta + yield 'releases/%s' % version, p_info def resolve_revision_from( self, known_package_artifacts: Dict, artifact_metadata: Dict) \ -> Optional[bytes]: artifacts_to_fetch = artifact_metadata['files'] logger.debug('k_p_artifacts: %s', known_package_artifacts) logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) for rev_id, known_artifacts in known_package_artifacts.items(): logger.debug('Revision: %s', rev_id) logger.debug('Associated known_artifacts: %s', known_artifacts) known_artifacts = known_artifacts['extrinsic']['raw']['files'] rev_found = True for a_name, k_artifact in known_artifacts.items(): artifact_to_fetch = artifacts_to_fetch.get(a_name) logger.debug('artifact_to_fetch: %s', artifact_to_fetch) if artifact_to_fetch is None: # as soon as we do not see an artifact, we consider we need # to check the other revision rev_found = False if k_artifact['sha256'] != artifact_to_fetch['sha256']: # Hash is different, we consider we need to check the other # revisions rev_found = False if rev_found: logger.debug('Existing revision %s found for new artifacts.', rev_id) return rev_id # if we pass here, we did not find any known artifacts logger.debug('No existing revision found for the new artifacts.') - def download_package(self, a_p_info: str, tmpdir: str) -> Tuple[str, Dict]: + def download_package(self, p_info: Mapping[str, Any], + tmpdir: str) -> [Tuple[str, Dict]]: """Contrary to other package loaders (1 package, 1 artifact), `a_metadata` represents the package's datafiles set to fetch: - .orig.tar.gz - .dsc - .diff.gz This is delegated to the 
`download_package` function. """ - logger.debug('debian: artifactS_package_info: %s', a_p_info) - return tmpdir, download_package(a_p_info, tmpdir) - - def uncompress(self, a_path: str, tmpdir: str, a_metadata: Dict) -> str: - return extract_package(a_metadata, tmpdir) - - def read_intrinsic_metadata(self, a_metadata: Dict, - a_uncompressed_path: str) -> Dict: - _, dsc_name = dsc_information(a_metadata) - dsc_path = path.join(path.dirname(a_uncompressed_path), dsc_name) - return get_package_metadata( - a_metadata, dsc_path, a_uncompressed_path) - - def build_revision( - self, a_metadata: Dict, i_metadata: Dict) -> Dict: - dsc_url, _ = dsc_information(a_metadata) + all_hashes = download_package(p_info, tmpdir) + logger.debug('all_hashes: %s', all_hashes) + res = [] + for hashes in all_hashes.values(): + res.append((tmpdir, hashes)) + logger.debug('res: %s', res) + return res + + def uncompress(self, dl_artifacts: [Tuple[str, Dict]], dest: str) -> str: + logger.debug('dl_artifacts: %s', dl_artifacts) + return extract_package(dl_artifacts, dest=dest) + + def build_revision(self, a_metadata: Mapping[str, Any], + uncompressed_path: str) -> Dict: + dsc_url, dsc_name = dsc_information(a_metadata) + dsc_path = path.join(path.dirname(uncompressed_path), dsc_name) + i_metadata = get_package_metadata( + a_metadata, dsc_path, uncompressed_path) + logger.debug('i_metadata: %s', i_metadata) logger.debug('a_metadata: %s', a_metadata) msg = 'Synthetic revision for Debian source package %s version %s' % ( a_metadata['name'], a_metadata['version']) date = iso8601.parse_date(i_metadata['changelog']['date']) author = prepare_person(i_metadata['changelog']['person']) # inspired from swh.loader.debian.converters.package_metadata_to_revision # noqa return { 'type': 'dsc', 'message': msg.encode('utf-8'), 'author': author, 'date': date, 'committer': author, 'committer_date': date, 'parents': [], 'metadata': { 'intrinsic': { 'tool': 'dsc', 'raw': i_metadata, }, 'extrinsic': { 'provider': dsc_url, 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, } } def uid_to_person(uid: str) -> Mapping[str, str]: """Convert an uid to a person suitable for insertion. Args: uid: an uid of the form "Name " Returns: a dictionary with the following keys: - name: the name associated to the uid - email: the mail associated to the uid - fullname: the actual uid input """ logger.debug('uid: %s', uid) ret = { 'name': '', 'email': '', 'fullname': uid, } name, mail = email.utils.parseaddr(uid) if name and email: ret['name'] = name ret['email'] = mail else: ret['name'] = uid return ret def prepare_person(person: Mapping[str, str]) -> Mapping[str, bytes]: """Prepare person for swh serialization... Args: A person dict Returns: A person dict ready for storage """ ret = {} for key, value in person.items(): ret[key] = value.encode('utf-8') return ret def download_package( package: Mapping[str, Any], tmpdir: Any) -> Mapping[str, Any]: """Fetch a source package in a temporary directory and check the checksums for all files. 
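The returned dict is keyed by filename; as exercised in test_download_package below, each value is of the form {'filename': ..., 'length': ..., 'checksums': {'sha1': ..., 'sha256': ..., ...}} (illustrative shape only).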
Args: package: Dict defining the set of files representing a debian package tmpdir: Where to download and extract the files to ingest Returns: Dict of swh hashes per filename key """ all_hashes = {} for filename, fileinfo in package['files'].items(): uri = fileinfo['uri'] logger.debug('fileinfo: %s', fileinfo) extrinsic_hashes = {'sha256': fileinfo['sha256']} logger.debug('extrinsic_hashes(%s): %s', filename, extrinsic_hashes) filepath, hashes = download(uri, dest=tmpdir, filename=filename, hashes=extrinsic_hashes) all_hashes[filename] = hashes logger.debug('all_hashes: %s', all_hashes) return all_hashes def dsc_information(package: Mapping[str, Any]) -> Tuple[str, str]: """Retrieve dsc information from a package. Args: package: Package metadata information Returns: Tuple of dsc file's uri, dsc's full disk path """ dsc_name = None dsc_url = None for filename, fileinfo in package['files'].items(): if filename.endswith('.dsc'): if dsc_name: raise ValueError( 'Package %s_%s references several dsc files' % (package['name'], package['version']) ) dsc_url = fileinfo['uri'] dsc_name = filename return dsc_url, dsc_name -def extract_package(package: Mapping[str, Any], tmpdir: str) -> str: +def extract_package(dl_artifacts: List[Tuple[str, Dict]], dest: str) -> str: """Extract a Debian source package to a given directory. Note that after extraction the target directory will be the root of the extracted package, rather than containing it. Args: - package (dict): package information dictionary - tmpdir (str): directory where the package files are stored + package: package information dictionary + dest: directory where the package files are stored Returns: Package extraction directory """ - _, dsc_name = dsc_information(package) - dsc_path = path.join(tmpdir, dsc_name) - destdir = path.join(tmpdir, 'extracted') - logfile = path.join(tmpdir, 'extract.log') + a_path = dl_artifacts[0][0] + logger.debug('dl_artifacts: %s', dl_artifacts) + for _, hashes in dl_artifacts: + logger.debug('hashes: %s', hashes) + filename = hashes['filename'] + if filename.endswith('.dsc'): + dsc_name = filename + break + + dsc_path = path.join(a_path, dsc_name) + destdir = path.join(dest, 'extracted') + logfile = path.join(dest, 'extract.log') logger.debug('extract Debian source package %s in %s' % (dsc_path, destdir), extra={ 'swh_type': 'deb_extract', 'swh_dsc': dsc_path, 'swh_destdir': destdir, }) cmd = ['dpkg-source', '--no-copy', '--no-check', '--ignore-bad-version', '-x', dsc_path, destdir] try: with open(logfile, 'w') as stdout: subprocess.check_call(cmd, stdout=stdout, stderr=subprocess.STDOUT) except subprocess.CalledProcessError as e: logdata = open(logfile, 'r').read() raise ValueError('dpkg-source exited with code %s: %s' % (e.returncode, logdata)) from None return destdir def get_package_metadata(package: Mapping[str, Any], dsc_path: str, extracted_path: str) -> Mapping[str, Any]: """Get the package metadata from the source package at dsc_path, extracted in extracted_path. 
Args: package: the package dict (with a dsc_path key) dsc_path: path to the package's dsc file extracted_path: the path where the package got extracted Returns: dict: a dictionary with the following keys: - history: list of (package_name, package_version) tuples parsed from the package changelog """ with open(dsc_path, 'rb') as dsc: parsed_dsc = Dsc(dsc) # Parse the changelog to retrieve the rest of the package information changelog_path = path.join(extracted_path, 'debian/changelog') with open(changelog_path, 'rb') as changelog: try: parsed_changelog = Changelog(changelog) except UnicodeDecodeError: logger.warning('Unknown encoding for changelog %s,' ' falling back to iso' % changelog_path.decode('utf-8'), extra={ 'swh_type': 'deb_changelog_encoding', 'swh_name': package['name'], 'swh_version': str(package['version']), 'swh_changelog': changelog_path.decode('utf-8'), }) # need to reset as Changelog scrolls to the end of the file changelog.seek(0) parsed_changelog = Changelog(changelog, encoding='iso-8859-15') package_info = { 'name': package['name'], 'version': str(package['version']), 'changelog': { 'person': uid_to_person(parsed_changelog.author), 'date': parse_date(parsed_changelog.date).isoformat(), 'history': [(block.package, str(block.version)) for block in parsed_changelog][1:], } } maintainers = [ uid_to_person(parsed_dsc['Maintainer']), ] maintainers.extend( uid_to_person(person) for person in UPLOADERS_SPLIT.split(parsed_dsc.get('Uploaders', '')) ) package_info['maintainers'] = maintainers return package_info diff --git a/swh/loader/package/deposit.py b/swh/loader/package/deposit.py index 1a74c0a..160819c 100644 --- a/swh/loader/package/deposit.py +++ b/swh/loader/package/deposit.py @@ -1,153 +1,154 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging from typing import Dict, Generator, Mapping, Sequence, Tuple from swh.model.hashutil import hash_to_hex from swh.loader.package.loader import PackageLoader from swh.deposit.client import PrivateApiDepositClient as ApiClient logger = logging.getLogger(__name__) class DepositLoader(PackageLoader): """Load pypi origin's artifact releases into swh archive. 
""" visit_type = 'deposit' def __init__(self, url: str, deposit_id: str): """Constructor Args: url: Origin url to associate the artifacts/metadata to deposit_id: Deposit identity """ super().__init__(url=url) # For now build back existing api urls # archive_url: Private api url to retrieve archive artifact self.archive_url = '/%s/raw/' % deposit_id # metadata_url: Private api url to retrieve the deposit metadata self.metadata_url = '/%s/meta/' % deposit_id # deposit_update_url: Private api to push pids and status update on the # deposit id self.deposit_update_url = '/%s/update/' % deposit_id self.client = ApiClient() self._metadata = None @property def metadata(self): if self._metadata is None: self._metadata = self.client.metadata_get(self.metadata_url) return self._metadata def get_versions(self) -> Sequence[str]: # only 1 branch 'HEAD' with no alias since we only have 1 snapshot # branch return ['HEAD'] - def get_artifacts(self, version: str) -> Generator[ + def get_package_info(self, version: str) -> Generator[ Tuple[Mapping[str, str], Dict], None, None]: - artifact_package_info = { + p_info = { 'url': self.client.base_url + self.archive_url, 'filename': 'archive.zip', + 'raw': self.metadata, } - yield artifact_package_info, self.metadata + yield 'HEAD', p_info def build_revision( - self, a_metadata: Dict, i_metadata: Dict) -> Dict: + self, a_metadata: Dict, uncompressed_path: str) -> Dict: revision = a_metadata.pop('revision') metadata = { 'extrinsic': { 'provider': '%s/%s' % ( self.client.base_url, self.metadata_url), 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, } # FIXME: the deposit no longer needs to build the revision revision['metadata'].update(metadata) revision['author'] = parse_author(revision['author']) revision['committer'] = parse_author(revision['committer']) revision['message'] = revision['message'].encode('utf-8') revision['type'] = 'tar' return revision def load(self) -> Dict: # Usual loading r = super().load() success = r['status'] != 'failed' if success: # Update archive with metadata information origin_metadata = self.metadata['origin_metadata'] logger.debug('origin_metadata: %s', origin_metadata) tools = self.storage.tool_add([origin_metadata['tool']]) logger.debug('tools: %s', tools) tool_id = tools[0]['id'] provider = origin_metadata['provider'] # FIXME: Shall we delete this info? 
provider_id = self.storage.metadata_provider_add( provider['provider_name'], provider['provider_type'], provider['provider_url'], metadata=None) metadata = origin_metadata['metadata'] self.storage.origin_metadata_add( self.url, self.visit_date, provider_id, tool_id, metadata) # Update deposit status try: if not success: self.client.status_update( self.deposit_update_url, status='failed') return r snapshot_id = r['snapshot_id'] branches = self.storage.snapshot_get(snapshot_id)['branches'] logger.debug('branches: %s', branches) if not branches: return r rev_id = branches[b'HEAD']['target'] revision = next(self.storage.revision_get([rev_id])) # Retrieve the revision identifier dir_id = revision['directory'] # update the deposit's status to success with its # revision-id and directory-id self.client.status_update( self.deposit_update_url, status='done', revision_id=hash_to_hex(rev_id), directory_id=hash_to_hex(dir_id), origin_url=self.url) except Exception: logger.exception( 'Problem when trying to update the deposit\'s status') return {'status': 'failed'} return r def parse_author(author): """See prior fixme """ return { 'fullname': author['fullname'].encode('utf-8'), 'name': author['name'].encode('utf-8'), 'email': author['email'].encode('utf-8'), } diff --git a/swh/loader/package/gnu.py b/swh/loader/package/gnu.py index ce194c6..189041c 100644 --- a/swh/loader/package/gnu.py +++ b/swh/loader/package/gnu.py @@ -1,191 +1,195 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import re from os import path -from typing import Dict, Generator, Mapping, Optional, Sequence, Tuple +from typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple from swh.loader.package.loader import PackageLoader from swh.model.identifiers import normalize_timestamp logger = logging.getLogger(__name__) # to recognize existing naming pattern extensions = [ 'zip', 'tar', 'gz', 'tgz', 'bz2', 'bzip2', 'lzma', 'lz', 'xz', 'Z', ] version_keywords = [ 'cygwin_me', 'w32', 'win32', 'nt', 'cygwin', 'mingw', 'latest', 'alpha', 'beta', 'release', 'stable', 'hppa', 'solaris', 'sunos', 'sun4u', 'sparc', 'sun', 'aix', 'ibm', 'rs6000', 'i386', 'i686', 'linux', 'redhat', 'linuxlibc', 'mips', 'powerpc', 'macos', 'apple', 'darwin', 'macosx', 'powermacintosh', 'unknown', 'netbsd', 'freebsd', 'sgi', 'irix', ] # Match a filename into components. # # We use Debian's release number heuristic: A release number starts # with a digit, and is followed by alphanumeric characters or any of # ., +, :, ~ and - # # We hardcode a list of possible extensions, as this release number # scheme would match them too... We match on any combination of those. # # Greedy matching is done right to left (we only match the extension # greedily with +, software_name and release_number are matched lazily # with +? and *?). pattern = r''' ^ (?: # We have a software name and a release number, separated with a # -, _ or dot. (?P.+?[-_.]) (?P(%(vkeywords)s|[0-9][0-9a-zA-Z_.+:~-]*?)+) | # We couldn't match a release number, put everything in the # software name. (?P.+?) ) (?P(?:\.(?:%(extensions)s))+) $ ''' % { 'extensions': '|'.join(extensions), 'vkeywords': '|'.join('%s[-]?' 
% k for k in version_keywords), } def get_version(url: str) -> str: """Extract branch name from tarball url Args: url (str): Tarball URL Returns: byte: Branch name Example: For url = https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz >>> get_version(url) '0.2.0' """ filename = path.split(url)[-1] m = re.match(pattern, filename, flags=re.VERBOSE | re.IGNORECASE) if m: d = m.groupdict() if d['software_name1'] and d['release_number']: return d['release_number'] if d['software_name2']: return d['software_name2'] return '' class GNULoader(PackageLoader): visit_type = 'gnu' SWH_PERSON = { 'name': b'Software Heritage', 'fullname': b'Software Heritage', 'email': b'robot@softwareheritage.org' } REVISION_MESSAGE = b'swh-loader-package: synthetic revision message' def __init__(self, package_url: str, tarballs: Sequence): """Loader constructor. For now, this is the lister's task output. Args: package_url: Origin url tarballs: List of dict with keys `date` (date) and `archive` (str) the url to retrieve one versioned archive """ super().__init__(url=package_url) self.tarballs = list(sorted(tarballs, key=lambda v: v['time'])) def get_versions(self) -> Sequence[str]: versions = [] for archive in self.tarballs: v = get_version(archive['archive']) if v: versions.append(v) return versions def get_default_release(self) -> str: # It's the most recent, so for this loader, it's the last one return get_version(self.tarballs[-1]['archive']) - def get_artifacts(self, version: str) -> Generator[ - Tuple[Mapping[str, str], Dict], None, None]: + def get_package_info(self, version: str) -> Generator[ + Tuple[str, Mapping[str, Any]], None, None]: for a_metadata in self.tarballs: url = a_metadata['archive'] - artifact_version = get_version(url) - if version == artifact_version: - artifact_package_info = { + package_version = get_version(url) + if version == package_version: + p_info = { 'url': url, - 'filename': path.split(url)[-1] + 'filename': path.split(url)[-1], + 'raw': a_metadata, } - yield artifact_package_info, a_metadata + # FIXME: this code assumes we have only 1 artifact per + # versioned package + yield 'releases/%s' % version, p_info def resolve_revision_from( self, known_artifacts: Dict, artifact_metadata: Dict) \ -> Optional[bytes]: def pk(d): return [d.get(k) for k in ['time', 'archive', 'length']] artifact_pk = pk(artifact_metadata) for rev_id, known_artifact in known_artifacts.items(): logging.debug('known_artifact: %s', known_artifact) known_pk = pk(known_artifact['extrinsic']['raw']) if artifact_pk == known_pk: return rev_id def build_revision( - self, a_metadata: Dict, i_metadata: Dict) -> Dict: + self, a_metadata: Mapping[str, Any], + uncompressed_path: str) -> Dict: normalized_date = normalize_timestamp(int(a_metadata['time'])) return { 'type': 'tar', 'message': self.REVISION_MESSAGE, 'date': normalized_date, 'author': self.SWH_PERSON, 'committer': self.SWH_PERSON, 'committer_date': normalized_date, 'parents': [], 'metadata': { 'intrinsic': {}, 'extrinsic': { 'provider': self.url, 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, }, } diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py index 061c8b3..2c5a083 100644 --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -1,409 +1,384 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import 
logging import tempfile import os -from typing import Dict, Generator, List, Mapping, Optional, Sequence, Tuple +from typing import ( + Any, Dict, Generator, List, Mapping, Optional, Sequence, Tuple +) from swh.core.tarball import uncompress from swh.core.config import SWHConfig from swh.model.from_disk import Directory from swh.model.identifiers import ( revision_identifier, snapshot_identifier, identifier_to_bytes ) from swh.storage import get_storage from swh.storage.algos.snapshot import snapshot_get_all_branches from swh.loader.core.converters import content_for_storage from swh.loader.package.utils import download logger = logging.getLogger(__name__) # Not implemented yet: # - clean up disk routines from previous killed workers (when OOMkilled) # -> separation of concern would like this to be abstracted from the code # -> experience tells us it's complicated to do as such (T903, T964, T982, # etc...) # # - model: swh.model.merkle.from_disk should output swh.model.model.* objects # to avoid this layer's conversion routine call # -> Take this up within swh.model's current implementation class PackageLoader: # Origin visit type (str) set by the loader visit_type = '' def __init__(self, url): """Loader's constructor. This raises exception if the minimal required configuration is missing (cf. fn:`check` method). Args: url (str): Origin url to load data from """ # This expects to use the environment variable SWH_CONFIG_FILENAME self.config = SWHConfig.parse_config_file() self._check_configuration() self.storage = get_storage(**self.config['storage']) self.url = url self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc) def _check_configuration(self): """Checks the minimal configuration required is set for the loader. If some required configuration is missing, exception detailing the issue is raised. """ if 'storage' not in self.config: raise ValueError( 'Misconfiguration, at least the storage key should be set') def get_versions(self) -> Sequence[str]: """Return the list of all published package versions. Returns: Sequence of published versions """ return [] - def get_artifacts(self, version: str) -> Generator[ - Tuple[str, str, Dict], None, None]: + def get_package_info(self, version: str) -> Generator[ + Tuple[str, Mapping[str, Any]], None, None]: """Given a release version of a package, retrieve the associated - artifact information for such version. + package information for such version. Args: version: Package version Returns: - (artifact filename, artifact uri, raw artifact metadata) + (branch name, package metadata) """ yield from {} def build_revision( self, a_metadata: Dict, i_metadata: Dict) -> Dict: """Build the revision dict from the archive metadata (extrinsic artifact metadata) and the intrinsic metadata. Returns: SWH data dict """ return {} def get_default_release(self) -> str: """Retrieve the latest release version Returns: Latest version """ return '' def last_snapshot(self) -> Optional[Dict]: """Retrieve the last snapshot """ visit = self.storage.origin_visit_get_latest( self.url, require_snapshot=True) if visit: return snapshot_get_all_branches( self.storage, visit['snapshot']['id']) def known_artifacts(self, snapshot: Dict) -> [Dict]: """Retrieve the known releases/artifact for the origin. Args snapshot: snapshot for the visit Returns: Dict of keys revision id (bytes), values a metadata Dict. 
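For revisions previously created by this loader, each metadata dict carries the 'intrinsic', 'extrinsic' and 'original_artifact' entries set in load() below; this is what the resolve_revision_from() implementations look up (e.g. known_artifact['extrinsic']['raw'] or known_artifact['original_artifact'][0]['checksums']).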
""" if not snapshot or 'branches' not in snapshot: return {} # retrieve only revisions (e.g the alias we do not want here) revs = [rev['target'] for rev in snapshot['branches'].values() if rev and rev['target_type'] == 'revision'] known_revisions = self.storage.revision_get(revs) ret = {} for revision in known_revisions: if not revision: # revision_get can return None continue ret[revision['id']] = revision['metadata'] return ret def resolve_revision_from( self, known_artifacts: Dict, artifact_metadata: Dict) \ -> Optional[bytes]: """Resolve the revision from a snapshot and an artifact metadata dict. If the artifact has already been downloaded, this will return the existing revision targeting that uncompressed artifact directory. Otherwise, this returns None. Args: snapshot: Snapshot artifact_metadata: Information dict Returns: None or revision identifier """ return None - def download_package(self, artifacts_package_info: Mapping[str, str], - tmpdir: str) -> Tuple[str, Dict]: - """Download artifacts for a specific package. All downloads happen in the - the tmpdir folder. + def download_package(self, p_info: Mapping[str, Any], + tmpdir: str) -> [Tuple[str, Dict]]: + """Download artifacts for a specific package. All downloads happen in + in the tmpdir folder. Default implementation expects the artifacts package info to be about one artifact per package. Note that most implementation have 1 artifact per package. But some implementation have multiple artifacts per package (debian), some have none, the package is the artifact (gnu). Args: artifacts_package_info: Information on the package artifacts to - download (uri, filename, etc...) + download (url, filename, etc...) tmpdir: Location to retrieve such artifacts - Note: - - """ - a_uri = artifacts_package_info['url'] - filename = artifacts_package_info.get('filename') - return download(a_uri, dest=tmpdir, filename=filename) - - def read_intrinsic_metadata( - self, a_metadata: Dict, a_uncompressed_path: str) -> Dict: - """Read intrinsic metadata from either the a_metadata or - the uncompressed path. - - Depending on the implementations, some extracts directly from the - artifacts to ingest (pypi, npm...), some use api to access directly - their intrinsic metadata (debian exposes a dsc through uri) or some - have none (gnu). + Returns: + List of (path, computed hashes) """ - return {} + a_uri = p_info['url'] + filename = p_info.get('filename') + return [download(a_uri, dest=tmpdir, filename=filename)] - def uncompress( - self, a_path: str, tmpdir: str, a_metadata: Dict) -> str: - """Uncompress the artfifact(s) stored at a_path to tmpdir. + def uncompress(self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], + dest: str) -> str: + """Uncompress the artifact(s) in the destination folder dest. - Optionally, this could need to use the a_metadata dict for some more + Optionally, this could need to use the p_info dict for some more information (debian). """ - uncompressed_path = os.path.join(tmpdir, 'src') - uncompress(a_path, dest=uncompressed_path) + uncompressed_path = os.path.join(dest, 'src') + for a_path, _ in dl_artifacts: + uncompress(a_path, dest=uncompressed_path) return uncompressed_path def load(self) -> Dict: """Load for a specific origin the associated contents. for each package version of the origin 1. Fetch the files for one package version By default, this can be implemented as a simple HTTP request. 
Loaders with more specific requirements can override this, e.g.: the PyPI loader checks the integrity of the downloaded files; the Debian loader has to download and check several files for one package version. 2. Extract the downloaded files By default, this would be a universal archive/tarball extraction. Loaders for specific formats can override this method (for instance, the Debian loader uses dpkg-source -x). 3. Convert the extracted directory to a set of Software Heritage objects Using swh.model.from_disk. 4. Extract the metadata from the unpacked directories This would only be applicable for "smart" loaders like npm (parsing the package.json), PyPI (parsing the PKG-INFO file) or Debian (parsing debian/changelog and debian/control). On "minimal-metadata" sources such as the GNU archive, the lister should provide the minimal set of metadata needed to populate the revision/release objects (authors, dates) as an argument to the task. 5. Generate the revision/release objects for the given version. From the data generated at steps 3 and 4. end for each 6. Generate and load the snapshot for the visit Using the revisions/releases collected at step 5., and the branch information from step 0., generate a snapshot and load it into the Software Heritage archive """ status_load = 'uneventful' # either: eventful, uneventful, failed status_visit = 'full' # either: partial, full tmp_revisions = {} # type: Dict[str, List] snapshot = None try: # Prepare origin and origin_visit origin = {'url': self.url} self.storage.origin_add([origin]) visit_id = self.storage.origin_visit_add( origin=self.url, date=self.visit_date, type=self.visit_type)['visit'] last_snapshot = self.last_snapshot() logger.debug('last snapshot: %s', last_snapshot) known_artifacts = self.known_artifacts(last_snapshot) logger.debug('known artifacts: %s', known_artifacts) # Retrieve the default release (the "latest" one) default_release = self.get_default_release() logger.debug('default release: %s', default_release) for version in self.get_versions(): # for each logger.debug('version: %s', version) tmp_revisions[version] = [] - # `a_` stands for `artifact(s)_`, `p_` stands for `package_` - for a_p_info, a_metadata in self.get_artifacts(version): - logger.debug('a_p_info: %s', a_p_info) - logger.debug('a_metadata: %s', a_metadata) + # `p_` stands for `package_` + for branch_name, p_info in self.get_package_info(version): + logger.debug('package_info: %s', p_info) revision_id = self.resolve_revision_from( - known_artifacts, a_metadata) + known_artifacts, p_info['raw']) if revision_id is None: with tempfile.TemporaryDirectory() as tmpdir: try: - # a_c_: archive_computed_ - a_path, a_c_metadata = self.download_package( - a_p_info, tmpdir) + dl_artifacts = self.download_package( + p_info, tmpdir) except Exception: logger.exception('Unable to retrieve %s', - a_p_info['url']) + p_info) status_visit = 'partial' continue - logger.debug('archive_path: %s', a_path) - logger.debug('archive_computed_metadata: %s', - a_c_metadata) - uncompressed_path = self.uncompress( - a_path, tmpdir, a_metadata) + dl_artifacts, dest=tmpdir) logger.debug('uncompressed_path: %s', uncompressed_path) directory = Directory.from_disk( path=uncompressed_path.encode('utf-8'), data=True) # noqa # FIXME: Try not to load the full raw content in # memory objects = directory.collect() contents = objects['content'].values() logger.debug('Number of contents: %s', len(contents)) self.storage.content_add( map(content_for_storage, contents)) status_load = 'eventful' directories = 
objects['directory'].values() logger.debug('Number of directories: %s', len(directories)) self.storage.directory_add(directories) - i_metadata = self.read_intrinsic_metadata( - a_metadata, uncompressed_path) - # FIXME: This should be release. cf. D409 revision = self.build_revision( - a_metadata, i_metadata) + p_info['raw'], uncompressed_path) revision.update({ 'synthetic': True, 'directory': directory.hash, }) revision['metadata'].update({ - 'original_artifact': a_c_metadata, + 'original_artifact': [ + hashes for _, hashes in dl_artifacts + ], }) revision['id'] = revision_id = identifier_to_bytes( revision_identifier(revision)) logger.debug('Revision: %s', revision) self.storage.revision_add([revision]) - tmp_revisions[version].append( - (a_p_info['filename'], revision_id)) + tmp_revisions[version].append((branch_name, revision_id)) # Build and load the snapshot branches = {} - for version, v_branches in tmp_revisions.items(): - if len(v_branches) == 1: - branch_name = ( - version if version == 'HEAD' - else 'releases/%s' % version).encode('utf-8') - if version == default_release: + for version, branch_name_revisions in tmp_revisions.items(): + if len(branch_name_revisions) == 1: + branch_name, target = branch_name_revisions[0] + if branch_name != 'HEAD': branches[b'HEAD'] = { 'target_type': 'alias', - 'target': branch_name, + 'target': branch_name.encode('utf-8'), } + for branch_name, target in branch_name_revisions: + branch_name = branch_name.encode('utf-8') branches[branch_name] = { 'target_type': 'revision', - 'target': v_branches[0][1], + 'target': target, } - else: - for filename, target in v_branches: - branch_name = ('releases/%s/%s' % ( - version, filename)).encode('utf-8') - branches[branch_name] = { - 'target_type': 'revision', - 'target': target, - } snapshot = { 'branches': branches } logger.debug('snapshot: %s', snapshot) snapshot['id'] = identifier_to_bytes( snapshot_identifier(snapshot)) logger.debug('snapshot: %s', snapshot) self.storage.snapshot_add([snapshot]) if hasattr(self.storage, 'flush'): self.storage.flush() except Exception: logger.exception('Fail to load %s' % self.url) status_visit = 'partial' status_load = 'failed' finally: self.storage.origin_visit_update( origin=self.url, visit_id=visit_id, status=status_visit, snapshot=snapshot) result = { 'status': status_load, } if snapshot: result['snapshot_id'] = snapshot['id'] return result diff --git a/swh/loader/package/npm.py b/swh/loader/package/npm.py index ae1fa3c..09cdfdd 100644 --- a/swh/loader/package/npm.py +++ b/swh/loader/package/npm.py @@ -1,298 +1,295 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging import os import re from codecs import BOM_UTF8 -from typing import Dict, Generator, Mapping, Sequence, Tuple, Optional +from typing import Any, Dict, Generator, Mapping, Sequence, Tuple, Optional import chardet import iso8601 from swh.model.identifiers import normalize_timestamp from swh.loader.package.loader import PackageLoader from swh.loader.package.utils import api_info logger = logging.getLogger(__name__) _EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None} # https://github.com/jonschlinkert/author-regex _author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)' def parse_npm_package_author(author_str): """ Parse npm package author string. 
It works with a flexible range of formats, as detailed below:: name name (url) name (url) name (url) name(url) name (url) name (url) name(url) name(url) name (url) name(url) name name (url) (url) (url) (url) (url) Args: author_str (str): input author string Returns: dict: A dict that may contain the following keys: * name * email * url """ author = {} matches = re.findall(_author_regexp, author_str.replace('<>', '').replace('()', ''), re.M) for match in matches: if match[0].strip(): author['name'] = match[0].strip() if match[1].strip(): author['email'] = match[1].strip() if match[2].strip(): author['url'] = match[2].strip() return author def extract_npm_package_author(package_json): """ Extract package author from a ``package.json`` file content and return it in swh format. Args: package_json (dict): Dict holding the content of parsed ``package.json`` file Returns: dict: A dict with the following keys: * fullname * name * email """ def _author_str(author_data): if type(author_data) is dict: author_str = '' if 'name' in author_data: author_str += author_data['name'] if 'email' in author_data: author_str += ' <%s>' % author_data['email'] return author_str elif type(author_data) is list: return _author_str(author_data[0]) if len(author_data) > 0 else '' else: return author_data author_data = {} for author_key in ('author', 'authors'): if author_key in package_json: author_str = _author_str(package_json[author_key]) author_data = parse_npm_package_author(author_str) name = author_data.get('name') email = author_data.get('email') fullname = None if name and email: fullname = '%s <%s>' % (name, email) elif name: fullname = name if not fullname: return _EMPTY_AUTHOR if fullname: fullname = fullname.encode('utf-8') if name: name = name.encode('utf-8') if email: email = email.encode('utf-8') return {'fullname': fullname, 'name': name, 'email': email} def _lstrip_bom(s, bom=BOM_UTF8): if s.startswith(bom): return s[len(bom):] else: return s def load_json(json_bytes): """ Try to load JSON from bytes and return a dictionary. First try to decode from utf-8. If the decoding failed, try to detect the encoding and decode again with replace error handling. If JSON is malformed, an empty dictionary will be returned. Args: json_bytes (bytes): binary content of a JSON file Returns: dict: JSON data loaded in a dictionary """ json_data = {} try: json_str = _lstrip_bom(json_bytes).decode('utf-8') except UnicodeDecodeError: encoding = chardet.detect(json_bytes)['encoding'] if encoding: json_str = json_bytes.decode(encoding, 'replace') try: json_data = json.loads(json_str) except json.decoder.JSONDecodeError: pass return json_data def extract_intrinsic_metadata(dir_path: str) -> Dict: """Given an uncompressed path holding the pkginfo file, returns a pkginfo parsed structure as a dict. The release artifact contains at their root one folder. For example: $ tar tvf zprint-0.0.6.tar.gz drwxr-xr-x root/root 0 2018-08-22 11:01 zprint-0.0.6/ ... Args: dir_path (str): Path to the uncompressed directory representing a release artifact from npm. Returns: the pkginfo parsed structure as a dict if any or None if none was present. 
""" # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) == 0: return {} project_dirname = lst[0] package_json_path = os.path.join(dir_path, project_dirname, 'package.json') if not os.path.exists(package_json_path): return {} with open(package_json_path, 'rb') as package_json_file: package_json_bytes = package_json_file.read() return load_json(package_json_bytes) class NpmLoader(PackageLoader): visit_type = 'npm' def __init__(self, package_name, package_url, package_metadata_url): super().__init__(url=package_url) self.provider_url = package_metadata_url self._info = None self._versions = None # if package_url is None: # package_url = 'https://www.npmjs.com/package/%s' % package_name # if package_metadata_url is None: # package_metadata_url = 'https://replicate.npmjs.com/%s/' %\ # quote(package_name, safe='') @property def info(self) -> Dict: """Return the project metadata information (fetched from npm registry) """ if not self._info: self._info = api_info(self.provider_url) return self._info def get_versions(self) -> Sequence[str]: return sorted(self.info['versions'].keys()) def get_default_release(self) -> str: return self.info['dist-tags'].get('latest', '') - def get_artifacts(self, version: str) -> Generator[ - Tuple[Mapping[str, str], Dict], None, None]: + def get_package_info(self, version: str) -> Generator[ + Tuple[str, Mapping[str, Any]], None, None]: meta = self.info['versions'][version] url = meta['dist']['tarball'] - artifact_package_info = { + p_info = { 'url': url, 'filename': os.path.basename(url), + 'raw': meta, } - yield artifact_package_info, meta + yield 'releases/%s' % version, p_info def resolve_revision_from( self, known_artifacts: Dict, artifact_metadata: Dict) \ -> Optional[bytes]: shasum = artifact_metadata['dist']['shasum'] for rev_id, known_artifact in known_artifacts.items(): - original_artifact = known_artifact['original_artifact'] + original_artifact = known_artifact['original_artifact'][0] if shasum == original_artifact['checksums']['sha1']: return rev_id - def read_intrinsic_metadata(self, a_metadata: Dict, - a_uncompressed_path: str) -> Dict: - return extract_intrinsic_metadata(a_uncompressed_path) - def build_revision( - self, a_metadata: Dict, i_metadata: Dict) -> Dict: - + self, a_metadata: Dict, uncompressed_path: str) -> Dict: + i_metadata = extract_intrinsic_metadata(uncompressed_path) # from intrinsic metadata author = extract_npm_package_author(i_metadata) # extrinsic metadata version = i_metadata['version'] date = self.info['time'][version] date = iso8601.parse_date(date) date = normalize_timestamp(int(date.timestamp())) message = version.encode('ascii') return { 'type': 'tar', 'message': message, 'author': author, 'date': date, 'committer': author, 'committer_date': date, 'parents': [], 'metadata': { 'intrinsic': { 'tool': 'package.json', 'raw': i_metadata, }, 'extrinsic': { 'provider': self.provider_url, 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, }, } diff --git a/swh/loader/package/pypi.py b/swh/loader/package/pypi.py index 156ab2f..820b79c 100644 --- a/swh/loader/package/pypi.py +++ b/swh/loader/package/pypi.py @@ -1,186 +1,193 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os -from typing import Generator, Dict, Mapping, Optional, Sequence, Tuple +from 
typing import Any, Dict, Generator, Mapping, Optional, Sequence, Tuple from urllib.parse import urlparse from pkginfo import UnpackedSDist import iso8601 from swh.model.identifiers import normalize_timestamp from swh.loader.package.loader import PackageLoader from swh.loader.package.utils import api_info def pypi_api_url(url: str) -> str: """Compute api url from a project url Args: url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) This deals with correctly transforming the project's api url (e.g https://pypi.org/pypi/requests/json) Returns: api url """ p_url = urlparse(url) project_name = p_url.path.split('/')[-1] url = '%s://%s/pypi/%s/json' % (p_url.scheme, p_url.netloc, project_name) return url def extract_intrinsic_metadata(dir_path: str) -> Dict: """Given an uncompressed path holding the pkginfo file, returns a pkginfo parsed structure as a dict. The release artifact contains at their root one folder. For example: $ tar tvf zprint-0.0.6.tar.gz drwxr-xr-x root/root 0 2018-08-22 11:01 zprint-0.0.6/ ... Args: dir_path (str): Path to the uncompressed directory representing a release artifact from pypi. Returns: the pkginfo parsed structure as a dict if any or None if none was present. """ # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) != 1: return {} project_dirname = lst[0] pkginfo_path = os.path.join(dir_path, project_dirname, 'PKG-INFO') if not os.path.exists(pkginfo_path): return {} pkginfo = UnpackedSDist(pkginfo_path) raw = pkginfo.__dict__ raw.pop('filename') # this gets added with the ondisk location return raw def author(data: Dict) -> Dict: """Given a dict of project/release artifact information (coming from PyPI), returns an author subset. Args: data (dict): Representing either artifact information or release information. Returns: swh-model dict representing a person. """ name = data.get('author') email = data.get('author_email') if email: fullname = '%s <%s>' % (name, email) else: fullname = name if not fullname: return {'fullname': b'', 'name': None, 'email': None} fullname = fullname.encode('utf-8') if name is not None: name = name.encode('utf-8') if email is not None: email = email.encode('utf-8') return {'fullname': fullname, 'name': name, 'email': email} class PyPILoader(PackageLoader): """Load pypi origin's artifact releases into swh archive. 
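A version backed by a single artifact is loaded under a 'releases/<version>' branch; when several artifacts exist for the same version, one branch per artifact is created, named 'releases/<version>/<filename>' (see get_package_info below).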
""" visit_type = 'pypi' def __init__(self, url): super().__init__(url=url) self._info = None self.provider_url = pypi_api_url(self.url) @property def info(self) -> Dict: """Return the project metadata information (fetched from pypi registry) """ if not self._info: self._info = api_info(self.provider_url) return self._info def get_versions(self) -> Sequence[str]: return self.info['releases'].keys() def get_default_release(self) -> str: return self.info['info']['version'] - def get_artifacts(self, version: str) -> Generator[ - Tuple[Mapping[str, str], Dict], None, None]: + def get_package_info(self, version: str) -> Generator[ + Tuple[str, Mapping[str, Any]], None, None]: + res = [] for meta in self.info['releases'][version]: - artifact_package_info = { + filename = meta['filename'] + p_info = { 'url': meta['url'], - 'filename': meta['filename'], + 'filename': filename, + 'raw': meta, } - yield artifact_package_info, meta + res.append((version, p_info)) + + if len(res) == 1: + version, p_info = res[0] + yield 'releases/%s' % version, p_info + else: + for version, p_info in res: + yield 'releases/%s/%s' % (version, p_info['filename']), p_info def resolve_revision_from( self, known_artifacts: Dict, artifact_metadata: Dict) \ -> Optional[bytes]: sha256 = artifact_metadata['digests']['sha256'] for rev_id, known_artifact in known_artifacts.items(): - original_artifact = known_artifact['original_artifact'] - if sha256 == original_artifact['checksums']['sha256']: - return rev_id - - def read_intrinsic_metadata(self, a_metadata: Dict, - a_uncompressed_path: str) -> Dict: - return extract_intrinsic_metadata(a_uncompressed_path) + for original_artifact in known_artifact['original_artifact']: + if sha256 == original_artifact['checksums']['sha256']: + return rev_id def build_revision( - self, a_metadata: Dict, i_metadata: Dict) -> Dict: + self, a_metadata: Dict, uncompressed_path: str) -> Dict: + i_metadata = extract_intrinsic_metadata(uncompressed_path) # from intrinsic metadata name = i_metadata['version'] _author = author(i_metadata) # from extrinsic metadata message = a_metadata.get('comment_text', '') message = '%s: %s' % (name, message) if message else name date = normalize_timestamp( int(iso8601.parse_date(a_metadata['upload_time']).timestamp())) return { 'type': 'tar', 'message': message.encode('utf-8'), 'author': _author, 'date': date, 'committer': _author, 'committer_date': date, 'parents': [], 'metadata': { 'intrinsic': { 'tool': 'PKG-INFO', 'raw': i_metadata, }, 'extrinsic': { 'provider': self.provider_url, 'when': self.visit_date.isoformat(), 'raw': a_metadata, }, } } diff --git a/swh/loader/package/tests/test_debian.py b/swh/loader/package/tests/test_debian.py index 85663ac..b453c28 100644 --- a/swh/loader/package/tests/test_debian.py +++ b/swh/loader/package/tests/test_debian.py @@ -1,316 +1,318 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import logging import pytest from os import path from swh.loader.package.debian import ( DebianLoader, download_package, dsc_information, uid_to_person, prepare_person, get_package_metadata, extract_package ) from swh.loader.package.tests.common import check_snapshot logger = logging.getLogger(__name__) PACKAGE_FILES = { 'files': { 'cicero_0.7.2-3.diff.gz': { 'md5sum': 'a93661b6a48db48d59ba7d26796fc9ce', 'name': 'cicero_0.7.2-3.diff.gz', 
'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c', # noqa 'size': 3964, 'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2-3.diff.gz' # noqa }, 'cicero_0.7.2-3.dsc': { 'md5sum': 'd5dac83eb9cfc9bb52a15eb618b4670a', 'name': 'cicero_0.7.2-3.dsc', 'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03', # noqa 'size': 1864, 'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2-3.dsc'}, # noqa 'cicero_0.7.2.orig.tar.gz': { 'md5sum': '4353dede07c5728319ba7f5595a7230a', 'name': 'cicero_0.7.2.orig.tar.gz', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786', # noqa 'size': 96527, 'uri': 'http://deb.debian.org/debian//pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz' # noqa } }, 'id': 23, 'name': 'cicero', 'revision_id': None, 'version': '0.7.2-3' } PACKAGE_PER_VERSION = { 'stretch/contrib/0.7.2-3': PACKAGE_FILES } def test_debian_first_visit( swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ loader = DebianLoader( url='deb://Debian/packages/cicero', date='2019-10-12T05:58:09.165557+00:00', packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 42, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, # all artifacts under 1 revision 'skipped_content': 0, 'snapshot': 1 } == stats expected_snapshot = { 'id': 'a59ec49a01ff329dcbbc63fd36a5654143aef240', 'branches': { 'HEAD': { 'target_type': 'alias', 'target': 'releases/stretch/contrib/0.7.2-3' }, 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', 'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07', } }, } # different than the previous loader as no release is done check_snapshot(expected_snapshot, loader.storage) def test_debian_first_visit_then_another_visit( swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ url = 'deb://Debian/packages/cicero' loader = DebianLoader( url=url, date='2019-10-12T05:58:09.165557+00:00', packages=PACKAGE_PER_VERSION) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' stats = loader.storage.stat_counters() assert { 'content': 42, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, # all artifacts under 1 revision 'skipped_content': 0, 'snapshot': 1 } == stats expected_snapshot = { 'id': 'a59ec49a01ff329dcbbc63fd36a5654143aef240', 'branches': { 'HEAD': { 'target_type': 'alias', 'target': 'releases/stretch/contrib/0.7.2-3' }, 'releases/stretch/contrib/0.7.2-3': { 'target_type': 'revision', 'target': '2807f5b3f84368b4889a9ae827fe85854ffecf07', } }, } # different than the previous loader as no release is done check_snapshot(expected_snapshot, loader.storage) # No change in between load actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' origin_visit2 = list(loader.storage.origin_visit_get(url)) assert origin_visit2[-1]['status'] == 'full' stats2 = loader.storage.stat_counters() assert { 'content': 42 + 0, 'directory': 2 + 0, 'origin': 1, 'origin_visit': 1 + 1, # a new visit occurred 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1, # same snapshot across 2 visits } == stats2 urls = [ m.url for m in 
requests_mock_datadir.request_history if m.url.startswith('http://deb.debian.org') ] # visited each package artifact twice across 2 visits assert len(urls) == len(set(urls)) def test_uid_to_person(): uid = 'Someone Name ' actual_person = uid_to_person(uid) assert actual_person == { 'name': 'Someone Name', 'email': 'someone@orga.org', 'fullname': uid, } def test_prepare_person(): actual_author = prepare_person({ 'name': 'Someone Name', 'email': 'someone@orga.org', 'fullname': 'Someone Name ', }) assert actual_author == { 'name': b'Someone Name', 'email': b'someone@orga.org', 'fullname': b'Someone Name ', } def test_download_package(datadir, tmpdir, requests_mock_datadir): tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) all_hashes = download_package(PACKAGE_FILES, tmpdir) assert all_hashes == { 'cicero_0.7.2-3.diff.gz': { 'checksums': { 'blake2s256': '08b1c438e70d2474bab843d826515147fa4a817f8c4baaf3ddfbeb5132183f21', # noqa 'sha1': '0815282053f21601b0ec4adf7a8fe47eace3c0bc', 'sha1_git': '834ac91da3a9da8f23f47004bb456dd5bd16fe49', 'sha256': 'f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c' # noqa }, 'filename': 'cicero_0.7.2-3.diff.gz', 'length': 3964}, 'cicero_0.7.2-3.dsc': { 'checksums': { 'blake2s256': '8c002bead3e35818eaa9d00826f3d141345707c58fb073beaa8abecf4bde45d2', # noqa 'sha1': 'abbec4e8efbbc80278236e1dd136831eac08accd', 'sha1_git': '1f94b2086fa1142c2df6b94092f5c5fa11093a8e', 'sha256': '35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03' # noqa }, 'filename': 'cicero_0.7.2-3.dsc', 'length': 1864}, 'cicero_0.7.2.orig.tar.gz': { 'checksums': { 'blake2s256': '9809aa8d2e2dad7f34cef72883db42b0456ab7c8f1418a636eebd30ab71a15a6', # noqa 'sha1': 'a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43', 'sha1_git': 'aa0a38978dce86d531b5b0299b4a616b95c64c74', 'sha256': '63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786' # noqa }, 'filename': 'cicero_0.7.2.orig.tar.gz', 'length': 96527 } } def test_dsc_information_ok(): fname = 'cicero_0.7.2-3.dsc' dsc_url, dsc_name = dsc_information(PACKAGE_FILES) assert dsc_url == PACKAGE_FILES['files'][fname]['uri'] assert dsc_name == PACKAGE_FILES['files'][fname]['name'] def test_dsc_information_not_found(): fname = 'cicero_0.7.2-3.dsc' package_files = copy.deepcopy(PACKAGE_FILES) package_files['files'].pop(fname) dsc_url, dsc_name = dsc_information(package_files) assert dsc_url is None assert dsc_name is None def test_dsc_information_too_many_dsc_entries(): # craft an extra dsc file fname = 'cicero_0.7.2-3.dsc' package_files = copy.deepcopy(PACKAGE_FILES) data = package_files['files'][fname] fname2 = fname.replace('cicero', 'ciceroo') package_files['files'][fname2] = data with pytest.raises( ValueError, match='Package %s_%s references several dsc' % ( package_files['name'], package_files['version'])): dsc_information(package_files) def test_get_package_metadata(requests_mock_datadir, datadir, tmp_path): tmp_path = str(tmp_path) # py3.5 compat. 
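# extract_package() now takes a list of (directory, hashes) pairs instead of the
# raw package dict; it is rebuilt below from download_package()'s per-filename
# result, e.g. [(tmp_path, {'filename': 'cicero_0.7.2-3.dsc', 'length': 1864,
#                           'checksums': {...}}), ...]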
package = PACKAGE_FILES logger.debug('package: %s', package) # download the packages - download_package(package, tmp_path) + all_hashes = download_package(package, tmp_path) # Retrieve information from package _, dsc_name = dsc_information(package) + dl_artifacts = [(tmp_path, hashes) for hashes in all_hashes.values()] + # Extract information from package - extracted_path = extract_package(package, tmp_path) + extracted_path = extract_package(dl_artifacts, tmp_path) # Retrieve information on package dsc_path = path.join(path.dirname(extracted_path), dsc_name) actual_package_info = get_package_metadata( package, dsc_path, extracted_path) logger.debug('actual_package_info: %s', actual_package_info) assert actual_package_info == { 'changelog': { 'date': '2014-10-19T16:52:35+02:00', 'history': [ ('cicero', '0.7.2-2'), ('cicero', '0.7.2-1'), ('cicero', '0.7-1') ], 'person': { 'email': 'sthibault@debian.org', 'fullname': 'Samuel Thibault ', 'name': 'Samuel Thibault' } }, 'maintainers': [ { 'email': 'debian-accessibility@lists.debian.org', 'fullname': 'Debian Accessibility Team ' '', 'name': 'Debian Accessibility Team' }, { 'email': 'sthibault@debian.org', 'fullname': 'Samuel Thibault ', 'name': 'Samuel Thibault' } ], 'name': 'cicero', 'version': '0.7.2-3' } diff --git a/swh/loader/package/tests/test_deposit.py b/swh/loader/package/tests/test_deposit.py index 8cc5723..2e999b0 100644 --- a/swh/loader/package/tests/test_deposit.py +++ b/swh/loader/package/tests/test_deposit.py @@ -1,199 +1,204 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import re from swh.model.hashutil import hash_to_bytes from swh.loader.package.deposit import DepositLoader from swh.loader.package.tests.common import ( check_snapshot, check_metadata_paths ) from swh.core.pytest_plugin import requests_mock_datadir_factory def test_deposit_init_ok(swh_config): url = 'some-url' deposit_id = 999 loader = DepositLoader(url, deposit_id) # Something that does not exist assert loader.url == url assert loader.archive_url == '/%s/raw/' % deposit_id assert loader.metadata_url == '/%s/meta/' % deposit_id assert loader.deposit_update_url == '/%s/update/' % deposit_id assert loader.client is not None def test_deposit_loading_failure_to_fetch_metadata(swh_config): """Error during fetching artifact ends us with failed/partial visit """ # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' url = 'some-url' unknown_deposit_id = 666 loader = DepositLoader(url, unknown_deposit_id) # does not exist actual_load_status = loader.load() assert actual_load_status['status'] == 'failed' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 0, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[ 'https://deposit.softwareheritage.org/1/private/666/raw/', ]) def test_deposit_loading_failure_to_retrieve_1_artifact( swh_config, requests_mock_datadir_missing_one): """Deposit with missing artifact ends up with an uneventful/partial visit """ # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' url = 'some-url-2' deposit_id = 666 loader = DepositLoader(url, deposit_id) 
assert loader.archive_url actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' def test_revision_metadata_structure(swh_config, requests_mock_datadir): # do not care for deposit update query requests_mock_datadir.put(re.compile('https')) url = 'https://hal-test.archives-ouvertes.fr/some-external-id' deposit_id = 666 loader = DepositLoader(url, deposit_id) assert loader.archive_url actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( '9471c606239bccb1f269564c9ea114e1eeab9eb4') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), - ('original_artifact.filename', str), - ('original_artifact.length', int), - ('original_artifact.checksums', dict), + ('original_artifact', list), ]) + for original_artifact in revision['metadata']['original_artifact']: + check_metadata_paths(original_artifact, paths=[ + ('filename', str), + ('length', int), + ('checksums', dict), + ]) + def test_deposit_loading_ok(swh_config, requests_mock_datadir): requests_mock_datadir.put(re.compile('https')) # do not care for put url = 'https://hal-test.archives-ouvertes.fr/some-external-id' deposit_id = 666 loader = DepositLoader(url, deposit_id) assert loader.archive_url actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 303, 'directory': 12, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' expected_branches = { 'HEAD': { 'target': '9471c606239bccb1f269564c9ea114e1eeab9eb4', 'target_type': 'revision', }, } expected_snapshot = { 'id': '453f455d0efb69586143cd6b6e5897f9906b53a7', 'branches': expected_branches, } check_snapshot(expected_snapshot, storage=loader.storage) # check metadata tool = { "name": "swh-deposit", "version": "0.0.1", "configuration": { "sword_version": "2", } } tool = loader.storage.tool_get(tool) assert tool is not None assert tool['id'] is not None provider = { "provider_name": "hal", "provider_type": "deposit_client", "provider_url": "https://hal-test.archives-ouvertes.fr/", "metadata": None, } provider = loader.storage.metadata_provider_get_by(provider) assert provider is not None assert provider['id'] is not None metadata = loader.storage.origin_metadata_get_by( url, provider_type='deposit_client') assert metadata is not None assert isinstance(metadata, list) assert len(metadata) == 1 metadata0 = metadata[0] assert metadata0['provider_id'] == provider['id'] assert metadata0['provider_type'] == 'deposit_client' assert metadata0['tool_id'] == tool['id'] diff --git a/swh/loader/package/tests/test_gnu.py b/swh/loader/package/tests/test_gnu.py index ea70a83..3be6610 100644 --- a/swh/loader/package/tests/test_gnu.py +++ b/swh/loader/package/tests/test_gnu.py @@ -1,349 +1,354 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory 
of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import re from swh.model.hashutil import hash_to_bytes from swh.loader.package.gnu import GNULoader, get_version from swh.loader.package.tests.common import ( check_snapshot, check_metadata_paths ) def test_get_version(): """From url to branch name should yield something relevant """ for url, expected_branchname in [ ('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'), ('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'), ('https://sthg.org/gnu/sthg.tar.gz', 'sthg'), ('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'), ('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'), ('https://ftp.org/gnu/aris-w32.zip', 'w32'), ('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'), ('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'), ('https://ftp.org/gnu/crypto-build-demo.tar.gz', 'crypto-build-demo'), ('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz', 'clue+clio+xit.clisp'), ('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz', 'clue+clio.for-pcl'), ('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz', 'hppa2.0-hp-hpux10.20'), ('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'), ('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'), ('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'), ('clisp-powerpc-unknown-linuxlibc6.tar.gz', 'powerpc-unknown-linuxlibc6'), ('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'), ('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'), ('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'), ('clisp-sparc-sun-sunos4.1.3_U1.tar.gz', 'sparc-sun-sunos4.1.3_U1'), ('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz', '2.25.1-powerpc-apple-MacOSX'), ('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz', '2.27-PowerMacintosh-powerpc-Darwin-1.3.7'), ('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz', '2.27-i686-unknown-Linux-2.2.19'), ('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz', '2.28-i386-i386-freebsd-4.3-RELEASE'), ('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz', '2.28-i686-unknown-cygwin_me-4.90-1.3.10'), ('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz', '2.29-i386-i386-freebsd-4.6-STABLE'), ('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz', '2.29-i686-unknown-cygwin_nt-5.0-1.3.12'), ('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip', '2.5.3-ansi-japi-xdr.20030701_mingw32'), ('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'), ('sather-logo_images.tar.gz', 'sather-logo_images'), ('sather-specification-000328.html.tar.gz', '000328.html') ]: actual_branchname = get_version(url) assert actual_branchname == expected_branchname _expected_new_contents_first_visit = [ 'e9258d81faf5881a2f96a77ba609396f82cb97ad', '1170cf105b04b7e2822a0e09d2acf71da7b9a130', 'fbd27c3f41f2668624ffc80b7ba5db9b92ff27ac', '0057bec9b5422aff9256af240b177ac0e3ac2608', '2b8d0d0b43a1078fc708930c8ddc2956a86c566e', '27de3b3bc6545d2a797aeeb4657c0e215a0c2e55', '2e6db43f5cd764e677f416ff0d0c78c7a82ef19b', 'ae9be03bd2a06ed8f4f118d3fe76330bb1d77f62', 'edeb33282b2bffa0e608e9d2fd960fd08093c0ea', 'd64e64d4c73679323f8d4cde2643331ba6c20af9', '7a756602914be889c0a2d3952c710144b3e64cb0', '84fb589b554fcb7f32b806951dcf19518d67b08f', '8624bcdae55baeef00cd11d5dfcfa60f68710a02', 'e08441aeab02704cfbd435d6445f7c072f8f524e', 'f67935bc3a83a67259cda4b2d43373bd56703844', '809788434b433eb2e3cfabd5d591c9a659d5e3d8', '7d7c6c8c5ebaeff879f61f37083a3854184f6c41', 'b99fec102eb24bffd53ab61fc30d59e810f116a2', 
'7d149b28eaa228b3871c91f0d5a95a2fa7cb0c68', 'f0c97052e567948adf03e641301e9983c478ccff', '7fb724242e2b62b85ca64190c31dcae5303e19b3', '4f9709e64a9134fe8aefb36fd827b84d8b617ab5', '7350628ccf194c2c3afba4ac588c33e3f3ac778d', '0bb892d9391aa706dc2c3b1906567df43cbe06a2', '49d4c0ce1a16601f1e265d446b6c5ea6b512f27c', '6b5cc594ac466351450f7f64a0b79fdaf4435ad3', '3046e5d1f70297e2a507b98224b6222c9688d610', '1572607d456d7f633bc6065a2b3048496d679a31', ] _expected_new_directories_first_visit = [ 'daabc65ec75d487b1335ffc101c0ac11c803f8fc', '263be23b4a8101d3ad0d9831319a3e0f2b065f36', '7f6e63ba6eb3e2236f65892cd822041f1a01dd5c', '4db0a3ecbc976083e2dac01a62f93729698429a3', 'dfef1c80e1098dd5deda664bb44a9ab1f738af13', 'eca971d346ea54d95a6e19d5051f900237fafdaa', '3aebc29ed1fccc4a6f2f2010fb8e57882406b528', ] _expected_new_revisions_first_visit = { '44183488c0774ce3c957fa19ba695cf18a4a42b3': '3aebc29ed1fccc4a6f2f2010fb8e57882406b528' } _expected_branches_first_visit = { 'HEAD': { 'target_type': 'alias', 'target': 'releases/0.1.0', }, 'releases/0.1.0': { 'target_type': 'revision', 'target': '44183488c0774ce3c957fa19ba695cf18a4a42b3', }, } # hash is different then before as we changed the snapshot # gnu used to use `release/` (singular) instead of plural _expected_new_snapshot_first_visit_id = 'c419397fd912039825ebdbea378bc6283f006bf5' # noqa def test_visit_with_no_artifact_found(swh_config, requests_mock): package_url = 'https://ftp.gnu.org/gnu/8sync/' tarballs = [{ 'time': '944729610', 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'length': 221837, }] loader = GNULoader(package_url, tarballs) requests_mock.get(re.compile('https://'), status_code=404) actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(package_url)) assert origin_visit['status'] == 'partial' def test_check_revision_metadata_structure(swh_config, requests_mock_datadir): package_url = 'https://ftp.gnu.org/gnu/8sync/' tarballs = [{ 'time': '944729610', 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'length': 221837, }] loader = GNULoader(package_url, tarballs) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( '44183488c0774ce3c957fa19ba695cf18a4a42b3') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('intrinsic', dict), ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), - ('original_artifact.filename', str), - ('original_artifact.length', int), - ('original_artifact.checksums', dict), + ('original_artifact', list), ]) + for original_artifact in revision['metadata']['original_artifact']: + check_metadata_paths(original_artifact, paths=[ + ('filename', str), + ('length', int), + ('checksums', dict), + ]) + def test_visit_with_release_artifact_no_prior_visit( swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ assert 'SWH_CONFIG_FILENAME' in os.environ # cf. 
tox.ini package_url = 'https://ftp.gnu.org/gnu/8sync/' tarballs = [{ 'time': 944729610, 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'length': 221837, }] loader = GNULoader(package_url, tarballs) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) assert list(loader.storage.content_missing_per_sha1(expected_contents)) \ == [] expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit) assert list(loader.storage.directory_missing(expected_dirs)) == [] expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit) assert list(loader.storage.revision_missing(expected_revs)) == [] expected_snapshot = { 'id': _expected_new_snapshot_first_visit_id, 'branches': _expected_branches_first_visit, } check_snapshot(expected_snapshot, loader.storage) def test_2_visits_without_change(swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini url = 'https://ftp.gnu.org/gnu/8sync/' tarballs = [{ 'time': 944729610, 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'length': 221837, }] loader = GNULoader(url, tarballs) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' urls = [ m.url for m in requests_mock_datadir.request_history if m.url.startswith('https://ftp.gnu.org') ] assert len(urls) == 1 def test_2_visits_with_new_artifact(swh_config, requests_mock_datadir): """With no prior visit, load a gnu project ends up with 1 snapshot """ assert 'SWH_CONFIG_FILENAME' in os.environ # cf. 
tox.ini url = 'https://ftp.gnu.org/gnu/8sync/' tarball1 = { 'time': 944729610, 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'length': 221837, } loader = GNULoader(url, [tarball1]) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1 } == stats urls = [ m.url for m in requests_mock_datadir.request_history if m.url.startswith('https://ftp.gnu.org') ] assert len(urls) == 1 tarball2 = { 'time': 1480991830, 'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz', 'length': 238466, } loader2 = GNULoader(url, [tarball1, tarball2]) # implementation detail: share the storage in between visits loader2.storage = loader.storage stats2 = loader2.storage.stat_counters() assert stats == stats2 # ensure we share the storage actual_load_status2 = loader2.load() assert actual_load_status2['status'] == 'eventful' stats2 = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit) + 14, 'directory': len(_expected_new_directories_first_visit) + 8, 'origin': 1, 'origin_visit': 1 + 1, 'person': 1, 'release': 0, 'revision': len(_expected_new_revisions_first_visit) + 1, 'skipped_content': 0, 'snapshot': 1 + 1, } == stats2 origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' urls = [ m.url for m in requests_mock_datadir.request_history if m.url.startswith('https://ftp.gnu.org') ] # 1 artifact (2nd time no modification) + 1 new artifact assert len(urls) == 2 diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py index 1c253c0..654c472 100644 --- a/swh/loader/package/tests/test_npm.py +++ b/swh/loader/package/tests/test_npm.py @@ -1,526 +1,531 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import os from swh.model.hashutil import hash_to_bytes from swh.loader.package.npm import ( parse_npm_package_author, extract_npm_package_author ) from swh.loader.package.tests.common import ( check_snapshot, check_metadata_paths ) from swh.loader.package.npm import NpmLoader def _parse_author_string_test(author_str, expected_result): assert parse_npm_package_author(author_str) == expected_result assert parse_npm_package_author(' %s' % author_str) == expected_result assert parse_npm_package_author('%s ' % author_str) == expected_result def test_parse_npm_package_author(): _parse_author_string_test( 'John Doe', { 'name': 'John Doe' } ) _parse_author_string_test( '', { 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( '(https://john.doe)', { 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( 'John Doe', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'url': 
'https://john.doe' } ) _parse_author_string_test( ' (https://john.doe)', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( '(https://john.doe) ', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe) ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test('', {}) _parse_author_string_test('<>', {}) _parse_author_string_test(' <>', {}) _parse_author_string_test('<>()', {}) _parse_author_string_test('<> ()', {}) _parse_author_string_test('()', {}) _parse_author_string_test(' ()', {}) _parse_author_string_test( 'John Doe <> ()', { 'name': 'John Doe' } ) _parse_author_string_test( 'John Doe <>', { 'name': 'John Doe' } ) _parse_author_string_test( 'John Doe ()', { 'name': 'John Doe' } ) def test_extract_npm_package_author(datadir): package_metadata_filepath = os.path.join( datadir, 'https_replicate.npmjs.com', 'org_visit1') with open(package_metadata_filepath) as json_file: package_metadata = json.load(json_file) extract_npm_package_author(package_metadata['versions']['0.0.2']) == \ { 'fullname': b'mooz ', 'name': b'mooz', 'email': b'stillpedant@gmail.com' } assert ( extract_npm_package_author(package_metadata['versions']['0.0.3']) == { 'fullname': b'Masafumi Oyamada ', 'name': b'Masafumi Oyamada', 'email': b'stillpedant@gmail.com' } ) package_json = json.loads(''' { "name": "highlightjs-line-numbers.js", "version": "2.7.0", "description": "Highlight.js line numbers plugin.", "main": "src/highlightjs-line-numbers.js", "dependencies": {}, "devDependencies": { "gulp": "^4.0.0", "gulp-rename": "^1.4.0", "gulp-replace": "^0.6.1", "gulp-uglify": "^1.2.0" }, "repository": { "type": "git", "url": "https://github.com/wcoder/highlightjs-line-numbers.js.git" }, "author": "Yauheni Pakala ", "license": "MIT", "bugs": { "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues" }, "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/" }''') # noqa assert extract_npm_package_author(package_json) == \ { 'fullname': b'Yauheni Pakala ', 'name': b'Yauheni Pakala', 'email': b'evgeniy.pakalo@gmail.com' } package_json = json.loads(''' { "name": "3-way-diff", "version": "0.0.1", "description": "3-way diffing of JavaScript objects", "main": "index.js", "authors": [ { "name": "Shawn Walsh", "url": "https://github.com/shawnpwalsh" }, { "name": "Markham F Rollins IV", "url": "https://github.com/mrollinsiv" } ], "keywords": [ "3-way diff", "3 way diff", "three-way diff", "three way diff" ], "devDependencies": { "babel-core": "^6.20.0", "babel-preset-es2015": "^6.18.0", "mocha": "^3.0.2" }, "dependencies": { "lodash": "^4.15.0" } }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'Shawn Walsh', 'name': b'Shawn Walsh', 'email': None } package_json = json.loads(''' { "name": "yfe-ynpm", "version": "1.0.0", "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm", "repository": { "type": "git", "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git" }, "author": [ "fengmk2 (https://fengmk2.com)", "xufuzi (https://7993.org)" ], 
"license": "MIT" }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'fengmk2 ', 'name': b'fengmk2', 'email': b'fengmk2@gmail.com' } package_json = json.loads(''' { "name": "umi-plugin-whale", "version": "0.0.8", "description": "Internal contract component", "authors": { "name": "xiaohuoni", "email": "448627663@qq.com" }, "repository": "alitajs/whale", "devDependencies": { "np": "^3.0.4", "umi-tools": "*" }, "license": "MIT" }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'xiaohuoni <448627663@qq.com>', 'name': b'xiaohuoni', 'email': b'448627663@qq.com' } def normalize_hashes(hashes): if isinstance(hashes, str): return hash_to_bytes(hashes) if isinstance(hashes, list): return [hash_to_bytes(x) for x in hashes] return {hash_to_bytes(k): hash_to_bytes(v) for k, v in hashes.items()} _expected_new_contents_first_visit = normalize_hashes([ '4ce3058e16ab3d7e077f65aabf855c34895bf17c', '858c3ceee84c8311adc808f8cdb30d233ddc9d18', '0fa33b4f5a4e0496da6843a38ff1af8b61541996', '85a410f8ef8eb8920f2c384a9555566ad4a2e21b', '9163ac8025923d5a45aaac482262893955c9b37b', '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4', '18c03aac6d3e910efb20039c15d70ab5e0297101', '41265c42446aac17ca769e67d1704f99e5a1394d', '783ff33f5882813dca9239452c4a7cadd4dba778', 'b029cfb85107aee4590c2434a3329bfcf36f8fa1', '112d1900b4c2e3e9351050d1b542c9744f9793f3', '5439bbc4bd9a996f1a38244e6892b71850bc98fd', 'd83097a2f994b503185adf4e719d154123150159', 'd0939b4898e83090ee55fd9d8a60e312cfadfbaf', 'b3523a26f7147e4af40d9d462adaae6d49eda13e', 'cd065fb435d6fb204a8871bcd623d0d0e673088c', '2854a40855ad839a54f4b08f5cff0cf52fca4399', 'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe', '0f73d56e1cf480bded8a1ecf20ec6fc53c574713', '0d9882b2dfafdce31f4e77fe307d41a44a74cefe', '585fc5caab9ead178a327d3660d35851db713df1', 'e8cd41a48d79101977e3036a87aeb1aac730686f', '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7', '9c3cc2763bf9e9e37067d3607302c4776502df98', '3649a68410e354c83cd4a38b66bd314de4c8f5c9', 'e96ed0c091de1ebdf587104eaf63400d1974a1fe', '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c', '38de737da99514de6559ff163c988198bc91367a', ]) _expected_new_directories_first_visit = normalize_hashes([ '3370d20d6f96dc1c9e50f083e2134881db110f4f', '42753c0c2ab00c4501b552ac4671c68f3cf5aece', 'd7895533ef5edbcffdea3f057d9fef3a1ef845ce', '80579be563e2ef3e385226fe7a3f079b377f142c', '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c', 'bcad03ce58ac136f26f000990fc9064e559fe1c0', '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca', 'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd', '584b5b4b6cf7f038095e820b99386a9c232de931', '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a', 'bb5f4ee143c970367eb409f2e4c1104898048b9d', '1b95491047add1103db0dfdfa84a9735dcb11e88', 'a00c6de13471a2d66e64aca140ddb21ef5521e62', '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2', 'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2', '202fafcd7c0f8230e89d5496ad7f44ab12b807bf', '775cc516543be86c15c1dc172f49c0d4e6e78235', 'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e', ]) _expected_new_revisions_first_visit = normalize_hashes({ 'd8a1c7474d2956ac598a19f0f27d52f7015f117e': '42753c0c2ab00c4501b552ac4671c68f3cf5aece', '5f9eb78af37ffd12949f235e86fac04898f9f72a': '3370d20d6f96dc1c9e50f083e2134881db110f4f', 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a': 'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'} ) _expected_new_snapshot_first_visit_id = normalize_hashes( 'd0587e1195aed5a8800411a008f2f2d627f18e2d') _expected_branches_first_visit = { 'HEAD': { 'target': 'releases/0.0.4', 'target_type': 'alias' }, 'releases/0.0.2': { 'target': 
'd8a1c7474d2956ac598a19f0f27d52f7015f117e', 'target_type': 'revision' }, 'releases/0.0.3': { 'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a', 'target_type': 'revision' }, 'releases/0.0.4': { 'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a', 'target_type': 'revision' } } def package_url(package): return 'https://www.npmjs.com/package/%s' % package def package_metadata_url(package): return 'https://replicate.npmjs.com/%s/' % package def test_revision_metadata_structure(swh_config, requests_mock_datadir): package = 'org' loader = NpmLoader(package, package_url(package), package_metadata_url(package)) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( 'd8a1c7474d2956ac598a19f0f27d52f7015f117e') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('intrinsic.tool', str), ('intrinsic.raw', dict), ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), - ('original_artifact.filename', str), - ('original_artifact.length', int), - ('original_artifact.checksums', dict), + ('original_artifact', list), ]) + for original_artifact in revision['metadata']['original_artifact']: + check_metadata_paths(original_artifact, paths=[ + ('filename', str), + ('length', int), + ('checksums', dict), + ]) + def test_npm_loader_first_visit(swh_config, requests_mock_datadir): package = 'org' loader = NpmLoader(package, package_url(package), package_metadata_url(package)) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1, } == stats assert len(list(loader.storage.content_get( _expected_new_contents_first_visit))) == len( _expected_new_contents_first_visit) assert list(loader.storage.directory_missing( _expected_new_directories_first_visit)) == [] assert list(loader.storage.revision_missing( _expected_new_revisions_first_visit)) == [] expected_snapshot = { 'id': _expected_new_snapshot_first_visit_id, 'branches': _expected_branches_first_visit, } check_snapshot(expected_snapshot, loader.storage) def test_npm_loader_incremental_visit( swh_config, requests_mock_datadir_visits): package = 'org' url = package_url(package) metadata_url = package_metadata_url(package) loader = NpmLoader(package, url, metadata_url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1, } == stats loader._info = None # reset loader internal state actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'eventful' origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' stats = loader.storage.stat_counters() assert { # 3 new releases artifacts 'content': len(_expected_new_contents_first_visit) + 14, 'directory': 
len(_expected_new_directories_first_visit) + 15, 'origin': 1, 'origin_visit': 2, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit) + 3, 'skipped_content': 0, 'snapshot': 2, } == stats urls = [ m.url for m in requests_mock_datadir_visits.request_history if m.url.startswith('https://registry.npmjs.org') ] assert len(urls) == len(set(urls)) # we visited each artifact once across diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py index aba1814..74b3a70 100644 --- a/swh/loader/package/tests/test_pypi.py +++ b/swh/loader/package/tests/test_pypi.py @@ -1,654 +1,659 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from os import path import pytest from unittest.mock import patch from swh.core.tarball import uncompress from swh.core.pytest_plugin import requests_mock_datadir_factory from swh.model.hashutil import hash_to_bytes from swh.loader.package.pypi import ( PyPILoader, pypi_api_url, author, extract_intrinsic_metadata ) from swh.loader.package.tests.common import ( check_snapshot, check_metadata_paths ) def test_author_basic(): data = { 'author': "i-am-groot", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot ', 'name': b'i-am-groot', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_empty_email(): data = { 'author': 'i-am-groot', 'author_email': '', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot', 'name': b'i-am-groot', 'email': b'', } assert actual_author == expected_author def test_author_empty_name(): data = { 'author': "", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b' ', 'name': b'', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_malformed(): data = { 'author': "['pierre', 'paul', 'jacques']", 'author_email': None, } actual_author = author(data) expected_author = { 'fullname': b"['pierre', 'paul', 'jacques']", 'name': b"['pierre', 'paul', 'jacques']", 'email': None, } assert actual_author == expected_author def test_author_malformed_2(): data = { 'author': '[marie, jeanne]', 'author_email': '[marie@some, jeanne@thing]', } actual_author = author(data) expected_author = { 'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>', 'name': b'[marie, jeanne]', 'email': b'[marie@some, jeanne@thing]', } assert actual_author == expected_author def test_author_malformed_3(): data = { 'author': '[marie, jeanne, pierre]', 'author_email': '[marie@somewhere.org, jeanne@somewhere.org]', } actual_author = author(data) expected_author = { 'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa 'name': b'[marie, jeanne, pierre]', 'email': b'[marie@somewhere.org, jeanne@somewhere.org]', } actual_author == expected_author # configuration error # def test_badly_configured_loader_raise(monkeypatch): """Badly configured loader should raise""" monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False) with pytest.raises(ValueError) as e: PyPILoader(url='some-url') assert 'Misconfiguration' in e.value.args[0] def test_pypi_api_url(): """Compute pypi api url from the pypi project url should be ok""" url = pypi_api_url('https://pypi.org/project/requests') assert url == 
'https://pypi.org/pypi/requests/json' @pytest.mark.fs def test_extract_intrinsic_metadata(tmp_path, datadir): """Parsing existing archive's PKG-INFO should yield results""" uncompressed_archive_path = str(tmp_path) archive_path = path.join( datadir, 'https_files.pythonhosted.org', '0805nexter-1.1.0.zip') uncompress(archive_path, dest=uncompressed_archive_path) actual_metadata = extract_intrinsic_metadata(uncompressed_archive_path) expected_metadata = { 'metadata_version': '1.0', 'name': '0805nexter', 'version': '1.1.0', 'summary': 'a simple printer of nested lest', 'home_page': 'http://www.hp.com', 'author': 'hgtkpython', 'author_email': '2868989685@qq.com', 'platforms': ['UNKNOWN'], } assert actual_metadata == expected_metadata @pytest.mark.fs def test_extract_intrinsic_metadata_failures(tmp_path): """Parsing inexistant path/archive/PKG-INFO yield None""" tmp_path = str(tmp_path) # py3.5 work around (PosixPath issue) # inexistant first level path assert extract_intrinsic_metadata('/something-inexistant') == {} # inexistant second level path (as expected by pypi archives) assert extract_intrinsic_metadata(tmp_path) == {} # inexistant PKG-INFO within second level path existing_path_no_pkginfo = path.join(tmp_path, 'something') os.mkdir(existing_path_no_pkginfo) assert extract_intrinsic_metadata(tmp_path) == {} # LOADER SCENARIO # # "edge" cases (for the same origin) # # no release artifact: # {visit full, status: uneventful, no contents, etc...} requests_mock_datadir_missing_all = requests_mock_datadir_factory(ignore_urls=[ 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa 'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa ]) def test_no_release_artifact(swh_config, requests_mock_datadir_missing_all): """Load a pypi project with all artifacts missing ends up with no snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' # problem during loading: # {visit: partial, status: uneventful, no snapshot} def test_release_with_traceback(swh_config): url = 'https://pypi.org/project/0805nexter' with patch('swh.loader.package.pypi.PyPILoader.get_default_release', side_effect=ValueError('Problem')): loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'failed' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 0, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' # problem during loading: failure early enough in between swh contents... # some contents (contents, directories, etc...) 
have been written in storage # {visit: partial, status: eventful, no snapshot} # problem during loading: failure late enough we can have snapshots (some # revisions are written in storage already) # {visit: partial, status: eventful, snapshot} # "normal" cases (for the same origin) # requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[ 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa ]) # some missing release artifacts: # {visit partial, status: eventful, 1 snapshot} def test_revision_metadata_structure(swh_config, requests_mock_datadir): url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( 'e445da4da22b31bfebb6ffc4383dbf839a074d21') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('intrinsic.tool', str), ('intrinsic.raw', dict), ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), - ('original_artifact.filename', str), - ('original_artifact.length', int), - ('original_artifact.checksums', dict), + ('original_artifact', list), ]) + for original_artifact in revision['metadata']['original_artifact']: + check_metadata_paths(original_artifact, paths=[ + ('filename', str), + ('length', int), + ('checksums', dict), + ]) + def test_visit_with_missing_artifact( swh_config, requests_mock_datadir_missing_one): """Load a pypi project with some missing artifacts ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 3, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355', 'branches': expected_branches, } check_snapshot(expected_snapshot, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir): """With no prior visit, load a pypi project ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 6, 
'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir): """Multiple visits with no changes results in 1 same snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == stats expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' expected_snapshot = { 'id': snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' stats2 = loader.storage.stat_counters() expected_stats2 = stats.copy() expected_stats2['origin_visit'] = 1 + 1 assert expected_stats2 == stats2 # same snapshot actual_snapshot_id = origin_visit['snapshot']['id'] assert actual_snapshot_id == hash_to_bytes(snapshot_id) def test_incremental_visit(swh_config, requests_mock_datadir_visits): """With prior visit, 2nd load will result with a different snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) visit1_actual_load_status = loader.load() visit1_stats = loader.storage.stat_counters() assert visit1_actual_load_status['status'] == 'eventful' origin_visit1 = 
next(loader.storage.origin_visit_get(url)) assert origin_visit1['status'] == 'full' assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == visit1_stats # Reset internal state loader._info = None visit2_actual_load_status = loader.load() visit2_stats = loader.storage.stat_counters() assert visit2_actual_load_status['status'] == 'eventful' visits = list(loader.storage.origin_visit_get(url)) assert len(visits) == 2 assert visits[1]['status'] == 'full' assert { 'content': 6 + 1, # 1 more content 'directory': 4 + 2, # 2 more directories 'origin': 1, 'origin_visit': 1 + 1, 'person': 1, 'release': 0, 'revision': 2 + 1, # 1 more revision 'skipped_content': 0, 'snapshot': 1 + 1, # 1 more snapshot } == visit2_stats expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', '92689fa2b7fb4d4fc6fb195bf73a50c87c030639' ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', 'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a', '52604d46843b898f5a43208045d09fcf8731631b', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'releases/1.3.0': { 'target': '51247143b01445c9348afa9edfae31bf7c5d86b1', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.3.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' urls = [ m.url for m in requests_mock_datadir_visits.request_history if m.url.startswith('https://files.pythonhosted.org') ] # visited each artifact once across 2 visits assert len(urls) == len(set(urls)) # release artifact, no new artifact # {visit full, status uneventful, same snapshot as before} # release artifact, old artifact with different checksums # {visit full, status full, new snapshot with shared history and some new # different history} # release with multiple sdist artifacts per pypi "version" # snapshot branch output is different def test_visit_1_release_with_2_artifacts(swh_config, requests_mock_datadir): """With no prior visit, load a pypi project ends up with 1 snapshot """ url = 'https://pypi.org/project/nexter' loader = PyPILoader(url) actual_load_status = 
loader.load() assert actual_load_status['status'] == 'eventful' expected_branches = { 'releases/1.1.0/nexter-1.1.0.zip': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.1.0/nexter-1.1.0.tar.gz': { 'target': '0bf88f5760cca7665d0af4d6575d9301134fe11a', 'target_type': 'revision', }, } expected_snapshot = { 'id': 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full'
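# Hedged sketch (an assumption, not the real helper from
# swh.loader.package.tests.common): what the check_snapshot calls in these
# tests are relied upon to do, i.e. turn the expected hex identifiers into
# bytes and compare them against the snapshot the loader actually wrote to
# storage.  It assumes the snapshot_get API and bytes-keyed branches that the
# tests already exercise elsewhere; check_snapshot_sketch is a hypothetical
# name.
from swh.model.hashutil import hash_to_bytes


def check_snapshot_sketch(expected_snapshot, storage):
    """Assert that expected_snapshot (hex ids) matches the stored snapshot."""
    stored = storage.snapshot_get(hash_to_bytes(expected_snapshot['id']))
    assert stored is not None
    for name, expected in expected_snapshot['branches'].items():
        branch = stored['branches'][name.encode('utf-8')]
        assert branch['target_type'] == expected['target_type']
        if expected['target_type'] == 'alias':
            # an alias targets another branch name, not a hash
            assert branch['target'] == expected['target'].encode('utf-8')
        else:
            assert branch['target'] == hash_to_bytes(expected['target'])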