diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,4 +1,102 @@
 swh-loader-pypi
 ====================
 
-SWH PyPi loader's source code repository
+SWH PyPI loader's source code repository
+
+# What does the loader do?
+
+The PyPI loader visits and loads a PyPI project [1].
+
+Each visit will result in:
+- 1 snapshot (which targets n revisions; 1 per release artifact)
+- 1 revision (which targets 1 directory; the release artifact uncompressed)
+
+[1] https://pypi.org/help/#packages
+
+## First visit
+
+Given a PyPI project (origin), the loader, for the first visit:
+
+- retrieves information for the given project (including releases)
+- then for each associated release
+- for each associated source distribution (type 'sdist') release
+  artifact (possibly many per release)
+- retrieves the associated artifact archive (with checks)
+- uncompresses locally the archive
+- computes the hashes of the uncompressed directory
+- then creates a revision (using PKG-INFO metadata file)
+  targeting such directory
+- finally, creates a snapshot targeting all seen revisions
+  (uncompressed PyPI artifact and metadata).
+
+## Next visit
+
+The loader starts by checking if something changed since the last
+visit.  If nothing changed, the visit's snapshot is left
+unchanged. The new visit targets the same snapshot.
+
+If something changed, the already seen release artifacts are skipped.
+Only the new ones are loaded. In the end, the loader creates a new
+snapshot based on the previous one. Thus, the new snapshot targets
+both the old and new PyPI release artifacts.
+
+## Terminology
+
+- 1 project: a PyPI project (used as swh origin). This is a collection
+             of releases.
+
+- 1 release: a specific version of the (PyPI) project. It's a
+             collection of information and associated source release
+             artifacts (type 'sdist')
+
+- 1 release artifact: a source release artifact (distributed by a PyPI
+                      maintainer). In swh, we are specifically
+                      interested by the 'sdist' type (source code).
+
+## Edge cases
+
+- If no release provides release artifacts, those are skipped
+
+- If a release artifact holds no PKG-INFO file (root at the archive),
+  the release artifact is skipped.
+
+- If a problem occurs during a fetch action (e.g. release artifact
+  download), the load fails and the visit is marked as 'partial'.
+
+# Development
+
+## Configuration file
+
+### Location
+
+Either:
+- /etc/softwareheritage/loader/pypi.yml
+- ~/.config/swh/loader/pypi.yml
+- ~/.swh/loader/pypi.yml
+
+### Configuration sample
+
+```
+storage:
+  cls: remote
+  args:
+    url: http://localhost:5002/
+
+```
+
+## Local run
+
+PyPI loader expects as input:
+- project: a pypi project name (ex: arrow)
+- project_url: uri to the pypi project (html page)
+- project_metadata_url: uri to the pypi metadata information (json page)
+
+``` sh
+$ python3
+Python 3.6.6 (default, Jun 27 2018, 14:44:17)
+[GCC 8.1.0] on linux
+Type "help", "copyright", "credits" or "license" for more information.
+>>> import logging; logging.basicConfig(level=logging.DEBUG)
+>>> project='arrow'; from swh.loader.pypi.tasks import LoadPyPI
+>>> LoadPyPI().run(project, 'https://pypi.org/pypi/%s/' % project, 'https://pypi.org/pypi/%s/json' % project)
+```
diff --git a/debian/changelog b/debian/changelog
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-swh-loader-pypy (0.0.1-1) unstable; urgency=low
+swh-loader-pypi (0.0.1-1) unstable; urgency=low
 
   * Initial bootstrap
 
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -5,12 +5,16 @@
 Build-Depends: debhelper (>= 9),
                dh-python (>= 2),
                python3-all,
+               python3-arrow,
                python3-nose,
+               python3-pkginfo,
+               python3-requests,
                python3-setuptools,
                python3-swh.core,
+               python3-swh.loader.core,
+               python3-swh.model (>= 0.0.27~),
                python3-swh.storage,
                python3-swh.scheduler,
-               python3-swh.loader.core,
                python3-vcversioner
 Standards-Version: 3.9.6
 Homepage: https://forge.softwareheritage.org/source/swh-loader-pypi.git
@@ -19,6 +23,7 @@
 Architecture: all
 Depends: python3-swh.core,
          python3-swh.loader.core,
+         python3-swh.model (>= 0.0.27~),
          python3-swh.storage,
          ${misc:Depends}, ${python3:Depends}
-Description: Software Heritage PyPi Loader
+Description: Software Heritage PyPI Loader
diff --git a/debian/rules b/debian/rules
--- a/debian/rules
+++ b/debian/rules
@@ -9,3 +9,4 @@
 override_dh_install:
 	dh_install
 	rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py
+	rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/loader/__init__.py
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
 swh.core
+swh.model >= 0.0.27
 swh.storage
 swh.scheduler
 swh.loader.core
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
 setuptools
 vcversioner
+requests
+arrow
+pkginfo
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
 
 setup(
     name='swh.loader.pypi',
-    description='Software Heritage PyPi Loader',
+    description='Software Heritage PyPI Loader',
     author='Software Heritage developers',
     author_email='swh-devel@inria.fr',
     url='https://forge.softwareheritage.org/source/swh-loader-pypi.git',
@@ -35,6 +35,6 @@
     install_requires=parse_requirements() + parse_requirements('swh'),
     test_requires=parse_requirements('test'),
     setup_requires=['vcversioner'],
-    vcversioner={},
+    vcversioner={'version_module_paths': ['swh/loader/pypi/_version.py']},
     include_package_data=True,
 )
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
--- a/swh/loader/__init__.py
+++ b/swh/loader/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/loader/pypi/.gitignore b/swh/loader/pypi/.gitignore
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/.gitignore
@@ -0,0 +1 @@
+_version.py
diff --git a/swh/loader/pypi/client.py b/swh/loader/pypi/client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/client.py
@@ -0,0 +1,442 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import logging
+import os
+import requests
+import shutil
+
+from .converters import info, author
+
+from pkginfo import UnpackedSDist
+
+from swh.core import tarball
+from swh.model import hashutil
+
+try:
+    from swh.loader.pypi._version import __version__
+except ImportError:
+    __version__ = 'devel'
+
+
+def _to_dict(pkginfo):
+    """Given a pkginfo parsed structure, convert it to a dict.
+
+    Args:
+        pkginfo (UnpackedSDist): The sdist parsed structure
+
+    Returns:
+        parsed structure as a dict
+
+    """
+    m = {}
+    for k in pkginfo:
+        m[k] = getattr(pkginfo, k)
+    return m
+
+
+def _project_pkginfo(dir_path):
+    """Given an uncompressed path holding the pkginfo file, returns a
+       pkginfo parsed structure as a dict.
+
+       The release artifact contains at their root one folder. For example:
+       $ tar tvf zprint-0.0.6.tar.gz
+       drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
+       ...
+
+    Args:
+
+        dir_path (str): Path to the uncompressed directory
+                        representing a release artifact from pypi.
+
+    Returns:
+        the pkginfo parsed structure as a dict if any or None if
+        none was present.
+
+    """
+    # Retrieve the root folder of the archive
+    project_dirname = os.listdir(dir_path)[0]
+    pkginfo_path = os.path.join(dir_path, project_dirname, 'PKG-INFO')
+    if not os.path.exists(pkginfo_path):
+        return None
+    pkginfo = UnpackedSDist(pkginfo_path)
+    return _to_dict(pkginfo)
+
+
+class PyPIClient:
+    """PyPI client in charge of discussing with the pypi server.
+
+    Args:
+        base_url (str): PyPI instance's base url
+        temp_directory (str): Path to the temporary disk location used
+                              for uncompressing the release artifacts
+
+        cache (bool): Use an internal cache to keep the archives on
+                      disk. Default is not to use it.
+        cache_dir (str): cache's disk location (relevant only with
+                        `cache` to True)
+
+        Those last 2 parameters are not for production use.
+
+    """
+    def __init__(self, base_url='https://pypi.org/pypi',
+                 temp_directory=None, cache=False, cache_dir=None):
+        self.version = __version__
+        self.base_url = base_url
+        self.temp_directory = temp_directory
+
+        self.do_cache = cache
+        if self.do_cache:
+            self.cache_dir = cache_dir
+            self.cache_raw_dir = os.path.join(cache_dir, 'archives')
+            os.makedirs(self.cache_raw_dir, exist_ok=True)
+        self.session = requests.session()
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage PyPI Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def _save_response(self, response, project=None):
+        """Log the response from a server request to a cache dir.
+
+        Args:
+            response (Response): full server response
+            cache_dir (str): system path for cache dir
+
+        Returns:
+            nothing
+
+        """
+        import gzip
+        from json import dumps
+        datepath = arrow.utcnow().isoformat()
+        name = '%s.gz' % datepath if project is None else '%s-%s.gz' % (
+            project, datepath)
+        fname = os.path.join(self.cache_dir, name)
+        with gzip.open(fname, 'w') as f:
+            f.write(bytes(
+                dumps(response.json()),
+                'utf-8'
+            ))
+
+    def _save_raw(self, filepath):
+        """In cache mode, backup the filepath to self.cache_raw_dir
+
+        Args:
+            filepath (str): Path of the file to save
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        shutil.copyfile(filepath, _archive)
+
+    def _get_raw(self, filepath):
+        """In cache mode, we try to retrieve the cached file.
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        if not os.path.exists(_archive):
+            return None
+        shutil.copyfile(_archive, filepath)
+        return filepath
+
+    def _get(self, url, project=None):
+        """Get query to the url.
+
+        Args:
+            url (str): Url
+
+        Raises:
+            ValueError in case of failing to query
+
+        Returns:
+            Response as dict if ok
+
+        """
+        response = self.session.get(url, **self.params)
+        if response.status_code != 200:
+            raise ValueError("Fail to query '%s'. Reason: %s" % (
+                url, response.status_code))
+
+        if self.do_cache:
+            self._save_response(response, project=project)
+
+        return response.json()
+
+    def info(self, project_url, project=None):
+        """Given a metadata project url, retrieve the raw json response
+
+        Args:
+            project_url (str): Project's pypi to retrieve information
+
+        Returns:
+            Main project information as dict.
+
+        """
+        return self._get(project_url, project=project)
+
+    def release(self, project, release):
+        """Given a project and a release name, retrieve the raw information
+           for said project's release.
+
+        Args:
+            project (str): Project's name
+            release (dict): Release information
+
+        Returns:
+            Release information as dict
+
+        """
+        release_url = '%s/%s/%s/json' % (self.base_url, project, release)
+        return self._get(release_url, project=project)
+
+    def prepare_release_artifacts(self, project, version, release_artifacts):
+        """For a given project's release version, fetch and prepare the
+           associated release artifacts.
+
+        Args:
+            project (str): PyPI Project
+            version (str): Release version
+            release_artifacts ([dict]): List of source distribution
+                                        release artifacts
+
+        Yields:
+            tuple (artifact, filepath, uncompressed_path, pkginfo) where:
+
+            - artifact (dict): release artifact's associated info
+            - release (dict): release information
+            - filepath (str): Local artifact's path
+            - uncompressed_archive_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        for artifact in release_artifacts:
+            release = {
+                'name': version,
+                'message': artifact.get('comment_text', ''),
+            }
+            artifact = {
+                'sha256': artifact['digests']['sha256'],
+                'size': artifact['size'],
+                'filename': artifact['filename'],
+                'url': artifact['url'],
+                'date': artifact['upload_time'],
+            }
+            yield self.prepare_release_artifact(project, release, artifact)
+
+    def prepare_release_artifact(self, project, release, artifact):
+        """For a given release project, fetch and prepare the associated
+           artifact.
+
+        This:
+        - fetches the artifact
+        - checks the size, hashes match
+        - uncompress the artifact locally
+        - computes the swh hashes
+        - returns the associated information for the artifact
+
+        Args:
+            project (str): Project's name
+            release (dict): Release information
+            artifact (dict): Release artifact information
+
+        Returns:
+            tuple (artifact, filepath, uncompressed_path, pkginfo) where:
+
+            - release (dict): Release information (name, message)
+            - artifact (dict): release artifact's information
+            - filepath (str): Local artifact's path
+            - uncompressed_archive_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        version = release['name']
+        logging.debug('Release version: %s' % version)
+        path = os.path.join(self.temp_directory, project, version)
+        os.makedirs(path, exist_ok=True)
+        filepath = os.path.join(path, artifact['filename'])
+        logging.debug('Artifact local path: %s' % filepath)
+
+        cache_hit = None
+        if self.do_cache:
+            cache_hit = self._get_raw(filepath)
+
+        if cache_hit:
+            h = hashutil.MultiHash.from_path(filepath, track_length=False)
+        else:  # no cache hit, we fetch from pypi
+            url = artifact['url']
+            r = self.session.get(url, **self.params, stream=True)
+            status = r.status_code
+            if status != 200:
+                if status == 404:
+                    raise ValueError("Project '%s' not found" % url)
+                else:
+                    msg = "Fail to query '%s'\nCode: %s\nDetails: %s" % (
+                        url, r.status_code, r.content)
+                    raise ValueError(msg)
+
+            length = int(r.headers['content-length'])
+            if length != artifact['size']:
+                raise ValueError('Error when checking size: %s != %s' % (
+                    artifact['size'], length))
+
+            h = hashutil.MultiHash(length=length)
+            with open(filepath, 'wb') as f:
+                for chunk in r.iter_content():
+                    h.update(chunk)
+                    f.write(chunk)
+
+        hashes = h.hexdigest()
+
+        actual_digest = hashes['sha256']
+        if actual_digest != artifact['sha256']:
+            raise ValueError(
+                '%s %s: Checksum mismatched: %s != %s' % (
+                    project, version, artifact['sha256'], actual_digest))
+
+        if not cache_hit and self.do_cache:
+            self._save_raw(filepath)
+
+        uncompress_path = os.path.join(path, 'uncompress')
+        os.makedirs(uncompress_path, exist_ok=True)
+        nature = tarball.uncompress(filepath, uncompress_path)
+        artifact['archive_type'] = nature
+        artifact.update(hashes)
+        pkginfo = _project_pkginfo(uncompress_path)
+        return release, artifact, filepath, uncompress_path, pkginfo
+
+
+class PyPIProject:
+    """PyPI project representation
+
+    This allows to extract information for a given project:
+    - either its latest information (from the latest release)
+    - either for a given release version
+    - uncompress associated fetched release artifacts
+
+    This also fetches and uncompresses the associated release
+    artifacts.
+
+    """
+    def __init__(self, client, project, project_metadata_url, data=None):
+        self.client = client
+        self.project = project
+        self.project_metadata_url = project_metadata_url
+        if data:
+            self.data = data
+        else:
+            self.data = client.info(project_metadata_url, project)
+
+        self.last_version = self.data['info']['version']
+        self.cache = {
+            self.last_version: self.data
+        }
+
+    def _data(self, release_name=None):
+        """Fetch data per release and cache it.  Returns the cache retrieved
+           data if already fetched.
+
+        """
+        if release_name:
+            data = self.cache.get(release_name)
+            if not data:
+                data = self.client.release(self.project, release_name)
+                self.cache[release_name] = data
+        else:
+            data = self.data
+        return data
+
+    def info(self, release_name=None):
+        """Compute release information for provided release (or latest one).
+
+        """
+        return info(self._data(release_name))
+
+    def _filter_release_artifacts(self, version, releases, known_artifacts):
+        """Filter not already known sdist (source distribution) release.
+
+        There can be multiple 'package_type' (sdist, bdist_egg,
+        bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst, ...), we are
+        only interested in source distribution (sdist), others bdist*
+        are binary
+
+        Args:
+            version (str): Release name or version
+            releases (dict/[dict]): Full release object (or a list of)
+            known_artifacts ([tuple]): List of known releases (tuple filename,
+                                       sha256)
+
+        Yields:
+            an unknown release artifact
+
+        """
+        if not releases:
+            return []
+        if not isinstance(releases, list):
+            releases = [releases]
+        for artifact in releases:
+            name = artifact['filename']
+            sha256 = artifact['digests']['sha256']
+            if (name, sha256) in known_artifacts:
+                logging.debug('artifact (%s, %s) already seen for release %s, skipping' % (  # noqa
+                    name, sha256, version))
+                continue
+            if artifact['packagetype'] != 'sdist':
+                continue
+            yield artifact
+
+    def _cleanup_release_artifacts(self, archive_path, directory_path):
+        """Clean intermediary files which no longer needs to be present.
+
+        """
+        if directory_path and os.path.exists(directory_path):
+            logging.debug('Clean up uncompressed archive path %s' % (
+                directory_path, ))
+            shutil.rmtree(directory_path)
+
+        if archive_path and os.path.exists(archive_path):
+            logging.debug('Clean up archive %s' % archive_path)
+            os.unlink(archive_path)
+
+    def releases(self, known_artifacts):
+        """Fetch metadata/data per release (if new release artifact detected)
+
+        For new release artifact, this:
+        - downloads and uncompresses the release artifacts.
+        - yields the (release info, author info, release, dir_path)
+        - Clean up the intermediary fetched artifact files
+
+        Args:
+            known_artifacts (tuple): artifact name, artifact sha256 hash
+
+        Yields:
+            tuple (version, release_info, release, uncompressed_path) where:
+
+            - project_info (dict): release's associated version info
+            - author (dict): Author information for the release
+            - artifact (dict): Release artifact information
+            - release (dict): release metadata
+            - uncompressed_path (str): Path to uncompressed artifact
+
+        """
+        releases_dict = self.data['releases']
+        for version, releases in releases_dict.items():
+            releases = self._filter_release_artifacts(
+                version, releases, known_artifacts)
+            releases = self.client.prepare_release_artifacts(
+                self.project, version, releases)
+            for release, artifact, archive, dir_path, pkginfo in releases:
+                if pkginfo is None:  # fallback to pypi api metadata
+                    msg = '%s %s: No PKG-INFO detected, skipping' % (  # noqa
+                            self.project, version)
+                    logging.warn(msg)
+                    continue
+                yield pkginfo, author(pkginfo), release, artifact, dir_path
+                self._cleanup_release_artifacts(archive, dir_path)
diff --git a/swh/loader/pypi/converters.py b/swh/loader/pypi/converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/converters.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def info(data):
+    """Given a dict of a PyPI project information, returns a project
+       subset.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information.
+
+    Returns:
+        A dict subset of project information.
+
+    """
+    _info = data['info']
+    default = {
+        'home_page': _info['home_page'],
+        'description': _info['description'],
+        'summary': _info['summary'],
+        'license': _info['license'],
+        'package_url': _info['package_url'],
+        'project_url': _info['project_url'],
+        'upstream': None,
+    }
+
+    project_urls = _info.get('project_urls')
+    if project_urls:
+        homepage = project_urls.get('Homepage')
+        if homepage:
+            default['upstream'] = homepage
+
+    return default
+
+
+def author(data):
+    """Given a dict of project/release artifact information (coming from
+       PyPI), returns an author subset.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information.
+
+    Returns:
+        swh-model dict representing a person.
+
+    """
+    name = data['author']
+    email = data['author_email']
+    if email:
+        fullname = '%s <%s>' % (name, email)
+    else:
+        fullname = name
+
+    if not fullname:
+        return {'fullname': b'', 'name': None, 'email': None}
+
+    if fullname:
+        fullname = fullname.encode('utf-8')
+
+    if name:
+        name = name.encode('utf-8')
+
+    if email:
+        email = email.encode('utf-8')
+
+    return {'fullname': fullname, 'name': name, 'email': email}
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/loader.py
@@ -0,0 +1,244 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import os
+import shutil
+
+from tempfile import mkdtemp
+
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import SWHLoader
+from swh.model.from_disk import Directory
+from swh.model.identifiers import (
+    revision_identifier, snapshot_identifier,
+    identifier_to_bytes, normalize_timestamp
+)
+
+from .client import PyPIClient, PyPIProject
+
+
+TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'
+DEBUG_MODE = '** DEBUG MODE **'
+
+
+class PyPILoader(SWHLoader):
+    CONFIG_BASE_FILENAME = 'loader/pypi'
+    ADDITIONAL_CONFIG = {
+        'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
+        'cache': ('bool', False),
+        'cache_dir': ('str', ''),
+        'debug': ('bool', False),  # NOT FOR PRODUCTION
+    }
+
+    def __init__(self, client=None):
+        super().__init__(logging_class='swh.loader.pypi.PyPILoader')
+        self.origin_id = None
+        if not client:
+            temp_directory = self.config['temp_directory']
+            os.makedirs(temp_directory, exist_ok=True)
+            self.temp_directory = mkdtemp(
+                suffix='-%s' % os.getpid(),
+                prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+                dir=temp_directory)
+            self.pypi_client = PyPIClient(
+                temp_directory=self.temp_directory,
+                cache=self.config['cache'],
+                cache_dir=self.config['cache_dir'])
+        else:
+            self.temp_directory = client.temp_directory
+            self.pypi_client = client
+        self.debug = self.config['debug']
+        self.done = False
+
+    def pre_cleanup(self):
+        """To prevent disk explosion if some other workers exploded
+        in mid-air (OOM killed), we try and clean up dangling files.
+
+        """
+        if self.debug:
+            self.log.warn('%s Will not pre-clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        clean_dangling_folders(self.config['temp_directory'],
+                               pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
+                               log=self.log)
+
+    def cleanup(self):
+        """Clean up temporary disk use
+
+        """
+        if self.debug:
+            self.log.warn('%s Will not clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        if os.path.exists(self.temp_directory):
+            self.log.debug('Clean up %s' % self.temp_directory)
+            shutil.rmtree(self.temp_directory)
+
+    def prepare_origin_visit(self, project_name, origin_url,
+                             origin_metadata_url=None):
+        """Prepare the origin visit information
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.origin = {
+            'url': origin_url,
+            'type': 'pypi',
+        }
+        self.visit_date = None  # loader core will populate it
+
+    def _known_artifacts(self, last_snapshot):
+        """Retrieve the known releases/artifact for the origin_id.
+
+        Args
+            snapshot (dict): Last snapshot for the visit
+
+        Returns:
+            tuple artifact's filename, artifact's sha256
+
+        """
+        revs = [rev['target'] for rev in last_snapshot['branches'].values()]
+        known_revisions = self.storage.revision_get(revs)
+        for revision in known_revisions:
+            artifact = revision['metadata']['original_artifact']
+            yield artifact['filename'], artifact['sha256']
+
+    def _last_snapshot(self):
+        """Retrieve the last snapshot
+
+        """
+        return self.storage.snapshot_get_latest(self.origin_id)
+
+    def prepare(self, project_name, origin_url,
+                origin_metadata_url=None):
+        """Keep reference to the origin url (project) and the
+           project metadata url
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.project_name = project_name
+        self.origin_url = origin_url
+        self.origin_metadata_url = origin_metadata_url
+        self.project = PyPIProject(self.pypi_client, self.project_name,
+                                   self.origin_metadata_url)
+        self._prepare_state()
+
+    def _prepare_state(self):
+        """Initialize internal state (snapshot, contents, directories, etc...)
+
+        This is called from `prepare` method.
+
+        """
+        last_snapshot = self._last_snapshot()
+        if last_snapshot:
+            self._snapshot = last_snapshot.copy()
+            known_artifacts = self._known_artifacts(self._snapshot)
+        else:
+            self._snapshot = {
+                'branches': {}
+            }
+            known_artifacts = []
+        # and the artifacts
+        # that will be the source of data to retrieve
+        self.release_artifacts = self.project.releases(known_artifacts)
+        # temporary state
+        self._contents = []
+        self._directories = []
+        self._revisions = []
+
+    def fetch_data(self):
+        """Called once per release artifact version (can be many for one
+           release).
+
+        This will for each call:
+        - retrieve a release artifact (associated to a release version)
+        - Uncompress it and compute the necessary information
+        - Computes the swh objects
+
+        Returns:
+            True as long as data to fetch exist
+
+        """
+        data = None
+        if self.done:
+            return False
+
+        try:
+            data = next(self.release_artifacts)
+        except StopIteration:
+            self.done = True
+            return False
+
+        project_info, author, release, artifact, dir_path = data
+        dir_path = dir_path.encode('utf-8')
+        directory = Directory.from_disk(path=dir_path, data=True)
+        _objects = directory.collect()
+
+        self._contents = _objects['content'].values()
+        self._directories = _objects['directory'].values()
+        date = normalize_timestamp(
+            int(arrow.get(artifact['date']).timestamp))
+
+        name = release['name'].encode('utf-8')
+        message = release['message'].encode('utf-8')
+        if message:
+            message = b'%s: %s' % (name, message)
+        else:
+            message = name
+
+        _revision = {
+            'synthetic': True,
+            'metadata': {
+                'original_artifact': artifact,
+                'project': project_info,
+            },
+            'author': author,
+            'date': date,
+            'committer': author,
+            'committer_date': date,
+            'message': message,
+            'directory': directory.hash,
+            'parents': [],
+            'type': 'tar',
+        }
+        _revision['id'] = identifier_to_bytes(
+            revision_identifier(_revision))
+        self._revisions.append(_revision)
+
+        branch_name = artifact['filename'].encode('utf-8')
+        self._snapshot['branches'][branch_name] = {
+            'target': _revision['id'],
+            'target_type': 'revision',
+        }
+
+        return not self.done
+
+    def generate_and_load_snapshot(self):
+        self._snapshot['id'] = identifier_to_bytes(
+            snapshot_identifier(self._snapshot))
+        self.maybe_load_snapshot(self._snapshot)
+
+    def store_data(self):
+        """(override) This sends collected objects to storage.
+
+        """
+        self.maybe_load_contents(self._contents)
+        self.maybe_load_directories(self._directories)
+        self.maybe_load_revisions(self._revisions)
+
+        if self.done:
+            self.generate_and_load_snapshot()
+            self.flush()
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/model.py
@@ -0,0 +1,10 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import logging
+import shutil
+
+from .converters import info, author
diff --git a/swh/loader/pypi/tasks.py b/swh/loader/pypi/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.task import Task
+
+from .loader import PyPILoader
+
+
+class LoadPyPI(Task):
+    task_queue = 'swh_loader_pypi'
+
+    def run_task(self, project_name, project_url, project_metadata_url=None):
+        loader = PyPILoader()
+        loader.log = self.log
+        return loader.load(project_name,
+                           project_url,
+                           origin_metadata_url=project_metadata_url)
diff --git a/swh/loader/__init__.py b/swh/loader/pypi/tests/__init__.py
copy from swh/loader/__init__.py
copy to swh/loader/pypi/tests/__init__.py
diff --git a/swh/loader/pypi/tests/common.py b/swh/loader/pypi/tests/common.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/common.py
@@ -0,0 +1,151 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import os
+import tempfile
+
+from nose.plugins.attrib import attr
+from unittest import TestCase
+
+from swh.loader.pypi.client import PyPIClient, PyPIProject
+
+
+RESOURCES_PATH = './swh/loader/pypi/tests/resources'
+
+
+class PyPIClientWithCache(PyPIClient):
+    """Force the use of the cache to bypass pypi calls
+
+    """
+    def __init__(self, temp_directory, cache_dir):
+        super().__init__(temp_directory=temp_directory,
+                         cache=True, cache_dir=cache_dir)
+
+
+class LoaderNoStorage:
+    """Mixin class to inhibit the persistence (storage calls) and keep in
+       memory the data sent.
+
+    """
+    CONFIG_BASE_FILENAME = ''  # do not provide a real path
+    ADDITIONAL_CONFIG = {
+        'storage': ('dict', {
+            'cls': 'remote',
+            'args': {
+                'url': 'http://nowhere:5002/',  # do not provide a real storage
+            }
+        }),
+
+        # do not send any data to the storage
+        'send_contents': ('bool', False),
+        'send_directories': ('bool', False),
+        'send_revisions': ('bool', False),
+        'send_releases': ('bool', False),
+        'send_snapshot': ('bool', False),
+        'debug': ('bool', False),
+    }
+
+    def __init__(self, client=None):
+        super().__init__(client=client)
+        self.all_contents = []
+        self.all_directories = []
+        self.all_revisions = []
+        self.all_releases = []
+        self.all_snapshots = []
+
+        # typed data
+        self.objects = {
+            'content': self.all_contents,
+            'directory': self.all_directories,
+            'revision': self.all_revisions,
+            'release': self.all_releases,
+            'snapshot': self.all_snapshots
+        }
+
+    def _add(self, type, l):
+        """Add without duplicates and keeping the insertion order.
+
+        Args:
+            type (str): Type of objects concerned by the action
+            l ([object]): List of objects of the given 'type'
+
+        """
+        col = self.objects[type]
+        for o in l:
+            if o in col:
+                continue
+            col.extend([o])
+
+    def maybe_load_contents(self, all_contents):
+        self._add('content', all_contents)
+
+    def maybe_load_directories(self, all_directories):
+        self._add('directory', all_directories)
+
+    def maybe_load_revisions(self, all_revisions):
+        self._add('revision', all_revisions)
+
+    def maybe_load_releases(self, releases):
+        raise ValueError('If called, the test must break.')
+
+    def maybe_load_snapshot(self, snapshot):
+        self.objects['snapshot'].append(snapshot)
+
+    def _store_origin_visit(self):
+        pass
+
+    def open_fetch_history(self):
+        pass
+
+    def close_fetch_history_success(self, fetch_history_id):
+        pass
+
+    def close_fetch_history_failure(self, fetch_history_id):
+        pass
+
+    def update_origin_visit(self, origin_id, visit, status):
+        pass
+
+    # Override to do nothing at the end
+    def close_failure(self):
+        pass
+
+    def close_success(self):
+        pass
+
+    def pre_cleanup(self):
+        pass
+
+
+@attr('fs')
+class WithProjectTest(TestCase):
+    def setUp(self):
+        project = '0805nexter'
+        project_metadata_file = '%s/%s.json' % (RESOURCES_PATH, project)
+
+        with open(project_metadata_file) as f:
+            data = json.load(f)
+
+        temp_dir = tempfile.mkdtemp(
+            dir='/tmp/', prefix='swh.loader.pypi.tests-')
+        project_metadata_url = 'https://pypi.org/pypi/%s/json' % project
+        # Will use the pypi with cache
+        client = PyPIClientWithCache(
+            temp_directory=temp_dir, cache_dir=RESOURCES_PATH)
+        self.project = PyPIProject(
+            client=client,
+            project=project,
+            project_metadata_url=project_metadata_url,
+            data=data)
+
+        self.data = data
+        self.temp_dir = temp_dir
+        self.project_name = project
+
+    def tearDown(self):
+        if os.path.exists(self.temp_dir):
+            shutil.rmtree(self.temp_dir)
diff --git a/swh/loader/pypi/tests/resources/0805nexter.json b/swh/loader/pypi/tests/resources/0805nexter.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/resources/0805nexter.json
@@ -0,0 +1 @@
+{"info":{"author":"hgtkpython","author_email":"2868989685@qq.com","bugtrack_url":null,"classifiers":[],"description":"UNKNOWN","description_content_type":null,"docs_url":null,"download_url":"UNKNOWN","downloads":{"last_day":-1,"last_month":-1,"last_week":-1},"home_page":"http://www.hp.com","keywords":null,"license":"UNKNOWN","maintainer":null,"maintainer_email":null,"name":"0805nexter","package_url":"https://pypi.org/project/0805nexter/","platform":"UNKNOWN","project_url":"https://pypi.org/project/0805nexter/","project_urls":{"Download":"UNKNOWN","Homepage":"http://www.hp.com"},"release_url":"https://pypi.org/project/0805nexter/1.2.0/","requires_dist":null,"requires_python":null,"summary":"a simple printer of nested lest","version":"1.2.0"},"last_serial":1931736,"releases":{"1.1.0":[{"comment_text":"","digests":{"md5":"07fc93fc12821c1405c3483db88154af","sha256":"52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035"},"downloads":-1,"filename":"0805nexter-1.1.0.zip","has_sig":false,"md5_digest":"07fc93fc12821c1405c3483db88154af","packagetype":"sdist","python_version":"source","requires_python":null,"size":862,"upload_time":"2016-01-31T05:28:42","url":"https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip"}],"1.2.0":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"downloads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]},"urls":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"down
loads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]}
\ No newline at end of file
diff --git a/swh/loader/pypi/tests/test_client.py b/swh/loader/pypi/tests/test_client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_client.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+
+from nose.tools import istest
+
+from swh.loader.pypi import converters
+from swh.loader.pypi.client import _project_pkginfo
+
+from .common import WithProjectTest
+
+
+class PyPIProjectTest(WithProjectTest):
+    @istest
+    def releases(self):
+        actual_releases = self.project.releases([])
+
+        expected_release_artifacts = {
+            '1.1.0': {
+                'archive_type': 'zip',
+                'blake2s256': 'df9413bde66e6133b10cadefad6fcf9cbbc369b47831089112c846d79f14985a',  # noqa
+                'date': '2016-01-31T05:28:42',
+                'filename': '0805nexter-1.1.0.zip',
+                'sha1': '127d8697db916ba1c67084052196a83319a25000',
+                'sha1_git': '4b8f1350e6d9fa00256e974ae24c09543d85b196',
+                'sha256': '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035',  # noqa
+                'size': 862,
+                'url': 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip',  # noqa
+            },
+            '1.2.0': {
+                'archive_type': 'zip',
+                'blake2s256': '67010586b5b9a4aaa3b1c386f9dc8b4c99e6e40f37732a717a5f9b9b1185e588', # noqa
+                'date': '2016-01-31T05:51:25',
+                'filename': '0805nexter-1.2.0.zip',
+                'sha1': 'd55238554b94da7c5bf4a349ece0fe3b2b19f79c',
+                'sha1_git': '8638d33a96cb25d8319af21417f00045ec6ee810',
+                'sha256': '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709',  # noqa
+                'size': 898,
+                'url': 'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip',  # noqa
+            }
+        }
+
+        expected_releases = {
+            '1.1.0': {
+                'name': '1.1.0',
+                'message': '',
+            },
+            '1.2.0': {
+                'name': '1.2.0',
+                'message': '',
+            },
+        }
+
+        dir_paths = []
+        for pkginfo, author, release, artifact, dir_path in actual_releases:
+            version = pkginfo['version']
+            expected_pkginfo = _project_pkginfo(dir_path)
+            self.assertEquals(pkginfo, expected_pkginfo)
+            expected_author = converters.author(expected_pkginfo)
+            self.assertEqual(author, expected_author)
+            expected_artifact = expected_release_artifacts[version]
+            self.assertEqual(artifact, expected_artifact)
+            expected_release = expected_releases[version]
+            self.assertEqual(release, expected_release)
+
+            self.assertTrue(version in dir_path)
+            self.assertTrue(self.project_name in dir_path)
+            # path still exists
+            self.assertTrue(os.path.exists(dir_path))
+            dir_paths.append(dir_path)
+
+        # Ensure uncompressed paths have been destroyed
+        for dir_path in dir_paths:
+            # path no longer exists
+            self.assertFalse(os.path.exists(dir_path))
diff --git a/swh/loader/pypi/tests/test_converters.py b/swh/loader/pypi/tests/test_converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_converters.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest import TestCase
+from nose.tools import istest
+
+from swh.loader.pypi.converters import author, info
+
+from .common import WithProjectTest
+
+
+class Test(WithProjectTest):
+    @istest
+    def info(self):
+        actual_info = self.project.info()
+
+        expected_info = {
+            'home_page': self.data['info']['home_page'],
+            'description': self.data['info']['description'],
+            'summary': self.data['info']['summary'],
+            'license': self.data['info']['license'],
+            'package_url': self.data['info']['package_url'],
+            'project_url': self.data['info']['project_url'],
+            'upstream': self.data['info']['project_urls']['Homepage'],
+        }
+
+        self.assertEqual(expected_info, actual_info)
+
+    @istest
+    def author(self):
+        info = self.data['info']
+        actual_author = author(info)
+
+        name = info['author'].encode('utf-8')
+        email = info['author_email'].encode('utf-8')
+        expected_author = {
+            'fullname': b'%s <%s>' % (name, email),
+            'name': name,
+            'email': email,
+        }
+
+        self.assertEqual(expected_author, actual_author)
+
+
+class ParseAuthorTest(TestCase):
+    @istest
+    def author_basic(self):
+        data = {
+            'author': "i-am-groot",
+            'author_email': 'iam@groot.org',
+        }
+        actual_author = author(data)
+
+        expected_author = {
+            'fullname': b'i-am-groot <iam@groot.org>',
+            'name': b'i-am-groot',
+            'email': b'iam@groot.org',
+        }
+
+        self.assertEquals(actual_author, expected_author)
+
+    @istest
+    def author_malformed(self):
+        data = {
+            'author': "['pierre', 'paul', 'jacques']",
+            'author_email': None,
+        }
+
+        actual_author = author(data)
+
+        expected_author = {
+            'fullname': b"['pierre', 'paul', 'jacques']",
+            'name': b"['pierre', 'paul', 'jacques']",
+            'email': None,
+        }
+
+        self.assertEquals(actual_author, expected_author)
+
+    @istest
+    def author_malformed_2(self):
+        data = {
+            'author': '[marie, jeanne]',
+            'author_email': '[marie@some, jeanne@thing]',
+        }
+
+        actual_author = author(data)
+
+        expected_author = {
+            'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
+            'name': b'[marie, jeanne]',
+            'email': b'[marie@some, jeanne@thing]',
+        }
+
+        self.assertEquals(actual_author, expected_author)
+
+    @istest
+    def author_malformed_3(self):
+        data = {
+            'author': '[marie, jeanne, pierre]',
+            'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
+        }
+
+        actual_author = author(data)
+
+        expected_author = {
+            'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>',  # noqa
+            'name': b'[marie, jeanne, pierre]',
+            'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
+        }
+
+        self.assertEquals(actual_author, expected_author)
diff --git a/swh/loader/pypi/tests/test_loader.py b/swh/loader/pypi/tests/test_loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_loader.py
@@ -0,0 +1,259 @@
+# Copyright (C) 2016-2018  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import tempfile
+
+from nose.plugins.attrib import attr
+from nose.tools import istest
+from unittest import TestCase
+
+from swh.model import hashutil
+
+from swh.loader.pypi.client import PyPIProject
+from swh.loader.pypi.loader import PyPILoader
+from .common import PyPIClientWithCache, RESOURCES_PATH, LoaderNoStorage
+
+
+class TestPyPILoader(LoaderNoStorage, PyPILoader):
+    """Real PyPILoader for test purposes (storage and pypi interactions
+       inhibited)
+
+    """
+    def __init__(self, project_name):
+        project_metadata_file = '%s/%s.json' % (RESOURCES_PATH, project_name)
+        project_metadata_url = 'https://pypi.org/pypi/%s/json' % project_name
+        with open(project_metadata_file) as f:
+            data = json.load(f)
+
+        temp_dir = tempfile.mkdtemp(
+            dir='/tmp/', prefix='swh.loader.pypi.tests-')
+        # Will use the pypi with cache
+        client = PyPIClientWithCache(
+            temp_directory=temp_dir, cache_dir=RESOURCES_PATH)
+        super().__init__(client=client)
+        self.project = PyPIProject(
+            client=client,
+            project=project_name,
+            project_metadata_url=project_metadata_url,
+            data=data)
+
+    def prepare(self, project_name, origin_url,
+                origin_metadata_url=None):
+        self.project_name = project_name
+        self.origin_url = origin_url
+        self.origin_metadata_url = origin_metadata_url
+        self.visit = 1  # first visit
+        self._prepare_state()
+
+
+@attr('fs')
+class BaseLoaderITest(TestCase):
+    """Loader test mixin preparing the PyPI project to 'load' in a test context.
+
+    In this setup, the loader uses the cache to load data so no
+    network interaction (no storage, no pypi).
+
+    """
+    def setUp(self, project_name='0805nexter',
+              dummy_pypi_instance='https://dummy.org'):
+        self.tmp_root_path = tempfile.mkdtemp()
+        self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+        self._project = project_name
+        self._origin_url = '%s/pypi/%s/' % (dummy_pypi_instance, project_name)
+        self._project_metadata_url = '%s/pypi/%s/json' % (
+            dummy_pypi_instance, project_name)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmp_root_path)
+
+    def assertContentsOk(self, expected_contents):
+        contents = self.loader.all_contents
+        self.assertEquals(len(contents), len(expected_contents))
+
+        for content in contents:
+            content_id = hashutil.hash_to_hex(content['sha1'])
+            self.assertIn(content_id, expected_contents)
+
+    def assertDirectoriesOk(self, expected_directories):
+        directories = self.loader.all_directories
+        self.assertEquals(len(directories), len(expected_directories))
+
+        for _dir in directories:
+            _dir_id = hashutil.hash_to_hex(_dir['id'])
+            self.assertIn(_dir_id, expected_directories)
+
+    def assertSnapshotOk(self, expected_snapshot, expected_revisions):
+        snapshots = self.loader.all_snapshots
+        self.assertEqual(len(snapshots), 1)
+
+        snap = snapshots[0]
+        snap_id = hashutil.hash_to_hex(snap['id'])
+        self.assertEqual(snap_id, expected_snapshot)
+
+        branches = snap['branches']
+        self.assertEqual(len(expected_revisions), len(branches))
+
+        for branch, target in branches.items():
+            rev_id = hashutil.hash_to_hex(target['target'])
+            self.assertIn(rev_id, expected_revisions)
+            self.assertEqual('revision', target['target_type'])
+
+    def assertRevisionsOk(self, expected_revisions):  # noqa: N802
+        """Check the loader's revisions match the expected revisions.
+
+        Expects self.loader to be instantiated and ready to be
+        inspected (meaning the loading took place).
+
+        Args:
+            expected_revisions (dict): Dict with key revision id,
+            value the targeted directory id.
+
+        """
+        # The last revision being the one used later to start back from
+        for rev in self.loader.all_revisions:
+            rev_id = hashutil.hash_to_hex(rev['id'])
+            directory_id = hashutil.hash_to_hex(rev['directory'])
+
+            self.assertEquals(expected_revisions[rev_id], directory_id)
+
+
+# Define loaders with no storage
+# They'll just accumulate the data in place
+# Only for testing purposes.
+
+
+class PyPILoaderNoSnapshot(TestPyPILoader):
+    """Same as TestPyPILoader with no prior snapshot seen
+
+    """
+    def _last_snapshot(self):
+        return None
+
+
+class LoaderITest(BaseLoaderITest):
+    def setUp(self, project_name='0805nexter',
+              dummy_pypi_instance='https://dummy.org'):
+        super().setUp(project_name, dummy_pypi_instance)
+        self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+
+    @istest
+    def load(self):
+        """Load a pypi origin
+
+        """
+        # when
+        self.loader.load(
+            self._project, self._origin_url, self._project_metadata_url)
+
+        # then
+        self.assertEquals(len(self.loader.all_contents), 6,
+                          '3 contents per release artifact files (2)')
+        self.assertEquals(len(self.loader.all_directories), 4)
+        self.assertEquals(len(self.loader.all_revisions), 2,
+                          '2 releases so 2 revisions should be created')
+        self.assertEquals(len(self.loader.all_releases), 0,
+                          'No release is created in the pypi loader')
+        self.assertEquals(len(self.loader.all_snapshots), 1,
+                          'Only 1 snapshot targeting all revisions')
+
+        expected_contents = [
+            'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
+            '938c33483285fd8ad57f15497f538320df82aeb8',
+            'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
+            '405859113963cb7a797642b45f171d6360425d16',
+            'e5686aa568fdb1d19d7f1329267082fe40482d31',
+            '83ecf6ec1114fd260ca7a833a2d165e71258c338',
+        ]
+
+        self.assertContentsOk(expected_contents)
+
+        expected_directories = [
+            '05219ba38bc542d4345d5638af1ed56c7d43ca7d',
+            'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
+            'b178b66bd22383d5f16f4f5c923d39ca798861b4',
+            'c3a58f8b57433a4b56caaa5033ae2e0931405338',
+        ]
+        self.assertDirectoriesOk(expected_directories)
+
+        # {revision hash: directory hash}
+        expected_revisions = {
+            '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d',  # noqa
+            'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4',  # noqa
+        }
+        self.assertRevisionsOk(expected_revisions)
+
+        self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+                              expected_revisions)
+
+
+class PyPILoaderWithSnapshot(TestPyPILoader):
+    """Same as TestPyPILoader with a prior snapshot seen
+
+    """
+    def _last_snapshot(self):
+        return {
+            'id': b'\xf4V\xb0>\x8b\xf1\x92\rd\xb0\r\xf24\xb1\xef\xed\xc2[l\x93',  # noqa
+            'branches': {
+                b'0805nexter-1.1.0.zip': {
+                    'target': b'L\x99\x89\x1f\x93\xb8\x14P'
+                    b'8Ww#Z7\xb5\xe9f\xdd\x15q',
+                    'target_type': 'revision'
+                },
+                b'0805nexter-1.2.0.zip': {
+                    'target': b'\xe4E\xdaM\xa2+1\xbf'
+                    b'\xeb\xb6\xff\xc48=\xbf\x83'
+                    b'\x9a\x07M!',
+                    'target_type': 'revision'
+                },
+            },
+        }
+
+    def _known_artifacts(self, last_snapshot):
+        yield from [
+            (
+                '0805nexter-1.1.0.zip',
+                '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035'  # noqa
+            ),
+            (
+                '0805nexter-1.2.0.zip',
+                '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709'  # noqa
+            )
+        ]
+
+
+class LoaderWithOriginAlreadySeenITest(BaseLoaderITest):
+    def setUp(self, project_name='0805nexter',
+              dummy_pypi_instance='https://dummy.org'):
+        super().setUp(project_name, dummy_pypi_instance)
+        self.loader = PyPILoaderWithSnapshot(project_name=project_name)
+
+    @istest
+    def load(self):
+        """Loading an already injected pypi origin results in only 1 snapshot
+
+        """
+        # when
+        self.loader.load(
+            self._project, self._origin_url, self._project_metadata_url)
+
+        # then
+        self.assertEquals(len(self.loader.all_contents), 0)
+        self.assertEquals(len(self.loader.all_directories), 0)
+        self.assertEquals(len(self.loader.all_revisions), 0)
+        self.assertEquals(len(self.loader.all_releases), 0)
+        self.assertEquals(len(self.loader.all_snapshots), 1)
+
+        self.assertContentsOk([])
+        self.assertDirectoriesOk([])
+        self.assertRevisionsOk(expected_revisions={})
+
+        expected_revisions = {
+            '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d',  # noqa
+            'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4',  # noqa
+        }
+        self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+                              expected_revisions)