diff --git a/swh/loader/base/__init__.py b/swh/loader/base/__init__.py
new file mode 100644
diff --git a/swh/loader/base/abstractattribute.py b/swh/loader/base/abstractattribute.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/base/abstractattribute.py
@@ -0,0 +1,26 @@
+# Copyright (C) 2019 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+class AbstractAttribute:
+    """Placeholder for a class attribute that subclasses must override.
+
+    It acts like the :func:`abc.abstractmethod` decorator, for attributes and
+    properties instead of methods, without empty method def boilerplate. The
+    class declaring an AbstractAttribute must likewise inherit from
+    :class:`abc.ABC` or use the :class:`abc.ABCMeta` metaclass.
+
+    Usage example::
+
+        import abc
+        class ClassContainingAnAbstractAttribute(abc.ABC):
+            foo = AbstractAttribute('descriptive docstring for foo')
+
+    """
+    __isabstractmethod__ = True
+
+    def __init__(self, docstring=None):
+        if docstring is None:
+            return
+        self.__doc__ = 'AbstractAttribute: ' + docstring
diff --git a/swh/loader/base/build.py b/swh/loader/base/build.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/base/build.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import time
+from swh.model.identifiers import normalize_timestamp
+from dateutil import parser as date_parser
+
+
+class construct_revision:
+    """Construct revision from the metadata of the package version
+
+    Construct the revision for a package version using the metadata provided.
+    There are several hookpoints that can be overridden as per the need of
+    package manager.
+
+    """
+
+    UTC_OFFSET = 0
+    SWH_PERSON = {
+        'name': 'Software Heritage',
+        'fullname': 'Software Heritage',
+        'email': 'robot@softwareheritage.org'
+    }
+    REVISION_MESSAGE = 'swh-loader-base: synthetic revision message'
+
+    def modify_revision(self, revision):
+        """Hook to adjust the revision once it has been created.
+
+        If a package manager needs its revisions in a specific format, this
+        method can be overridden instead of overriding `compute_revision()`.
+
+        Args:
+            revision (dict): Created revision
+
+        Returns:
+            dict: Modified revision (returned unchanged by default)
+        """
+        return revision
+
+    def _time_from_last_modified(self, last_modified):
+        """Compute the modification time from a last-modified date.
+
+        Args:
+            last_modified (str): Last modification time
+
+        Returns:
+            dict representing a timestamp with keys {seconds, microseconds}
+
+        """
+        moment = arrow.get(last_modified).datetime
+        # The decimal digits of a float timestamp are not microseconds ('.5'
+        # would give 5 instead of 500000): read them from the datetime instead.
+        return {
+            'seconds': int(moment.replace(microsecond=0).timestamp()),
+            'microseconds': moment.microsecond
+        }
+
+    def compute_revision(self, directory, package_source_data):
+        """Compute a revision out of the `find_*` hooks.
+
+        Args:
+            directory: object handed to `self.directory()` (reads its `hash`)
+            package_source_data (dict): Information about the package
+                                        release version
+
+        Returns:
+            dict: Revision, as returned by `modify_revision()`
+
+        """
+        revision = {
+            'metadata': self.find_metadata(package_source_data),
+            'date': self.find_date(package_source_data),
+            'committer_date': self.find_date(package_source_data),
+            'author': self.find_author(package_source_data),
+            'committer': self.find_author(package_source_data),
+            'type': self.find_type(package_source_data),
+            'message': self.find_message(package_source_data),
+            'directory': self.directory(directory),
+            'synthetic': self.find_synthetic(),
+        }
+
+        return self.modify_revision(revision)
+
+    def find_synthetic(self):
+        return True
+
+    def find_type(self, package_source_data):
+        return package_source_data['nature']
+
+    def find_message(self, package_source_data):
+        return self.REVISION_MESSAGE
+
+    def directory(self, directory):
+        return directory.hash
+
+    def find_author(self, package_source_data):
+        # Fall back on the Software Heritage person when no author is given.
+        author = package_source_data.get('author', self.SWH_PERSON)
+        return author
+
+    def find_metadata(self, package_source_data):
+        # The metadata wraps the package information under a single key.
+        metadata = {'package': package_source_data}
+        return metadata
+
+    def find_date(self, package_source_data):
+        """Return the normalized date of the package version.
+
+        Falls back on the current time when the metadata has no `date`.
+        """
+        if 'date' in package_source_data:
+            date = date_parser.parse(package_source_data['date'])
+            return normalize_timestamp(int(date.timestamp()))
+        # No date in the metadata: use the current time, normalized the same
+        # way so that both code paths return the same structure.
+        return normalize_timestamp(int(time.time()))
diff --git a/swh/loader/base/dowload.py b/swh/loader/base/dowload.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/base/dowload.py
@@ -0,0 +1,354 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import requests
+import time
+from .abstractattribute import AbstractAttribute
+
+try:
+    from _version import __version__
+except ImportError:
+    __version__ = 'devel'
+
+
+# Classes used to detect and filter out already archived package versions
+
+
+class If_Modified_Since:
+    """Uses if_modified_then header to check for if the package is previously
+       archived.
+
+    This class is to be used to identify and avoid the reprocessing of
+    previously archived package version when there is no reliable field is
+    provided in the metadata that can so the job.
+
+    The following operations are  performed:
+        - Retrive known versions and store then in a dict with tarball `url`
+          as key(can be changed by overriding `get_key` method)
+        - Check if the tarballs are present in knowns versions.
+            * If the match found, it sends a request with `if_modified_since`
+              header to confirm the match
+            * If the match is not found it sends a simple request
+        - Store the request and the time for further processing
+        - Instantiate a generator to process a specific package released
+          version
+
+    """
+    def __init__(self):
+        self.session = requests.session()
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage Tar Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def get_artifact(self, revision):
+        """Fetch artifact from revision
+
+        Args:
+            revision (dict): Previous revision
+
+        Returns:
+            dict: value stored under `revision['metadata']['package']`
+
+        """
+        return revision['metadata']['package']
+
+    def get_key(self):
+        """Returns the key to be used to identify known revisions
+
+        """
+        return 'url'
+
+    def known_versions(self, last_snapshot):
+        """
+        Collect the package versions already ingested into the archive.
+
+        Args:
+            last_snapshot (dict): Last snapshot for the visit
+
+        Returns:
+            dict: `get_key()` value of each known artifact mapped to the
+            list `[revision id, time of last visit]`.
+
+        """
+        if not (last_snapshot and 'branches' in last_snapshot):
+            return {}
+
+        # only branches targeting revisions matter (aliases are left out)
+        targets = []
+        for branch in last_snapshot['branches'].values():
+            if branch and branch['target_type'] == 'revision':
+                targets.append(branch['target'])
+
+        known = self.storage.revision_get(targets)
+        field = self.get_key()
+        versions = {}
+        for rev in known:
+            if rev:  # revision_get can return None
+                artifact = self.get_artifact(rev)
+                versions[artifact[field]] = [rev['id'],
+                                             artifact['time_last_visit']]
+        return versions
+
+    def filter_package_versions(self, tarballs, known_versions):
+        """Return the available tarballs that are not previously archived.
+
+        Args:
+            tarballs (list): a list of dicts containing information about the
+                respective tarball that is provided by lister.
+            known_versions (dict): maps the `get_key()` value of the archived
+                versions to `[revision id, time_last_visit]`, as built by
+                `known_versions()`; None is treated as empty.
+
+        Returns:
+            A list of dicts containing information about the respective
+            tarballs that are not previously archived, each extended with
+            the keys `response` and `time_last_visit`.
+
+        """
+        known_versions = known_versions or {}
+        versions = []
+        key = self.get_key()
+        for release in tarballs:
+            tarball_url = release['url']
+            # Look the release up with the very key used to index
+            # `known_versions`, which is not always the url.
+            known = known_versions.get(release[key])
+            # A known version is requested along with the time of its last
+            # visit, so that the server can answer 304 if it is unchanged.
+            tarball_request = self._request(
+                tarball_url,
+                time_last_visit=known[1] if known else None,
+                throw_error=False)
+
+            if tarball_request.status_code == 304:
+                continue
+            if tarball_request.status_code == 404:
+                self.log.debug('Tarball url %s returns a 404 error.',
+                               tarball_url)
+                continue
+
+            # keep the response around, it is consumed by the download step
+            release['response'] = tarball_request
+            release['time_last_visit'] = time.strftime(
+                '%a, %d %b %Y %H:%M:%S GMT', time.gmtime(time.time()))
+            versions.append(release)
+
+        return versions
+
+    def _request(self, url, time_last_visit=None, throw_error=True):
+        """Request the remote tarball url.
+
+        Args:
+            url (str): Url (file or http*)
+            time_last_visit (str): sent as `If-Modified-Since` when given
+            throw_error (bool): raise ValueError when the status is not 200
+
+        Returns:
+            server response
+
+        """
+        # copy the headers: the conditional one must not leak into later calls
+        params = dict(self.params, headers=dict(self.params['headers']))
+        if time_last_visit:
+            params['headers']['If-Modified-Since'] = time_last_visit
+        response = self.session.get(url, **params, stream=True)
+        if response.status_code != 200 and throw_error:
+            raise ValueError("Fail to query '%s'. Reason: %s" % (
+                url, response.status_code))
+        return response
+
+    def prepare_package_versions(self, tarballs, known_versions=None):
+        """
+        Instantiate a generator that will process a specific package released
+        version at each iteration step. The following operations will be
+        performed:
+
+            1. Create a temporary directory to download and extract the
+               release tarball
+            2. Download the tarball
+            3. Uncompress the tarball
+            4. Parse the file associated to the package version to extract
+               metadata (optional)
+            5. Delete unnecessary files (optional)
+
+        Args:
+            tarballs (list): a list of dicts containing information about the
+                respective tarball that is provided by lister.
+            known_versions (dict): may be provided by the loader, it enables
+                to filter out versions already ingested in the archive.
+
+        Yields:
+            Tuple[dict, str]: tuples containing the following
+            members:
+
+                * a dict holding package tarball information and metadata
+                * a string holding the path of the uncompressed package to
+                  load into the archive
+
+        """
+        new_versions = self.filter_package_versions(tarballs, known_versions)
+        for package_source_data in new_versions:
+            tarball_request = package_source_data['response']
+
+            # To make things simple while creating revisions
+            del package_source_data['response']
+            yield self._prepare_package_version(package_source_data,
+                                                tarball_request)
+
+
+class compare_field:
+    """Uses a field present in the metadata to check for if the package is
+       previously archived.
+
+    This class is to be used to identify and avoid the reprocessing of
+    previously archived package version using a field provided by the
+    API in the metadata of the package version
+
+    The following operations are  performed:
+        - Retrive known versions and store then in a dict with key of same
+          field that is mentioned in compare field
+        - Check if the tarballs are present in knowns versions.
+        - Instantiate a generator to process a specific package released
+          version
+
+    """
+    compare_field = AbstractAttribute("Field used to identify if the package"
+                                      "version is previously archived")
+    # eg for pypi loader compare_field = 'sha'
+
+    def __init__(self):
+        self.session = requests.session()
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage Tar Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def _request(self, url, throw_error=True):
+        """Query the remote tarball url.
+
+        Args:
+            url (str): Url (file or http*)
+            throw_error (bool): whether a failed query raises an error
+
+        Raises:
+            ValueError when the status code is not 200 (if `throw_error`)
+
+        Returns:
+            server response
+
+        """
+        reply = self.session.get(url, **self.params, stream=True)
+        if throw_error and reply.status_code != 200:
+            raise ValueError("Fail to query '%s'. Reason: %s" % (
+                url, reply.status_code))
+        return reply
+
+    def known_versions(self, last_snapshot):
+        """
+        Collect the package versions already ingested into the archive.
+
+        Args:
+            last_snapshot (dict): Last snapshot for the visit
+
+        Returns:
+            dict: value of the `compare_field` field of each known artifact
+            mapped to the id of its revision.
+
+        """
+        if not (last_snapshot and 'branches' in last_snapshot):
+            return {}
+
+        # only branches targeting revisions matter (aliases are left out)
+        targets = []
+        for branch in last_snapshot['branches'].values():
+            if branch and branch['target_type'] == 'revision':
+                targets.append(branch['target'])
+
+        versions = {}
+        for rev in self.storage.revision_get(targets):
+            # revision_get can return None
+            metadata = rev['metadata'] if rev else {}
+            if 'original_artifact' in metadata:      # Fix me
+                artifact = metadata['original_artifact']
+                versions[artifact[self.compare_field]] = rev['id']  # Check me
+        return versions
+
+    def filter_package_versions(self, tarballs, known_versions):
+        """
+        Return the available tarballs that are not previously archived.
+
+        Args:
+            tarballs (list): a list of dicts containing information about the
+                respective tarball that is provided by lister.
+            known_versions (dict): may be provided by the loader, it enables
+                to filter out versions already ingested in the archive; its
+                keys are `compare_field` values. None means nothing is known.
+
+        Returns:
+            A list of dicts containing information about the respective
+            tarballs that are not previously archived.
+
+        """
+        # `prepare_package_versions()` defaults `known_versions` to None,
+        # which cannot be used with the `in` operator.
+        known_versions = known_versions or {}
+        field = self.compare_field
+        versions = [release for release in tarballs
+                    if release[field] not in known_versions]
+
+        return versions
+
+    def prepare_package_versions(self, tarballs, known_versions=None):
+        """
+        Instantiate a generator that will process a specific package released
+        version at each iteration step. The following operations will be
+        performed:
+
+            1. Create a temporary directory to download and extract the
+               release tarball
+            2. Download the tarball
+            3. Uncompress the tarball
+            4. Parse the file associated to the package version to extract
+               metadata (optional)
+            5. Delete unnecessary files (optional)
+
+        Args:
+            tarballs (list): a list of dicts containing information about the
+                respective tarball that is provided by lister.
+            known_versions (dict): may be provided by the loader, it enables
+                to filter out versions already ingested in the archive.
+
+        Yields:
+            Tuple[dict, str]: tuples containing the following
+            members:
+
+                * a dict holding package tarball information and metadata
+                * a string holding the path of the uncompressed package to
+                  load into the archive
+
+        """
+        new_versions = self.filter_package_versions(tarballs, known_versions)
+        for package_source_data in new_versions:
+            # filter out version with missing tarball,
+            # package visit will be marked as partial at the end of
+            # the loading process
+
+            tarball_url = package_source_data['url']
+            tarball_request = self._request(tarball_url,
+                                            throw_error=False)
+            if tarball_request.status_code == 404:
+                self.log.debug('Tarball url %s returns a 404 error.',
+                               tarball_url)
+                continue
+
+            yield self._prepare_package_version(package_source_data,
+                                                tarball_request)
diff --git a/swh/loader/base/loader.py b/swh/loader/base/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/base/loader.py
@@ -0,0 +1,413 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import shutil
+
+from abc import abstractmethod
+from tempfile import mkdtemp
+
+from swh.core import tarball
+from .abstractattribute import AbstractAttribute
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import BufferedLoader
+from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+from swh.model.from_disk import Directory
+
+from .build import construct_revision
+
+from swh.model.identifiers import (
+    identifier_to_bytes, revision_identifier
+)
+
+DEBUG_MODE = '** DEBUG MODE **'
+
+
+class BaseLoader(BufferedLoader, construct_revision):
+    """
+
+    Required Overrides:
+        loader_name
+        class_name
+        def convert_to_standard_format
+
+    Optional Overrides:
+        def cleanup_artifact
+        def extract_metadata
+
+    """
+
+    loader_name = AbstractAttribute("Name of the package manager")  # e.g pypi
+    class_name = AbstractAttribute("Name of the loader class")  # eg PyPILoader
+
+    def __init__(self):
+        super().__init__(logging_class='swh.loader.%s.%s' % (self.loader_name,
+                                                             self.class_name))
+        self.TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.%s.' % self.loader_name
+        self.CONFIG_BASE_FILENAME = 'loader/%s' % self.loader_name
+
+        self.ADDITIONAL_CONFIG = {
+            'temp_directory': ('str',
+                               '/tmp/swh.loader.%s/' % self.loader_name),
+            'cache': ('bool', False),
+            'cache_dir': ('str', ''),
+            'debug': ('bool', False),  # NOT FOR PRODUCTION
+        }
+
+        self.local_cache = None
+        self.dir_path = None
+
+        temp_directory = self.config['temp_directory']
+        os.makedirs(temp_directory, exist_ok=True)
+
+        self.temp_directory = mkdtemp(
+            suffix='-%s' % os.getpid(),
+            prefix=self.TEMPORARY_DIR_PREFIX_PATTERN,
+            dir=temp_directory)
+
+        self.debug = self.config.get('debug', False)
+
+    @abstractmethod
+    def convert_to_standard_format(self, **kwargs):
+        """Fetch the metadata and convert it into a standard format
+        The standard format is a dict with keys
+            `name` (str): Holding name of the package
+            `origin_url` (str): Holding the origin_url of the package
+            `tarballs` (list): A list of dicts where each dict contains
+                information related to a single version of the
+                package. The `url` key in the dict is necessary and will
+                hold tarball url. Other keys are optional and as per
+                availability of metadata.
+
+        Note: Keys `nature` and `response` are reserved keywords and cannot be
+              used in the dict that are present under key `tarballs`
+
+        Args:
+            **kwargs: Arbitrary keyword arguments passed by the lister.
+
+        Returns:
+            dict: Containing information as directed by the guidelines
+                  mentioned above
+
+        Example:
+        {
+            name:'8sync',
+            origin_url:'https://ftp.gnu.org/gnu/8sync/',
+            tarballs:[{url: 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
+                       time_modified: 1562878592 },
+                      {url: 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz',
+                       time_modified: 1599887203 },
+                       ...
+                     ]
+        }
+
+        """
+        pass
+
+    def cleanup_artifact(self, uncompressed_path):
+        """Clean up unnecessary files from the downloaded tarball
+            also some special operation if needed.
+
+        Implementation of this method depends on the file structure of the
+        tarball. It is used to clean up files from the uncompressed tarball
+        that are not to be archived(eg binaries).
+
+        Args:
+            uncompressed_path (str): Path of uncompressed tarball
+
+        Returns:
+            uncompressed_path (str): Path of uncompressed tarball after
+                                     removing unnecessary files
+
+        """
+        return uncompressed_path
+
+    def extract_metadata(self, package_path, package_source_data):
+        """Fetch the metadata from the downloaded file.
+
+        """
+        return package_source_data
+
+    # You probably don't need to override anything below this line.
+
+    def prepare_origin_visit(self, **kwargs):
+        """Prepare package visit.
+
+        Args:
+            **kwargs: Arbitrary keyword arguments passed by the lister.
+
+        """
+        # reset statuses
+        self._load_status = 'uneventful'
+        self._visit_status = 'full'
+        self.done = False
+        # fetch the npm package metadata from the registry
+        self.package_details = self.convert_to_standard_format(**kwargs)
+        self.origin = {
+            'url': self.package_details['origin_url'],
+            'type': self.loader_name,
+        }
+        self.visit_date = None  # loader core will populate it
+
+    def prepare(self, **kwargs):
+        """Prepare effective loading of source tarballs for a package manager
+           package.
+
+        Args:
+            **kwargs: Arbitrary keyword arguments passed by the lister.
+
+        """
+        self.contents = []
+        self.directories = []
+        self.revisions = []
+        self.package_temp_dir = os.path.join(self.temp_directory,
+                                             self.package_details['name'])
+
+        # The result is stored under the method's own name: go through the
+        # class so that a second call does not try to call the stored dict.
+        self.known_versions = type(self).known_versions(
+            self, self.last_snapshot())
+        self.new_artifacts = self.prepare_package_versions(
+            self.package_details['tarballs'], self.known_versions)
+
+    def _prepare_package_version(self, package_source_data, tarball_request):
+        """Process the package release version.
+
+        The following operations are performed:
+
+            1. Download the tarball
+            2. Uncompress the tarball
+            3. Delete unnecessary files (optional)
+            4. Parse the file associated to the package version to extract
+               metadata (optional)
+
+        Args:
+            package_source_data (dict): information about one tarball, as
+                provided by the lister; its `url` key is read here.
+            tarball_request: server response for that url, handed over to
+                `download_generate_hash()`.
+
+        Return:
+            Tuple[dict, str]: `(None, None)` when the tarball cannot be
+            uncompressed, otherwise a tuple with the following members:
+
+                * a dict holding package tarball information and metadata
+                * a string holding the path of the uncompressed package to
+                  load into the archive
+
+        """
+        url = package_source_data['url']
+        tarball_path, hashes = self.download_generate_hash(tarball_request,
+                                                           url)
+        uncompressed_path = os.path.join(self.package_temp_dir, 'uncompressed',
+                                         url)     # SEE ME
+        package_source_data['nature'] = self.uncompress_tarball(
+                                        tarball_path, uncompressed_path)
+
+        # the tarball is removed whether or not it could be uncompressed
+        os.remove(tarball_path)
+
+        if self.tarball_invalid:
+            return None, None
+
+        package_path = self.cleanup_artifact(uncompressed_path)
+        package_source_data = self.extract_metadata(package_path,
+                                                    package_source_data)
+        return package_source_data, package_path
+
+    def fetch_data(self):
+        """Called once per release artifact version (can be many for one
+           release).
+
+        This will for each call:
+        - retrieve the next release artifact from `self.new_artifacts`
+        - compute its contents, directories and revision
+
+        Returns:
+            False once `self.new_artifacts` is exhausted, True otherwise
+
+        """
+        data = None
+        if self.done:
+            return False
+
+        try:
+            data = next(self.new_artifacts)
+            self._load_status = 'eventful'
+        except StopIteration:
+            self.done = True
+            return False
+
+        package_source_data, dir_path = data
+        # the tarball could not be uncompressed: nothing to compute for it
+        if self.tarball_invalid:
+            return not self.done
+
+        dir_path = dir_path.encode('utf-8')
+        directory = Directory.from_disk(path=dir_path, data=True)
+        objects = directory.collect()
+
+        if 'content' not in objects:
+            objects['content'] = {}
+        if 'directory' not in objects:
+            objects['directory'] = {}
+
+        self.contents = objects['content'].values()
+        self.directories = objects['directory'].values()
+
+        revision = self.compute_revision(directory,
+                                         package_source_data)   # FIX ME
+
+        revision['id'] = identifier_to_bytes(
+            revision_identifier(revision))
+        self.revisions.append(revision)
+
+        # TODO: `compare_field` is not defined by If_Modified_Since; adapt me
+        package_key = package_source_data[self.compare_field]  # check for this
+        self.known_versions[package_key] = revision['id']   # SEE ME
+
+        self.log.debug('Removing unpacked package files at %s', dir_path)
+        shutil.rmtree(dir_path)
+
+        return not self.done
+
+    def last_snapshot(self):
+        """Fetch the last snapshot of the package, None without such visit."""
+        # only a visit that comes with a snapshot is asked for
+        latest = self.storage.origin_visit_get_latest(
+            self.origin['url'], require_snapshot=True)
+        if not latest:
+            return None
+        return snapshot_get_all_branches(self.storage,
+                                         latest['snapshot'])
+
+    def store_data(self):
+        """Store fetched data in the database.
+
+        """
+        # Done
+        self.maybe_load_contents(self.contents)
+        self.maybe_load_directories(self.directories)
+        self.maybe_load_revisions(self.revisions)
+
+        if self.done:
+            self.generate_and_load_snapshot()
+            self.flush()
+
+    def generate_and_load_snapshot(self):
+        """
+        Make me
+        """
+        pass
+
+    def download_generate_hash(self, response, url):
+        """Store the tarball in the temp directory and hash its content
+
+        Args:
+            response (Response): Server response of the url
+            url (str): Url of the tarball
+
+        Raises:
+            ValueError: the size on disk differs from the `content-length`
+
+        Returns:
+            Tuple of local (filepath, hashes of the downloaded file)
+
+        """
+        length = int(response.headers['content-length'])
+
+        # `package_temp_dir` is only computed in `prepare()`: make it exist.
+        os.makedirs(self.package_temp_dir, exist_ok=True)
+        filepath = os.path.join(self.package_temp_dir, os.path.basename(url))
+
+        h = MultiHash(length=length)
+        with open(filepath, 'wb') as f:
+            for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
+                h.update(chunk)
+                f.write(chunk)
+
+        actual_length = os.path.getsize(filepath)
+        if length != actual_length:
+            raise ValueError('Error when checking size: %s != %s' % (
+                length, actual_length))
+
+        hashes = {'length': length, **h.hexdigest()}
+        return filepath, hashes
+
+    def uncompress_tarball(self, filepath, path):
+        """Uncompress a tarball.
+
+        Args:
+            filepath (str): Path of tarball to uncompress
+            path (str): The destination folder where to uncompress the tarball
+        Returns:
+            The nature of the tarball, zip or tar.
+
+        """
+        # Done
+        # filepath = tempdir + url
+        try:
+            self.tarball_invalid = False
+            return tarball.uncompress(filepath, path)
+        except Exception:
+            self.tarball_invalid = True
+            return None
+
+    def pre_cleanup(self):
+        """To prevent disk explosion if some other workers exploded
+        in mid-air (OOM killed), we try and clean up dangling files.
+
+        Nothing is removed in debug mode.
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not pre-clean up temp dir %s',
+                             DEBUG_MODE, self.temp_directory)
+            return
+        clean_dangling_folders(self.config['temp_directory'],
+                               pattern_check=self.TEMPORARY_DIR_PREFIX_PATTERN,
+                               log=self.log)
+
+    def flush(self):
+        """Flush any potential dangling data not sent to swh-storage.
+
+        Bypass the maybe_load_* methods which awaits threshold reached
+        signal. We actually want to store those as we are done
+        loading.
+
+        """
+        # NOTE(review): assumes pop() hands back the pending batch - confirm
+        contents = self.contents.pop()
+        directories = self.directories.pop()
+        revisions = self.revisions.pop()
+        releases = self.releases.pop()
+
+        # and send those to storage if asked
+        if self.config['send_contents']:
+            self.send_batch_contents(contents)
+        if self.config['send_directories']:
+            self.send_batch_directories(directories)
+        if self.config['send_revisions']:
+            self.send_batch_revisions(revisions)
+        if self.config['send_releases']:
+            self.send_batch_releases(releases)
+        if self.config['send_snapshot'] and self.snapshot:
+            self.send_snapshot(self.snapshot)
+
+    def cleanup(self):
+        """Clean up temporary disk use after downloading and extracting
+        package tarballs.
+
+        The temporary directory is kept in debug mode.
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not clean up temp dir %s',
+                             DEBUG_MODE, self.temp_directory)
+            return
+        if os.path.exists(self.temp_directory):
+            self.log.debug('Clean up %s', self.temp_directory)
+            shutil.rmtree(self.temp_directory)
diff --git a/swh/loader/base/tests/__init__.py b/swh/loader/base/tests/__init__.py
new file mode 100644
diff --git a/swh/loader/base/tests/test_download.py b/swh/loader/base/tests/test_download.py
new file mode 100644
diff --git a/swh/loader/base/tests/test_loader.py b/swh/loader/base/tests/test_loader.py
new file mode 100644