diff --git a/PKG-INFO b/PKG-INFO
index 5a55c46..a3e4f67 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.loader.git
-Version: 0.0.28
+Version: 0.0.29
 Summary: Software Heritage git loader
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/swh.loader.git.egg-info/PKG-INFO b/swh.loader.git.egg-info/PKG-INFO
index 5a55c46..a3e4f67 100644
--- a/swh.loader.git.egg-info/PKG-INFO
+++ b/swh.loader.git.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.loader.git
-Version: 0.0.28
+Version: 0.0.29
 Summary: Software Heritage git loader
 Home-page: https://forge.softwareheritage.org/diffusion/DCORE/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/swh/loader/git/loader.py b/swh/loader/git/loader.py
index 175b90b..76be640 100644
--- a/swh/loader/git/loader.py
+++ b/swh/loader/git/loader.py
@@ -1,181 +1,185 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import dulwich.repo
 import os
 import shutil
 
 from collections import defaultdict
 
 from swh.core import hashutil
 from . import base, converters, utils
 
 
 class GitLoader(base.BaseLoader):
     """Load a git repository from a directory.
     """
 
     CONFIG_BASE_FILENAME = 'loader/git-loader'
 
     def prepare(self, origin_url, directory, fetch_date):
         self.origin_url = origin_url
         self.repo = dulwich.repo.Repo(directory)
         self.fetch_date = fetch_date
 
     def get_origin(self):
         """Get the origin that is currently being loaded"""
         return converters.origin_url_to_origin(self.origin_url)
 
     def iter_objects(self):
         object_store = self.repo.object_store
 
         for pack in object_store.packs:
             objs = list(pack.index.iterentries())
             objs.sort(key=lambda x: x[1])
             for sha, offset, crc32 in objs:
                 yield hashutil.hash_to_bytehex(sha)
 
         yield from object_store._iter_loose_objects()
         yield from object_store._iter_alternate_objects()
 
     def fetch_data(self):
         """Fetch the data from the data source"""
         type_to_ids = defaultdict(list)
         for oid in self.iter_objects():
             type_name = self.repo[oid].type_name
             type_to_ids[type_name].append(oid)
 
         self.type_to_ids = type_to_ids
 
     def has_contents(self):
         """Checks whether we need to load contents"""
         return bool(self.type_to_ids[b'blob'])
 
     def get_contents(self):
         """Get the contents that need to be loaded"""
         max_content_size = self.config['content_size_limit']
 
         for oid in self.type_to_ids[b'blob']:
             yield converters.dulwich_blob_to_content(
                 self.repo[oid], log=self.log,
                 max_content_size=max_content_size,
                 origin_id=self.origin_id)
 
     def has_directories(self):
         """Checks whether we need to load directories"""
         return bool(self.type_to_ids[b'tree'])
 
     def get_directories(self):
         """Get the directories that need to be loaded"""
         for oid in self.type_to_ids[b'tree']:
             yield converters.dulwich_tree_to_directory(
                 self.repo[oid], log=self.log)
 
     def has_revisions(self):
         """Checks whether we need to load revisions"""
         return bool(self.type_to_ids[b'commit'])
 
     def get_revisions(self):
         """Get the revisions that need to be loaded"""
         for oid in self.type_to_ids[b'commit']:
             yield converters.dulwich_commit_to_revision(
                 self.repo[oid], log=self.log)
 
     def has_releases(self):
         """Checks whether we need to load releases"""
         return bool(self.type_to_ids[b'tag'])
 
     def get_releases(self):
         """Get the releases that need to be loaded"""
         for oid in self.type_to_ids[b'tag']:
             yield converters.dulwich_tag_to_release(
                 self.repo[oid], log=self.log)
 
     def has_occurrences(self):
         """Checks whether we need to load occurrences"""
         return True
 
     def get_occurrences(self):
         """Get the occurrences that need to be loaded"""
         repo = self.repo
         origin_id = self.origin_id
         visit = self.visit
 
         for ref, target in repo.refs.as_dict().items():
             target_type_name = repo[target].type_name
             target_type = converters.DULWICH_TYPES[target_type_name]
             yield {
                 'branch': ref,
                 'origin': origin_id,
                 'target': hashutil.bytehex_to_hash(target),
                 'target_type': target_type,
                 'visit': visit,
             }
 
     def get_fetch_history_result(self):
         """Return the data to store in fetch_history for the current loader"""
         return {
             'contents': len(self.type_to_ids[b'blob']),
             'directories': len(self.type_to_ids[b'tree']),
             'revisions': len(self.type_to_ids[b'commit']),
             'releases': len(self.type_to_ids[b'tag']),
             'occurrences': len(self.repo.refs.allkeys()),
         }
 
     def save_data(self):
         """We already have the data locally, no need to save it"""
         pass
 
     def eventful(self):
         """Whether the load was eventful"""
         return True
 
 
 class GitLoaderFromArchive(GitLoader):
     """Load a git repository from an archive.
 
     """
-    CONFIG_BASE_FILENAME = 'loader/archive-git-loader'
+    def project_name_from_archive(self, archive_path):
+        """Compute the project name from the archive's path.
+
+        """
+        return os.path.basename(os.path.dirname(archive_path))
 
     def prepare(self, origin_url, archive_path, fetch_date):
         """1. Uncompress the archive in temporary location.
            2. Prepare as the GitLoader does
            3. Load as GitLoader does
 
         """
+        project_name = self.project_name_from_archive(archive_path)
         self.temp_dir, self.repo_path = utils.init_git_repo_from_archive(
-            archive_path)
-        self.project_name = os.path.basename(self.repo_path)
+            project_name, archive_path)
 
         self.log.info('Project %s - Uncompressing archive %s at %s' % (
-            self.project_name, os.path.basename(archive_path), self.repo_path))
+            origin_url, os.path.basename(archive_path), self.repo_path))
         super().prepare(origin_url, self.repo_path, fetch_date)
 
     def cleanup(self):
         """Cleanup the temporary location (if it exists).
 
         """
         if self.temp_dir and os.path.exists(self.temp_dir):
             shutil.rmtree(self.temp_dir)
         self.log.info('Project %s - Done injecting %s' % (
-            self.project_name, self.repo_path))
+            self.origin_url, self.repo_path))
 
 
 if __name__ == '__main__':
     import logging
     import sys
 
     logging.basicConfig(
         level=logging.DEBUG,
         format='%(asctime)s %(process)d %(message)s'
     )
     loader = GitLoader()
 
     origin_url = sys.argv[1]
     directory = sys.argv[2]
     fetch_date = datetime.datetime.now(tz=datetime.timezone.utc)
 
     print(loader.load(origin_url, directory, fetch_date))
diff --git a/swh/loader/git/reader.py b/swh/loader/git/reader.py
index 634e23d..6646e63 100644
--- a/swh/loader/git/reader.py
+++ b/swh/loader/git/reader.py
@@ -1,204 +1,255 @@
 # Copyright (C) 2016  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import click
+from collections import defaultdict
 import logging
+import pprint
 
-from collections import defaultdict
+import click
 
 from swh.core import hashutil, utils
 
 from .updater import BulkUpdater, SWHRepoRepresentation
 from . import converters
 
 
 class SWHRepoFullRepresentation(SWHRepoRepresentation):
     """Overridden representation of a swh repository to permit to read
     completely the remote repository.
 
     """
     def __init__(self, storage, origin_id, occurrences=None):
         self.storage = storage
         self._parents_cache = {}
         self._type_cache = {}
         self.heads = set()
 
     def determine_wants(self, refs):
         """Filter the remote references to figure out which ones Software
            Heritage needs. In this particular context, we want to know
            everything.
 
         """
         if not refs:
             return []
 
         for target in refs.values():
             self.heads.add(target)
 
         return self.filter_unwanted_refs(refs).values()
 
     def find_remote_ref_types_in_swh(self, remote_refs):
         """Find the known swh remote.
         In that particular context, we know nothing.
 
         """
         return {}
 
 
 class DummyGraphWalker(object):
     """Dummy graph walker which claims that the client doesn’t have any
        objects.
 
     """
     def ack(self, sha): pass
 
     def next(self): pass
 
     def __next__(self): pass
 
 
-class GitSha1RemoteReader(BulkUpdater):
-    """Read sha1 git from a remote repository and dump only repository's
-       content sha1 as list.
-
-    """
+class BaseGitRemoteReader(BulkUpdater):
     CONFIG_BASE_FILENAME = 'loader/git-remote-reader'
 
     ADDITIONAL_CONFIG = {
         'pack_size_bytes': ('int', 4 * 1024 * 1024 * 1024),
         'pack_storage_base': ('str', ''),  # don't want to store packs so empty
         'next_task': (
             'dict', {
                 'queue': 'swh.storage.archiver.tasks.SWHArchiverToBackendTask',
                 'batch_size': 100,
                 'destination': 'azure'
             }
         )
     }
 
     def __init__(self):
         super().__init__(SWHRepoFullRepresentation)
         self.next_task = self.config['next_task']
         self.batch_size = self.next_task['batch_size']
         self.task_destination = self.next_task['queue']
         self.destination = self.next_task['destination']
 
     def graph_walker(self):
         return DummyGraphWalker()
 
     def prepare(self, origin_url, base_url=None):
         """Only retrieve information about the origin, set everything else to
            empty.
 
         """
-        ori = converters.origin_url_to_origin(origin_url)
-        self.origin = self.storage.origin_get(ori)
-        self.origin_id = self.origin['id']
+        self.origin = converters.origin_url_to_origin(origin_url)
+        self.origin_id = 0
         self.base_occurrences = []
-        self.base_origin_id = self.origin['id']
+        self.base_origin_id = 0
+
+    def keep_object(self, obj):
+        """Do we want to keep this object or not?"""
+        raise NotImplementedError('Please implement keep_object')
+
+    def get_id_and_data(self, obj):
+        """Get the id, type and data of the given object"""
+        raise NotImplementedError('Please implement get_id_and_data')
 
     def list_pack(self, pack_data, pack_size):
         """Override list_pack to only keep contents' sha1.
 
         Returns:
             id_to_type (dict): keys are sha1, values are their associated type
             type_to_ids (dict): keys are types, values are list of associated
-            ids (sha1 for blobs)
+            data (sha1 for blobs)
 
         """
+        self.data = {}
         id_to_type = {}
         type_to_ids = defaultdict(set)
 
         inflater = self.get_inflater()
 
         for obj in inflater:
-            type = obj.type_name
-            if type != b'blob':  # don't keep other types
+            if not self.keep_object(obj):
                 continue
 
-            # compute the sha1 (obj.id is the sha1_git)
-            data = obj.as_raw_string()
-            hashes = hashutil.hashdata(data, {'sha1'})
-            oid = hashes['sha1']
+            object_id, type, data = self.get_id_and_data(obj)
 
-            id_to_type[oid] = type
-            type_to_ids[type].add(oid)
+            id_to_type[object_id] = type
+            type_to_ids[type].add(object_id)
+            self.data[object_id] = data
 
         return id_to_type, type_to_ids
 
     def load(self, *args, **kwargs):
         """Override the loading part which simply reads the repository's
            contents' sha1.
 
         Returns:
             Returns the list of discovered sha1s for that origin.
 
         """
-        try:
-            self.prepare(*args, **kwargs)
-        except:
-            self.log.error('Unknown repository, skipping...')
-            return []
-
+        self.prepare(*args, **kwargs)
         self.fetch_data()
-        return self.type_to_ids[b'blob']
+
+
+class GitSha1RemoteReader(BaseGitRemoteReader):
+    """Read sha1 git from a remote repository and dump only repository's
+       content sha1 as list.
+
+    """
+    def keep_object(self, obj):
+        """Only keep blobs"""
+        return obj.type_name == b'blob'
+
+    def get_id_and_data(self, obj):
+        """We want to store only object identifiers"""
+        # compute the sha1 (obj.id is the sha1_git)
+        data = obj.as_raw_string()
+        hashes = hashutil.hashdata(data, {'sha1'})
+        oid = hashes['sha1']
+        return (oid, b'blob', oid)
 
 
 class GitSha1RemoteReaderAndSendToQueue(GitSha1RemoteReader):
     """Read sha1 git from a remote repository and dump only repository's
        content sha1 as list and send batch of those sha1s to a celery
        queue for consumption.
 
     """
     def load(self, *args, **kwargs):
         """Retrieve the list of sha1s for a particular origin and send those
            sha1s as group of sha1s to a specific queue.
 
         """
-        data = super().load(*args, **kwargs)
+        super().load(*args, **kwargs)
+
+        data = self.type_to_ids[b'blob']
 
         from swh.scheduler.celery_backend.config import app
         try:
             # optional dependency
             from swh.storage.archiver import tasks  # noqa
         except ImportError:
             pass
         from celery import group
 
         task_destination = app.tasks[self.task_destination]
         groups = []
         for ids in utils.grouper(data, self.batch_size):
             sig_ids = task_destination.s(destination=self.destination,
                                          batch=list(ids))
             groups.append(sig_ids)
         group(groups).delay()
         return data
 
 
-@click.command()
-@click.option('--origin-url', help='Origin\'s url')
-@click.option('--send/--nosend', default=False, help='Origin\'s url')
-def main(origin_url, send):
+class GitCommitRemoteReader(BaseGitRemoteReader):
+    def keep_object(self, obj):
+        return obj.type_name == b'commit'
+
+    def get_id_and_data(self, obj):
+        return obj.id, b'commit', converters.dulwich_commit_to_revision(obj)
+
+    def load(self, *args, **kwargs):
+        super().load(*args, **kwargs)
+        return self.data
+
+
+@click.group()
+@click.option('--origin-url', help='Origin url')
+@click.pass_context
+def main(ctx, origin_url):
     logging.basicConfig(
         level=logging.DEBUG,
         format='%(asctime)s %(process)d %(message)s'
     )
+    ctx.obj['origin_url'] = origin_url
+
+
+@main.command()
+@click.option('--send/--nosend', default=False, help='Origin\'s url')
+@click.pass_context
+def blobs(ctx, send):
+    origin_url = ctx.obj['origin_url']
 
     if send:
         loader = GitSha1RemoteReaderAndSendToQueue()
         ids = loader.load(origin_url)
         print('%s sha1s were sent to queue' % len(ids))
         return
 
     loader = GitSha1RemoteReader()
     ids = loader.load(origin_url)
 
     if ids:
         for oid in ids:
             print(hashutil.hash_to_hex(oid))
 
 
+@main.command()
+@click.option('--ids-only', is_flag=True, help='print ids only')
+@click.pass_context
+def commits(ctx, ids_only):
+    origin_url = ctx.obj['origin_url']
+
+    reader = GitCommitRemoteReader()
+    commits = reader.load(origin_url)
+    for commit_id, commit in commits.items():
+        if ids_only:
+            print(commit_id.decode())
+        else:
+            pprint.pprint(commit)
+
+
 if __name__ == '__main__':
-    main()
+    main(obj={})
diff --git a/swh/loader/git/utils.py b/swh/loader/git/utils.py
index ac4bd49..e5b2825 100644
--- a/swh/loader/git/utils.py
+++ b/swh/loader/git/utils.py
@@ -1,48 +1,49 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 import shutil
 import tempfile
 
 from subprocess import call
 
 
-def init_git_repo_from_archive(archive_path, root_temp_dir='/tmp'):
+def init_git_repo_from_archive(project_name, archive_path,
+                               root_temp_dir='/tmp'):
     """Given a path to an archive containing a git repository.
+
     Uncompress that archive to a temporary location and returns the path.
 
     If any problem whatsoever is raised, clean up the temporary location.
 
-    Returns:
+    Args:
+        project_name (str): Project's name
+        archive_path (str): Full path to the archive
+        root_temp_dir (str): Optional temporary directory mount point
+                             (default to /tmp)
+
+    Returns
         A tuple:
         - temporary folder: containing the mounted repository
         - repo_path, path to the mounted repository inside the temporary folder
 
-    Raises:
+    Raises
         ValueError in case of failure to run the command to uncompress
 
     """
-    project_name = os.path.basename(os.path.dirname(archive_path))
-    temp_dir = tempfile.mkdtemp(suffix='.swh.loader.git',
-                                prefix='tmp.',
-                                dir=root_temp_dir)
+    temp_dir = tempfile.mkdtemp(
+        suffix='.swh.loader.git', prefix='tmp.', dir=root_temp_dir)
 
     try:
-        repo_path = os.path.join(temp_dir, project_name)
-
         # create the repository that will be loaded with the dump
-        cmd = ['unzip', '-q', '-o', archive_path, '-d', temp_dir]
-        r = call(cmd)
-
+        r = call(['unzip', '-q', '-o', archive_path, '-d', temp_dir])
         if r != 0:
-            raise ValueError(
-                'Failed to uncompress git repository for %s' %
-                project_name)
+            raise ValueError('Failed to uncompress archive %s' % archive_path)
 
+        repo_path = os.path.join(temp_dir, project_name)
         return temp_dir, repo_path
     except Exception as e:
         shutil.rmtree(temp_dir)
         raise e
diff --git a/version.txt b/version.txt
index 7f0ac6c..b39233c 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.28-0-g1e8df3b
\ No newline at end of file
+v0.0.29-0-gdbd5ca3
\ No newline at end of file