diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
index 1755953..040b57b 100644
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -1,427 +1,484 @@
 # Copyright (C) 2016-2017 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import abc
 import os
 import logging
 import shutil
 import tempfile

 from swh.storage import get_storage
 from swh.core.config import SWHConfig
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
 from swh.scheduler.utils import get_task

 from swh.indexer.storage import get_indexer_storage, INDEXER_CFG_KEY


 class DiskIndexer:
     """Mixin intended to be used with other SomethingIndexer classes.

     Indexers inheriting from this class are a category of indexers
     which need the disk for their computations.

     Note:
         This expects the `self.working_directory` attribute to be
         defined at runtime.

     """
     def write_to_temp(self, filename, data):
         """Write the content's data in a temporary file.

         Args:
             filename (str): one of the content's many filenames
             data (bytes): the content's data to write in a temporary
                           file

         Returns:
             The path to the temporary file created. That file is
             filled in with the raw content's data.

         """
         os.makedirs(self.working_directory, exist_ok=True)
         temp_dir = tempfile.mkdtemp(dir=self.working_directory)
         content_path = os.path.join(temp_dir, filename)

         with open(content_path, 'wb') as f:
             f.write(data)

         return content_path

     def cleanup(self, content_path):
         """Remove content_path from working directory.

         Args:
             content_path (str): the file to remove

         """
         temp_dir = os.path.dirname(content_path)
         shutil.rmtree(temp_dir)


 class BaseIndexer(SWHConfig, metaclass=abc.ABCMeta):
     """Base class for indexers to inherit from.

     The main entry point is the :func:`run` function which is in
     charge of triggering the computations on the batch of ids
     received.

     Indexers can:

     - filter out ids whose data has already been indexed
     - retrieve the data for those ids from the storage or the objstorage
     - index this data depending on the object and store the result in
       the storage

     To implement a new object type indexer, inherit from the
-    BaseIndexer and implement the process of indexation:
+    BaseIndexer and implement indexing:

     :func:`run`:
       object_ids are different depending on object. For example: sha1 for
       content, sha1_git for revision, directory, release, and id for origin

     To implement a new concrete indexer, inherit from the object level
-    classes: :class:`ContentIndexer`, :class:`RevisionIndexer` (later
-    on :class:`OriginIndexer` will also be available)
+    classes: :class:`ContentIndexer`, :class:`RevisionIndexer`,
+    :class:`OriginIndexer`.

     Then you need to implement the following functions:

     :func:`filter`:
       filter out data already indexed (in storage). This function is used by
       the orchestrator and not directly by the indexer
       (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer).

     :func:`index`:
       compute the index for an id with its data (retrieved from the storage
       or the objstorage by the id key) and return the resulting index
       computation.

     :func:`persist_index_computations`:
       persist the results of multiple index computations in the storage.

     The new indexer implementation can also override the following functions:

     :func:`prepare`:
       Configuration preparation for the indexer. When overriding, this must
       call `super().prepare()`.

     :func:`check`:
       Configuration check for the indexer. When overriding, this must call
       `super().check()`.

     :func:`register_tools`:
       This should return a dict of the tool(s) to use when indexing or
       filtering.

     """
     CONFIG = 'indexer/base'

     DEFAULT_CONFIG = {
         INDEXER_CFG_KEY: ('dict', {
             'cls': 'remote',
             'args': {
                 'url': 'http://localhost:5007/'
             }
         }),
         # queue to reschedule if problem (None for no rescheduling,
         # the default)
         'rescheduling_task': ('str', None),
         'storage': ('dict', {
             'cls': 'remote',
             'args': {
                 'url': 'http://localhost:5002/',
             }
         }),
         'objstorage': ('dict', {
             'cls': 'multiplexer',
             'args': {
                 'objstorages': [{
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure',
                             'args': {
                                 'account_name': '0euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '0'}
                         ]
                     }
                 }, {
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure',
                             'args': {
                                 'account_name': '1euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '1'}
                         ]
                     }
                 }]
             },
         }),
     }

     ADDITIONAL_CONFIG = {}

     def __init__(self):
         """Prepare and check that the indexer is ready to run.

         """
         super().__init__()
         self.prepare()
         self.check()

     def prepare(self):
         """Prepare the indexer's needed runtime configuration.
         Without this step, the indexer cannot possibly run.

         """
         self.config = self.parse_config_file(
             additional_configs=[self.ADDITIONAL_CONFIG])
         if self.config['storage']:
             self.storage = get_storage(**self.config['storage'])
         objstorage = self.config['objstorage']
         self.objstorage = get_objstorage(objstorage['cls'],
                                          objstorage['args'])
         idx_storage = self.config[INDEXER_CFG_KEY]
         self.idx_storage = get_indexer_storage(**idx_storage)

         rescheduling_task = self.config['rescheduling_task']
         if rescheduling_task:
             self.rescheduling_task = get_task(rescheduling_task)
         else:
             self.rescheduling_task = None

         _log = logging.getLogger('requests.packages.urllib3.connectionpool')
         _log.setLevel(logging.WARN)
         self.log = logging.getLogger('swh.indexer')
         self.tools = list(self.register_tools(self.config['tools']))

     def check(self):
         """Check that the indexer's configuration is ok before proceeding.
         If ok, does nothing; otherwise, raises an error.

         """
         if not self.tools:
             raise ValueError('Tools %s are unknown, cannot continue' %
                              self.tools)

     def _prepare_tool(self, tool):
         """Prepare the tool dict to be compliant with the storage api.

         """
         return {'tool_%s' % key: value for key, value in tool.items()}

     def register_tools(self, tools):
         """Register tools in the (indexer) storage.

         Provides a sensible default which can be overridden if not
         sufficient. (For now, all indexers use only one tool.)

         Expects the self.config['tools'] property to be set with
         one or more tools.

         Args:
             tools (dict/[dict]): Either a dict or a list of dict.

         Returns:
             List of dict with an additional id key.

         Raises:
             ValueError: if the tools are neither a dict nor a list of
                 dicts.

         """
         if isinstance(tools, list):
             tools = map(self._prepare_tool, tools)
         elif isinstance(tools, dict):
             tools = [self._prepare_tool(tools)]
         else:
             raise ValueError('Configuration tool(s) must be a dict or list!')

         return self.idx_storage.indexer_configuration_add(tools)

     @abc.abstractmethod
     def filter(self, ids):
         """Filter missing ids for that particular indexer.

         Args:
             ids ([bytes]): list of ids

         Yields:
             iterator of missing ids

         """
         pass

     @abc.abstractmethod
     def index(self, id, data):
         """Index computation for the id and associated raw data.

         Args:
             id (bytes): identifier
             data (bytes): id's data from storage or objstorage depending on
                           object type

         Returns:
             a dict that makes sense for the persist_index_computations
             function.

         """
         pass

     @abc.abstractmethod
     def persist_index_computations(self, results, policy_update):
         """Persist the computations resulting from the index.

         Args:
             results ([result]): List of results. One result is the
                                 result of the index function.
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them

         Returns:
             None

         """
         pass

     def next_step(self, results):
         """Do something else with the computations' results (e.g. send to
         another queue, ...).

         (This is not an abstractmethod since it is optional).

         Args:
             results ([result]): List of results (dict) as returned
                                 by the index function.

         Returns:
             None

         """
         pass

     @abc.abstractmethod
     def run(self, ids, policy_update):
         """Given a list of ids:

         - retrieves the data from the storage
         - executes the indexing computations
         - stores the results (according to policy_update)

         Args:
             ids ([bytes]): list of object identifiers
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them

         """
         pass


 class ContentIndexer(BaseIndexer):
     """An object type indexer, inherits from the :class:`BaseIndexer` and
-    implements the process of indexation for Contents using the run
-    method
+    implements Content indexing using the run method

     Note: the :class:`ContentIndexer` is not an instantiable
     object. To use it in another context, one should inherit from this
     class and override the methods mentioned in the
     :class:`BaseIndexer` class.

     """
     def run(self, ids, policy_update):
         """Given a list of ids:

         - retrieve the content from the storage
         - execute the indexing computations
         - store the results (according to policy_update)

         Args:
             ids ([bytes]): sha1's identifier list
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them

         """
         results = []
         try:
             for sha1 in ids:
                 try:
                     raw_content = self.objstorage.get(sha1)
                 except ObjNotFoundError:
                     self.log.warn('Content %s not found in objstorage' %
                                   hashutil.hash_to_hex(sha1))
                     continue
                 res = self.index(sha1, raw_content)
                 if res:  # If no results, skip it
                     results.append(res)

             self.persist_index_computations(results, policy_update)
             self.next_step(results)
         except Exception:
             self.log.exception(
                 'Problem when reading contents metadata.')
             if self.rescheduling_task:
                 self.log.warn('Rescheduling batch')
                 self.rescheduling_task.delay(ids, policy_update)


+class OriginIndexer(BaseIndexer):
+    """An object type indexer, inherits from the :class:`BaseIndexer` and
+    implements Origin indexing using the run method
+
+    Note: the :class:`OriginIndexer` is not an instantiable object.
+    To use it in another context one should inherit from this class
+    and override the methods mentioned in the :class:`BaseIndexer`
+    class.
+
+    """
+    def run(self, ids, policy_update, parse_ids=False):
+        """Given a list of origin ids:
+
+        - retrieve origins from storage
+        - execute the indexing computations
+        - store the results (according to policy_update)
+
+        Args:
+            ids ([Union[int, Tuple[str, bytes]]]): list of origin ids or
+                (type, url) tuples.
+            policy_update ([str]): either 'update-dups' or 'ignore-dups' to
+                respectively update duplicates or ignore them
+            parse_ids (bool): if `True`, will try to convert `ids` from
+                human-readable strings ('type+url' or a numeric origin
+                id) to the proper type
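+                For example, with `parse_ids=True`, the input
+                `['git+https://example.com/repo', '42']` is parsed into
+                `[['git', 'https://example.com/repo'], 42]`.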
+ + """ + if parse_ids: + ids = [ + o.split('+', 1) if ':' in o else int(o) # type+url or id + for o in ids] + + results = [] + + for id_ in ids: + if isinstance(id_, (tuple, list)): + if len(id_) != 2: + raise TypeError('Expected a (type, url) tuple.') + (type_, url) = id_ + params = {'type': type_, 'url': url} + elif isinstance(id_, int): + params = {'id': id_} + else: + raise TypeError('Invalid value for "ids": %r' % id_) + origin = self.storage.origin_get(params) + if not origin: + self.log.warn('Origins %s not found in storage' % + list(ids)) + continue + try: + res = self.index(origin) + if origin: # If no results, skip it + results.append(res) + except Exception: + self.log.exception( + 'Problem when processing origin %s' % id_) + self.persist_index_computations(results, policy_update) + + class RevisionIndexer(BaseIndexer): """An object type indexer, inherits from the :class:`BaseIndexer` and - implements the process of indexation for Revisions using the run - method + implements Revision indexing using the run method Note: the :class:`RevisionIndexer` is not an instantiable object. To use it in another context one should inherit from this class and override the methods mentioned in the :class:`BaseIndexer` class. """ def run(self, ids, policy_update): """Given a list of sha1_gits: - retrieve revisions from storage - execute the indexing computations - store the results (according to policy_update) Args: ids ([bytes]): sha1_git's identifier list policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ results = [] revs = self.storage.revision_get(ids) for rev in revs: if not rev: self.log.warn('Revisions %s not found in storage' % list(map(hashutil.hash_to_hex, ids))) continue try: res = self.index(rev) if res: # If no results, skip it results.append(res) except Exception: self.log.exception( 'Problem when processing revision') self.persist_index_computations(results, policy_update) diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py index 8adff82..f2121b0 100644 --- a/swh/indexer/metadata.py +++ b/swh/indexer/metadata.py @@ -1,283 +1,280 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import logging from swh.indexer.indexer import ContentIndexer, RevisionIndexer from swh.indexer.metadata_dictionary import compute_metadata from swh.indexer.metadata_detector import detect_metadata from swh.indexer.metadata_detector import extract_minimal_metadata_dict from swh.indexer.storage import INDEXER_CFG_KEY from swh.model import hashutil class ContentMetadataIndexer(ContentIndexer): """Content-level indexer This indexer is in charge of: - filtering out content already indexed in content_metadata - reading content from objstorage with the content's id sha1 - computing translated_metadata by given context - using the metadata_dictionary as the 'swh-metadata-translator' tool - store result in content_metadata table """ CONFIG_BASE_FILENAME = 'indexer/metadata' def __init__(self, tool, config): # twisted way to use the exact same config of RevisionMetadataIndexer # object that uses internally ContentMetadataIndexer self.config = config self.config['tools'] = tool super().__init__() def filter(self, ids): """Filter out known sha1s and return only missing ones. 
""" yield from self.idx_storage.content_metadata_missing(( { 'id': sha1, 'indexer_configuration_id': self.tool['id'], } for sha1 in ids )) def index(self, id, data): """Index sha1s' content and store result. Args: id (bytes): content's identifier data (bytes): raw content in bytes Returns: dict: dictionary representing a content_metadata. If the translation wasn't successful the translated_metadata keys will be returned as None """ result = { 'id': id, 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } try: context = self.tool['tool_configuration']['context'] result['translated_metadata'] = compute_metadata(context, data) # a twisted way to keep result with indexer object for get_results self.results.append(result) except Exception: self.log.exception( "Problem during tool retrieval of metadata translation") return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: results ([dict]): list of content_metadata, dict with the following keys: - id (bytes): content's identifier (sha1) - translated_metadata (jsonb): detected metadata policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ self.idx_storage.content_metadata_add( results, conflict_update=(policy_update == 'update-dups')) def get_results(self): """can be called only if run method was called before Returns: list: list of content_metadata entries calculated by current indexer """ return self.results class RevisionMetadataIndexer(RevisionIndexer): """Revision-level indexer This indexer is in charge of: - filtering revisions already indexed in revision_metadata table with defined computation tool - retrieve all entry_files in root directory - use metadata_detector for file_names containing metadata - compute metadata translation if necessary and possible (depends on tool) - send sha1s to content indexing if possible - store the results for revision """ CONFIG_BASE_FILENAME = 'indexer/metadata' ADDITIONAL_CONFIG = { 'tools': ('dict', { 'name': 'swh-metadata-detector', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': ['npm', 'codemeta'] }, }), } ContentMetadataIndexer = ContentMetadataIndexer def prepare(self): super().prepare() self.tool = self.tools[0] def filter(self, sha1_gits): """Filter out known sha1s and return only missing ones. """ yield from self.idx_storage.revision_metadata_missing(( { 'id': sha1_git, 'indexer_configuration_id': self.tool['id'], } for sha1_git in sha1_gits )) def index(self, rev): """Index rev by processing it and organizing result. 
use metadata_detector to iterate on filenames - if one filename detected -> sends file to content indexer - if multiple file detected -> translation needed at revision level Args: rev (bytes): revision artifact from storage Returns: dict: dictionary representing a revision_metadata, with keys: - id (bytes): rev's identifier (sha1_git) - indexer_configuration_id (bytes): tool used - translated_metadata (bytes): dict of retrieved metadata """ try: result = { 'id': rev['id'], 'indexer_configuration_id': self.tool['id'], 'translated_metadata': None } root_dir = rev['directory'] dir_ls = self.storage.directory_ls(root_dir, recursive=False) files = (entry for entry in dir_ls if entry['type'] == 'file') detected_files = detect_metadata(files) result['translated_metadata'] = self.translate_revision_metadata( detected_files) except Exception as e: self.log.exception( 'Problem when indexing rev') return result def persist_index_computations(self, results, policy_update): """Persist the results in storage. Args: results ([dict]): list of content_mimetype, dict with the following keys: - id (bytes): content's identifier (sha1) - mimetype (bytes): mimetype in bytes - encoding (bytes): encoding in bytes policy_update ([str]): either 'update-dups' or 'ignore-dups' to respectively update duplicates or ignore them """ # TODO: add functions in storage to keep data in revision_metadata self.idx_storage.revision_metadata_add( results, conflict_update=(policy_update == 'update-dups')) def translate_revision_metadata(self, detected_files): """ Determine plan of action to translate metadata when containing one or multiple detected files: Args: detected_files (dict): dictionary mapping context names (e.g., "npm", "authors") to list of sha1 Returns: dict: dict with translated metadata according to the CodeMeta vocabulary """ translated_metadata = [] tool = { 'name': 'swh-metadata-translator', 'version': '0.0.1', 'configuration': { 'type': 'local', 'context': None }, } # TODO: iterate on each context, on each file # -> get raw_contents # -> translate each content config = { INDEXER_CFG_KEY: self.idx_storage, 'objstorage': self.objstorage } for context in detected_files.keys(): tool['configuration']['context'] = context c_metadata_indexer = self.ContentMetadataIndexer(tool, config) # sha1s that are in content_metadata table sha1s_in_storage = [] metadata_generator = self.idx_storage.content_metadata_get( detected_files[context]) for c in metadata_generator: # extracting translated_metadata sha1 = c['id'] sha1s_in_storage.append(sha1) local_metadata = c['translated_metadata'] # local metadata is aggregated if local_metadata: translated_metadata.append(local_metadata) sha1s_filtered = [item for item in detected_files[context] if item not in sha1s_in_storage] if sha1s_filtered: # schedule indexation of content try: c_metadata_indexer.run(sha1s_filtered, policy_update='ignore-dups') # on the fly possibility: results = c_metadata_indexer.get_results() for result in results: local_metadata = result['translated_metadata'] translated_metadata.append(local_metadata) except Exception as e: self.log.warn("""Exception while indexing content""", e) # transform translated_metadata into min set with swh-metadata-detector min_metadata = extract_minimal_metadata_dict(translated_metadata) return min_metadata @click.command() @click.option('--revs', '-i', - default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', - '026040ea79dec1b49b4e3e7beda9132b6b26b51b', - '9699072e21eded4be8d45e3b8d543952533fa190'], help='Default sha1_git to lookup', 
multiple=True) def main(revs): _git_sha1s = list(map(hashutil.hash_to_bytes, revs)) rev_metadata_indexer = RevisionMetadataIndexer() rev_metadata_indexer.run(_git_sha1s, 'update-dups') if __name__ == '__main__': logging.basicConfig(level=logging.INFO) main() diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py new file mode 100644 index 0000000..86c622d --- /dev/null +++ b/swh/indexer/origin_head.py @@ -0,0 +1,163 @@ +# Copyright (C) 2018 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import re +import click +import logging + +from swh.indexer.indexer import OriginIndexer + + +class OriginHeadIndexer(OriginIndexer): + """Origin-level indexer. + + This indexer is in charge of looking up the revision that acts as the + "head" of an origin. + + In git, this is usually the commit pointed to by the 'master' branch.""" + + ADDITIONAL_CONFIG = { + 'tools': ('dict', { + 'name': 'origin-metadata', + 'version': '0.0.1', + 'configuration': {}, + }), + } + + def filter(self, ids): + yield from ids + + def persist_index_computations(self, results, policy_update): + """Do nothing. The indexer's results are not persistant, they + should only be piped to another indexer via the orchestrator.""" + pass + + # Dispatch + + def index(self, origin): + origin_id = origin['id'] + latest_snapshot = self.storage.snapshot_get_latest(origin_id) + method = getattr(self, '_try_get_%s_head' % origin['type'], None) + if method is None: + method = self._try_get_head_generic + rev_id = method(latest_snapshot) + if rev_id is None: + return None + result = { + 'origin_id': origin_id, + 'revision_id': rev_id, + } + return result + + # VCSs + + def _try_get_vcs_head(self, snapshot): + try: + if isinstance(snapshot, dict): + branches = snapshot['branches'] + if branches[b'HEAD']['target_type'] == 'revision': + return branches[b'HEAD']['target'] + except KeyError: + return None + + _try_get_hg_head = _try_get_git_head = _try_get_vcs_head + + # Tarballs + + _archive_filename_re = re.compile( + b'^' + b'(?P.*)[-_]' + b'(?P[0-9]+(\.[0-9])*)' + b'(?P[-+][a-zA-Z0-9.~]+?)?' 
+ b'(?P(\.[a-zA-Z0-9]+)+)' + b'$') + + @classmethod + def _parse_version(cls, filename): + """Extracts the release version from an archive filename, + to get an ordering whose maximum is likely to be the last + version of the software + + >>> OriginHeadIndexer._parse_version(b'foo') + (-inf,) + >>> OriginHeadIndexer._parse_version(b'foo.tar.gz') + (-inf,) + >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1.tar.gz') + (0, 0, 1, 0) + >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1-beta2.tar.gz') + (0, 0, 1, -1, 'beta2') + >>> OriginHeadIndexer._parse_version(b'gnu-hello-0.0.1+foobar.tar.gz') + (0, 0, 1, 1, 'foobar') + """ + res = cls._archive_filename_re.match(filename) + if res is None: + return (float('-infinity'),) + version = [int(n) for n in res.group('version').decode().split('.')] + if res.group('preversion') is None: + version.append(0) + else: + preversion = res.group('preversion').decode() + if preversion.startswith('-'): + version.append(-1) + version.append(preversion[1:]) + elif preversion.startswith('+'): + version.append(1) + version.append(preversion[1:]) + else: + assert False, res.group('preversion') + return tuple(version) + + def _try_get_ftp_head(self, snapshot): + archive_names = list(snapshot['branches']) + max_archive_name = max(archive_names, key=self._parse_version) + r = self._try_resolve_target(snapshot['branches'], max_archive_name) + return r + + # Generic + + def _try_get_head_generic(self, snapshot): + # Works on 'deposit', 'svn', and 'pypi'. + try: + if isinstance(snapshot, dict): + branches = snapshot['branches'] + except KeyError: + return None + else: + return ( + self._try_resolve_target(branches, b'HEAD') or + self._try_resolve_target(branches, b'master') + ) + + def _try_resolve_target(self, branches, target_name): + try: + target = branches[target_name] + while target['target_type'] == 'alias': + target = branches[target['target']] + if target['target_type'] == 'revision': + return target['target'] + elif target['target_type'] == 'content': + return None # TODO + elif target['target_type'] == 'directory': + return None # TODO + elif target['target_type'] == 'release': + return None # TODO + else: + assert False + except KeyError: + return None + + +@click.command() +@click.option('--origins', '-i', + help='Origins to lookup, in the "type+url" format', + multiple=True) +def main(origins): + rev_metadata_indexer = OriginHeadIndexer() + rev_metadata_indexer.run(origins, 'update-dups', parse_ids=True) + + +if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + main() diff --git a/swh/indexer/tests/test_origin_head.py b/swh/indexer/tests/test_origin_head.py new file mode 100644 index 0000000..62912ab --- /dev/null +++ b/swh/indexer/tests/test_origin_head.py @@ -0,0 +1,213 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import unittest +import logging +from nose.tools import istest + +from swh.indexer.origin_head import OriginHeadIndexer +from swh.indexer.tests.test_utils import MockIndexerStorage + +ORIGINS = [ + { + 'id': 52189575, + 'lister': None, + 'project': None, + 'type': 'git', + 'url': 'https://github.com/SoftwareHeritage/swh-storage'}, + { + 'id': 4423668, + 'lister': None, + 'project': None, + 'type': 'ftp', + 'url': 'rsync://ftp.gnu.org/gnu/3dldf'}, + { + 'id': 77775770, + 'lister': None, + 'project': None, + 
'type': 'deposit', + 'url': 'https://forge.softwareheritage.org/source/jesuisgpl/'}, + { + 'id': 85072327, + 'lister': None, + 'project': None, + 'type': 'pypi', + 'url': 'https://pypi.org/project/limnoria/'}, + { + 'id': 49908349, + 'lister': None, + 'project': None, + 'type': 'svn', + 'url': 'http://0-512-md.googlecode.com/svn/'}, + ] + +SNAPSHOTS = { + 52189575: { + 'branches': { + b'refs/heads/add-revision-origin-cache': { + 'target': b'L[\xce\x1c\x88\x8eF\t\xf1"\x19\x1e\xfb\xc0' + b's\xe7/\xe9l\x1e', + 'target_type': 'revision'}, + b'HEAD': { + 'target': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{\xd7}' + b'\xac\xefrm', + 'target_type': 'revision'}, + b'refs/tags/v0.0.103': { + 'target': b'\xb6"Im{\xfdLb\xb0\x94N\xea\x96m\x13x\x88+' + b'\x0f\xdd', + 'target_type': 'release'}, + }}, + 4423668: { + 'branches': { + b'3DLDF-1.1.4.tar.gz': { + 'target': b'dJ\xfb\x1c\x91\xf4\x82B%]6\xa2\x90|\xd3\xfc' + b'"G\x99\x11', + 'target_type': 'revision'}, + b'3DLDF-2.0.2.tar.gz': { + 'target': b'\xb6\x0e\xe7\x9e9\xac\xaa\x19\x9e=' + b'\xd1\xc5\x00\\\xc6\xfc\xe0\xa6\xb4V', + 'target_type': 'revision'}, + b'3DLDF-2.0.3-examples.tar.gz': { + 'target': b'!H\x19\xc0\xee\x82-\x12F1\xbd\x97' + b'\xfe\xadZ\x80\x80\xc1\x83\xff', + 'target_type': 'revision'}, + b'3DLDF-2.0.3.tar.gz': { + 'target': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee' + b'\xcc\x1a\xb4`\x8c\x8by', + 'target_type': 'revision'}, + b'3DLDF-2.0.tar.gz': { + 'target': b'F6*\xff(?\x19a\xef\xb6\xc2\x1fv$S\xe3G' + b'\xd3\xd1m', + b'target_type': 'revision'} + }}, + 77775770: { + 'branches': { + b'master': { + 'target': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{' + b'\xa6\xe9\x99\xb1\x9e]q\xeb', + 'target_type': 'revision'} + }, + 'id': b"h\xc0\xd2a\x04\xd4~'\x8d\xd6\xbe\x07\xeda\xfa\xfbV" + b"\x1d\r "}, + 85072327: { + 'branches': { + b'HEAD': { + 'target': b'releases/2018.09.09', + 'target_type': 'alias'}, + b'releases/2018.09.01': { + 'target': b'<\xee1(\xe8\x8d_\xc1\xc9\xa6rT\xf1\x1d' + b'\xbb\xdfF\xfdw\xcf', + 'target_type': 'revision'}, + b'releases/2018.09.09': { + 'target': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k' + b'A\x10\x9d\xc5\xfa2\xf8t', + 'target_type': 'revision'}}, + 'id': b'{\xda\x8e\x84\x7fX\xff\x92\x80^\x93V\x18\xa3\xfay' + b'\x12\x9e\xd6\xb3'}, + 49908349: { + 'branches': { + b'master': { + 'target': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8' + b'\xc9\xad#.\x1bw=\x18', + 'target_type': 'revision'}}, + 'id': b'\xa1\xa2\x8c\n\xb3\x87\xa8\xf9\xe0a\x8c\xb7' + b'\x05\xea\xb8\x1f\xc4H\xf4s'}, + } + + +class MockStorage: + def origin_get(self, id_): + for origin in ORIGINS: + if origin['type'] == id_['type'] and origin['url'] == id_['url']: + return origin + assert False, id_ + + def snapshot_get_latest(self, origin_id): + if origin_id in SNAPSHOTS: + return SNAPSHOTS[origin_id] + else: + assert False, origin_id + + +class TestOriginHeadIndexer(OriginHeadIndexer): + """Specific indexer whose configuration is enough to satisfy the + indexing tests. 
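+
+    prepare() is overridden so that no configuration file is read; the
+    mock storages defined above are plugged in instead.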
+ """ + def prepare(self): + self.config = { + 'tools': { + 'name': 'origin-metadata', + 'version': '0.0.1', + 'configuration': {}, + }, + } + self.storage = MockStorage() + self.idx_storage = MockIndexerStorage() + self.log = logging.getLogger('swh.indexer') + self.objstorage = None + self.tools = self.register_tools(self.config['tools']) + self.tool = self.tools[0] + self.results = None + + def persist_index_computations(self, results, policy_update): + self.results = results + + +class OriginHead(unittest.TestCase): + @istest + def test_git(self): + indexer = TestOriginHeadIndexer() + indexer.run( + ['git+https://github.com/SoftwareHeritage/swh-storage'], + 'update-dups', parse_ids=True) + self.assertEqual(indexer.results, [{ + 'revision_id': b'8K\x12\x00d\x03\xcc\xe4]bS\xe3\x8f{' + b'\xd7}\xac\xefrm', + 'origin_id': 52189575}]) + + @istest + def test_ftp(self): + indexer = TestOriginHeadIndexer() + indexer.run( + ['ftp+rsync://ftp.gnu.org/gnu/3dldf'], + 'update-dups', parse_ids=True) + self.assertEqual(indexer.results, [{ + 'revision_id': b'\x8e\xa9\x8e/\xea}\x9feF\xf4\x9f\xfd\xee' + b'\xcc\x1a\xb4`\x8c\x8by', + 'origin_id': 4423668}]) + + @istest + def test_deposit(self): + indexer = TestOriginHeadIndexer() + indexer.run( + ['deposit+https://forge.softwareheritage.org/source/' + 'jesuisgpl/'], + 'update-dups', parse_ids=True) + self.assertEqual(indexer.results, [{ + 'revision_id': b'\xe7n\xa4\x9c\x9f\xfb\xb7\xf76\x11\x08{' + b'\xa6\xe9\x99\xb1\x9e]q\xeb', + 'origin_id': 77775770}]) + + @istest + def test_pypi(self): + indexer = TestOriginHeadIndexer() + indexer.run( + ['pypi+https://pypi.org/project/limnoria/'], + 'update-dups', parse_ids=True) + self.assertEqual(indexer.results, [{ + 'revision_id': b'\x83\xb9\xb6\xc7\x05\xb1%\xd0\xfem\xd8k' + b'A\x10\x9d\xc5\xfa2\xf8t', + 'origin_id': 85072327}]) + + @istest + def test_svn(self): + indexer = TestOriginHeadIndexer() + indexer.run( + ['svn+http://0-512-md.googlecode.com/svn/'], + 'update-dups', parse_ids=True) + self.assertEqual(indexer.results, [{ + 'revision_id': b'\xe4?r\xe1,\x88\xab\xec\xe7\x9a\x87\xb8' + b'\xc9\xad#.\x1bw=\x18', + 'origin_id': 49908349}]) diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py index a1090d2..4a0f1c5 100644 --- a/swh/indexer/tests/test_utils.py +++ b/swh/indexer/tests/test_utils.py @@ -1,260 +1,269 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.objstorage.exc import ObjNotFoundError class MockObjStorage: """Mock an swh-objstorage objstorage with predefined contents. 
""" data = {} def __init__(self): self.data = { '01c9379dfc33803963d07c1ccc748d3fe4c96bb5': b'this is some text', '688a5ef812c53907562fe379d4b3851e69c7cb15': b'another text', '8986af901dd2043044ce8f0d8fc039153641cf17': b'yet another text', '02fb2c89e14f7fab46701478c83779c7beb7b069': b""" import unittest import logging from swh.indexer.mimetype import ContentMimetypeIndexer from swh.indexer.tests.test_utils import MockObjStorage class MockStorage(): def content_mimetype_add(self, mimetypes): self.state = mimetypes self.conflict_update = conflict_update def indexer_configuration_add(self, tools): return [{ 'id': 10, }] """, '103bc087db1d26afc3a0283f38663d081e9b01e6': b""" #ifndef __AVL__ #define __AVL__ typedef struct _avl_tree avl_tree; typedef struct _data_t { int content; } data_t; """, '93666f74f1cf635c8c8ac118879da6ec5623c410': b""" (should 'pygments (recognize 'lisp 'easily)) """, '26a9f72a7c87cc9205725cfd879f514ff4f3d8d5': b""" { "name": "test_metadata", "version": "0.0.1", "description": "Simple package.json test for indexer", "repository": { "type": "git", "url": "https://github.com/moranegg/metadata_test" } } """, 'd4c647f0fc257591cc9ba1722484229780d1c607': b""" { "version": "5.0.3", "name": "npm", "description": "a package manager for JavaScript", "keywords": [ "install", "modules", "package manager", "package.json" ], "preferGlobal": true, "config": { "publishtest": false }, "homepage": "https://docs.npmjs.com/", "author": "Isaac Z. Schlueter (http://blog.izs.me)", "repository": { "type": "git", "url": "https://github.com/npm/npm" }, "bugs": { "url": "https://github.com/npm/npm/issues" }, "dependencies": { "JSONStream": "~1.3.1", "abbrev": "~1.1.0", "ansi-regex": "~2.1.1", "ansicolors": "~0.3.2", "ansistyles": "~0.1.3" }, "devDependencies": { "tacks": "~1.2.6", "tap": "~10.3.2" }, "license": "Artistic-2.0" } """, 'a7ab314d8a11d2c93e3dcf528ca294e7b431c449': b""" """, 'da39a3ee5e6b4b0d3255bfef95601890afd80709': b'', } def __iter__(self): yield from self.data.keys() def __contains__(self, sha1): return self.data.get(sha1) is not None def get(self, sha1): raw_content = self.data.get(sha1) if raw_content is None: raise ObjNotFoundError(sha1) return raw_content class MockIndexerStorage(): """Mock an swh-indexer storage. 
""" def indexer_configuration_add(self, tools): tool = tools[0] if tool['tool_name'] == 'swh-metadata-translator': return [{ 'id': 30, 'tool_name': 'swh-metadata-translator', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] elif tool['tool_name'] == 'swh-metadata-detector': return [{ 'id': 7, 'tool_name': 'swh-metadata-detector', 'tool_version': '0.0.1', 'tool_configuration': { 'type': 'local', 'context': 'npm' }, }] + elif tool['tool_name'] == 'origin-metadata': + return [{ + 'id': 8, + 'tool_name': 'origin-metadata', + 'tool_version': '0.0.1', + 'tool_configuration': {}, + }] + else: + assert False, 'Unknown tool {tool_name}'.format(**tool) def content_metadata_missing(self, sha1s): yield from [] def content_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def revision_metadata_add(self, metadata, conflict_update=None): self.state = metadata self.conflict_update = conflict_update def content_metadata_get(self, sha1s): return [{ 'tool': { 'configuration': { 'type': 'local', 'context': 'npm' }, 'version': '0.0.1', 'id': 6, 'name': 'swh-metadata-translator' }, 'id': b'cde', 'translated_metadata': { 'issueTracker': { 'url': 'https://github.com/librariesio/yarn-parser/issues' }, 'version': '1.0.0', 'name': 'yarn-parser', 'author': 'Andrew Nesbitt', 'url': 'https://github.com/librariesio/yarn-parser#readme', 'processorRequirements': {'node': '7.5'}, 'other': { 'scripts': { 'start': 'node index.js' }, 'main': 'index.js' }, 'license': 'AGPL-3.0', 'keywords': ['yarn', 'parse', 'lock', 'dependencies'], 'codeRepository': { 'type': 'git', 'url': 'git+https://github.com/librariesio/yarn-parser.git' }, 'description': 'Tiny web service for parsing yarn.lock files', 'softwareRequirements': { 'yarn': '^0.21.0', 'express': '^4.14.0', 'body-parser': '^1.15.2'} } }] class MockStorage(): """Mock a real swh-storage storage to simplify reading indexers' outputs. """ def revision_get(self, revisions): return [{ 'id': b'8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f', 'committer': { 'id': 26, 'name': b'Andrew Nesbitt', 'fullname': b'Andrew Nesbitt ', 'email': b'andrewnez@gmail.com' }, 'synthetic': False, 'date': { 'negative_utc': False, 'timestamp': { 'seconds': 1487596456, 'microseconds': 0 }, 'offset': 0 }, 'directory': b'10' }] def directory_ls(self, directory, recursive=False, cur=None): # with directory: b'\x9d', return [{ 'sha1_git': b'abc', 'name': b'index.js', 'target': b'abc', 'length': 897, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'bcd' }, { 'sha1_git': b'aab', 'name': b'package.json', 'target': b'aab', 'length': 712, 'status': 'visible', 'type': 'file', 'perms': 33188, 'dir_id': b'10', 'sha1': b'cde' }, { 'dir_id': b'10', 'target': b'11', 'type': 'dir', 'length': None, 'name': b'.github', 'sha1': None, 'perms': 16384, 'sha1_git': None, 'status': None, 'sha256': None }]