diff --git a/debian/control b/debian/control
index 65661f8..0f723a2 100644
--- a/debian/control
+++ b/debian/control
@@ -1,24 +1,25 @@
 Source: swh-indexer
 Maintainer: Software Heritage developers <swh-devel@inria.fr>
 Section: python
 Priority: optional
 Build-Depends: debhelper (>= 9),
                dh-python,
                python3-all,
                python3-nose,
                python3-setuptools,
                python3-swh.core (>= 0.0.27~),
+               python3-swh.model (>= 0.0.13~),
                python3-swh.storage (>= 0.0.75~),
                python3-swh.objstorage (>= 0.0.13~),
                python3-swh.scheduler (>= 0.0.9~),
                python3-chardet (>= 2.3.0~),
                python3-click,
                python3-pygments,
                python3-vcversioner
 Standards-Version: 3.9.6
 Homepage: https://forge.softwareheritage.org/diffusion/78/
 
 Package: python3-swh.indexer
 Architecture: all
 Depends: universal-ctags (>= 0.8~), fossology-nomossa (>= 3.1~), ${misc:Depends}, ${python3:Depends}
 Description: Software Heritage Content Indexer
diff --git a/requirements-swh.txt b/requirements-swh.txt
index afbdf6c..4f37752 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
 swh.core >= 0.0.27
 swh.storage >= 0.0.75
 swh.objstorage >= 0.0.13
 swh.scheduler >= 0.0.9
+swh.model >= 0.0.13
diff --git a/swh/indexer/ctags.py b/swh/indexer/ctags.py
index 6bdd78d..b99c7b2 100644
--- a/swh/indexer/ctags.py
+++ b/swh/indexer/ctags.py
@@ -1,164 +1,164 @@
-# Copyright (C) 2015-2016  The Software Heritage developers
+# Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import click
 import subprocess
 import json
 
-from swh.core import hashutil
+from swh.model import hashutil
 
 from .language import compute_language
 from .indexer import BaseIndexer, DiskIndexer
 
 
 # Command-line options passed to every ctags invocation (see run_ctags).
 __FLAGS = [
     '--fields=+lnz',  # +l: language
                       # +n: line number of tag definition
                       # +z: include the symbol's kind (function, variable, ...)
     '--sort=no',      # do not sort the output on tag name
     '--links=no',     # do not follow symlinks
     '--output-format=json',  # one JSON object per line (parsed by run_ctags)
 ]
 
 
 def run_ctags(path, lang=None, ctags_command='ctags'):
     """Run ctags on a file and yield the symbols it reports.
 
     Args:
         path: path to the file to analyse
         lang: ctags language to force for that file (optional)
         ctags_command: name or path of the ctags executable to run
 
     Yields:
         One dict per symbol found in ctags' JSON output, with the keys
         name, kind, line and lang.
 
     """
     command = [ctags_command] + __FLAGS
     if lang:
         command.append('--language-force=%s' % lang)
     command.append(path)
 
     raw_output = subprocess.check_output(command, universal_newlines=True)
 
     # Empty lines (e.g. after the trailing newline) carry no symbol.
     for line in filter(None, raw_output.split('\n')):
         entry = json.loads(line)
         yield {
             'name': entry['name'],
             'kind': entry['kind'],
             'line': entry['line'],
             'lang': entry['language'],
         }
 
 
 class CtagsIndexer(BaseIndexer, DiskIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
     - detecting the content's language and mapping it to a ctags language
     - running ctags on a temporary copy of that content
     - store result in storage
 
     """
     CONFIG_BASE_FILENAME = 'indexer/ctags'
 
     ADDITIONAL_CONFIG = {
         'workdir': ('str', '/tmp/swh/indexer.ctags'),
         'tool': ('dict', {
             'name': 'universal-ctags',
             'version': '~git7859817b',
             'command': '/usr/bin/ctags',
         }),
         'languages': ('dict', {
             'ada': 'Ada',
             'adl': None,
             'agda': None,
             # ...
         })
     }
 
     def __init__(self):
         super().__init__()
         self.working_directory = self.config['workdir']
         self.language_map = self.config['languages']
         self.ctags_command = self.config['tool']['command']
         self.tool_name = self.config['tool']['name']
         self.tool_version = self.config['tool']['version']
 
     def filter_contents(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         Args:
             sha1s ([bytes]): contents' identifiers to check
 
         Yields:
             whatever storage.content_ctags_missing yields for the
             configured tool name and version
 
         """
         yield from self.storage.content_ctags_missing((
             {
                 'id': sha1,
                 'tool_name': self.tool_name,
                 'tool_version': self.tool_version
             } for sha1 in sha1s
         ))
 
     def index_content(self, sha1, raw_content):
         """Compute the ctags symbols of one content.
 
         Args:
             sha1 (bytes): content's identifier
             raw_content (bytes): raw content in bytes
 
         Returns:
             None when no language is detected or when the detected
             language has no ctags counterpart in the language map.
             Otherwise a dict, representing a content_ctags, with keys:
               - id (bytes): content's identifier (sha1)
               - ctags ([dict]): ctags list of symbols
               - tool_name (str): name of the tool used
               - tool_version (str): version of the tool used
 
         """
         lang = compute_language(raw_content)['lang']
 
         if not lang:
             return None
 
         ctags_lang = self.language_map.get(lang)
 
         if not ctags_lang:
             return None
 
         filename = hashutil.hash_to_hex(sha1)
         content_path = self.write_to_temp(
             filename=filename,
             data=raw_content)
 
         try:
             # run_ctags is a generator: consume it while the file exists.
             symbols = list(run_ctags(content_path,
                                      lang=ctags_lang,
                                      ctags_command=self.ctags_command))
         finally:
             # Remove the temporary copy even when ctags fails, so that
             # failures do not pile up directories in the working directory.
             self.cleanup(content_path)
 
         return {
             'id': sha1,
             'ctags': symbols,
             'tool_name': self.tool_name,
             'tool_version': self.tool_version,
         }
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_ctags, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - ctags ([dict]): ctags list of symbols
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         self.storage.content_ctags_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
 
 @click.command()
 @click.option('--path', help="Path to execute index on")
 def main(path):
     # Command-line helper: print the list of symbols ctags finds in `path`.
     print(list(run_ctags(path)))
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py
index 05ce54b..04ae709 100644
--- a/swh/indexer/fossology_license.py
+++ b/swh/indexer/fossology_license.py
@@ -1,138 +1,138 @@
-# Copyright (C) 2016  The Software Heritage developers
+# Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import click
 import subprocess
 
-from swh.core import hashutil
+from swh.model import hashutil
 
 from .indexer import BaseIndexer, DiskIndexer
 
 
 def compute_license(tool, path):
     """Determine license from file at path.
 
     Args:
         tool: command (name or path) of the license detection tool
         path: filepath to determine the license
 
     Returns:
         None when the tool prints nothing, otherwise a dict with the
         following keys:
         - licenses ([str]): associated detected licenses to path
         - path: content filepath, as received
 
     """
     output = subprocess.check_output([tool, path], universal_newlines=True)
     if not output:
         return None
 
     # The tool's line reads "<file> contains license(s) <l1>,<l2>,...".
     fields = output.rstrip().split(' contains license(s) ')
     return {
         'licenses': fields[1].split(','),
         'path': path,
     }
 
 
 class ContentFossologyLicenseIndexer(BaseIndexer, DiskIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
     - reading content from objstorage per the content's id (sha1)
     - computing the licenses of that content
     - store result in storage
 
     """
     ADDITIONAL_CONFIG = {
         'workdir': ('str', '/tmp/swh/indexer.fossology.license'),
         'tool': ('dict', {
             'name': 'nomos',
             'version': '3.1.0rc2-31-ga2cbb8c',
             'command': '/usr/bin/nomossa',
         }),
     }
 
     CONFIG_BASE_FILENAME = 'indexer/fossology_license'
 
     def __init__(self):
         super().__init__()
         self.working_directory = self.config['workdir']
         self.tool = self.config['tool']['command']
         self.tool_name = self.config['tool']['name']
         self.tool_version = self.config['tool']['version']
 
     def filter_contents(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         Args:
             sha1s ([bytes]): contents' identifiers to check
 
         Yields:
             whatever storage.content_fossology_license_missing yields
             for the configured tool name and version
 
         """
         yield from self.storage.content_fossology_license_missing((
             {
                 'id': sha1,
                 'tool_name': self.tool_name,
                 'tool_version': self.tool_version
             } for sha1 in sha1s
         ))
 
     def index_content(self, sha1, content):
         """Compute the licenses of one content.
 
         Args:
             sha1 (bytes): content's identifier
             content (bytes): raw content in bytes
 
         Returns:
             A dict, representing a content_license, with keys:
               - id (bytes): content's identifier (sha1)
               - licenses ([str]): licenses detected by the tool
               - path (str): path of the temporary file analysed
               - tool_name (str): name of the tool used
               - tool_version (str): version of the tool used
 
         """
         filename = hashutil.hash_to_hex(sha1)
         content_path = self.write_to_temp(
             filename=filename,
             data=content)
 
         try:
             properties = compute_license(self.tool, path=content_path)
         finally:
             # Remove the temporary copy even when the tool fails, so that
             # failures do not pile up directories in the working directory.
             self.cleanup(content_path)
 
         properties.update({
             'id': sha1,
             'tool_name': self.tool_name,
             'tool_version': self.tool_version,
         })
         return properties
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Entries returned by the storage are logged as warnings.
         NOTE(review): presumably these are the contents whose licenses
         are unknown to the storage; confirm against
         content_fossology_license_add.
 
         Args:
             results ([dict]): list of content_license, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - licenses ([str]): licenses detected by the tool
               - path (str): path
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         wrong_licenses = self.storage.content_fossology_license_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
         if wrong_licenses:
             for entry in wrong_licenses:
                 # Logger.warn is a deprecated alias of Logger.warning.
                 self.log.warning(
                     'Content %s has some unknown licenses: %s',
                     hashutil.hash_to_hex(entry['id']),
                     ','.join(entry['licenses']))
 
 
 @click.command(help='Compute license for path using tool')
 @click.option('--tool', default='nomossa', help="Path to tool")
 @click.option('--path', required=1, help="Path to execute index on")
 def main(tool, path):
     # Command-line helper: print what `tool` detects for the file at `path`.
     detected = compute_license(tool, path)
     print(detected)
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
index 289c65a..de4337a 100644
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -1,240 +1,240 @@
-# Copyright (C) 2016  The Software Heritage developers
+# Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import abc
 import os
 import logging
 import shutil
 import tempfile
 
-from swh.core import hashutil
 from swh.core.config import SWHConfig
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
+from swh.model import hashutil
 from swh.storage import get_storage
 
 
 class BaseIndexer(SWHConfig,
                   metaclass=abc.ABCMeta):
     """Base class for indexers to inherit from.
 
     The main entry point is the `run` method which is in charge of
     triggering the computations on the sha1s batch received as
     parameter.
 
     Indexers can:
     - filter out sha1 whose data has already been indexed.
     - retrieve sha1's content from objstorage, index this content then
       store the result in storage.
 
     Thus the following interface to implement per inheriting class:
       - def filter_contents(self, sha1s): filter out data already
         indexed (in storage)
 
       - def index_content(self, sha1, content): compute index on sha1 with
         data content (stored by sha1 in objstorage) and return the
         result.
 
       - def persist_index_computations(self, results, policy_update):
         the function to store the results (as per index_content
         defined).
 
     """
     CONFIG_BASE_FILENAME = 'indexer/base'
 
     DEFAULT_CONFIG = {
         'storage': ('dict', {
             'host': 'uffizi',
             'cls': 'remote',
             'args': {'root': '/tmp/softwareheritage/objects',
                      'slicing': '0:2/2:4/4:6'}
         }),
         'objstorage': ('dict', {
             'cls': 'multiplexer',
             'args': {
                 'objstorages': [{
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '0euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '0'}
                         ]
                     }
                 }, {
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '1euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '1'}
                         ]
                     }
                 }]
             },
         }),
     }
 
     # Extra configuration entries; subclasses override this.
     ADDITIONAL_CONFIG = {}
 
     def __init__(self):
         super().__init__()
         self.config = self.parse_config_file(
             additional_configs=[self.ADDITIONAL_CONFIG])
         objstorage = self.config['objstorage']
         self.objstorage = get_objstorage(objstorage['cls'], objstorage['args'])
         storage = self.config['storage']
         self.storage = get_storage(storage['cls'], storage['args'])
         # Only let warnings and above through from the urllib3 connection
         # pool logger.
         pool_logger = logging.getLogger(
             'requests.packages.urllib3.connectionpool')
         pool_logger.setLevel(logging.WARNING)
         self.log = logging.getLogger('swh.indexer')
 
     @abc.abstractmethod
     def filter_contents(self, sha1s):
         """Filter missing sha1 for that particular indexer.
 
         Args:
             sha1s ([bytes]): list of contents' sha1
 
         Yields:
             iterator of missing sha1
 
         """
         pass
 
     @abc.abstractmethod
     def index_content(self, sha1, content):
         """Index computation for the sha1 and associated raw content.
 
         Args:
             sha1 (bytes): sha1 identifier
             content (bytes): sha1's raw content
 
         Returns:
             a dict that makes sense for the persist_index_computations
             function. A falsy value makes `run` skip that content.
 
         """
         pass
 
     @abc.abstractmethod
     def persist_index_computations(self, results, policy_update):
         """Persist the computation resulting from the index.
 
         Args:
             results ([result]): List of results. One result is the
             result of the index_content function.
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         Returns:
             None
 
         """
         pass
 
     def next_step(self, results):
         """Do something else with computations results (e.g. send to another
         queue, ...).
 
         (This is not an abstractmethod since it is optional).
 
         Args:
             results ([result]): List of results (dict) as returned
             by index_content function.
 
         Returns:
             None
 
         """
         pass
 
     def run(self, sha1s, policy_update):
         """Given a list of sha1s:
         - retrieve the content from the objstorage
         - execute the indexing computations
         - store the results (according to policy_update)
 
         Contents missing from the objstorage are logged and skipped.
 
         Args:
             sha1s ([bytes]): sha1's identifier list
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         results = []
         for sha1 in sha1s:
             try:
                 raw_content = self.objstorage.get(sha1)
             except ObjNotFoundError:
                 # Logger.warn is a deprecated alias of Logger.warning; the
                 # arguments are passed lazily to the logger.
                 self.log.warning('Content %s not found in objstorage',
                                  hashutil.hash_to_hex(sha1))
                 continue
             res = self.index_content(sha1, raw_content)
             if res:  # If no results, skip it
                 results.append(res)
 
         self.persist_index_computations(results, policy_update)
         self.next_step(results)
 
 
 class DiskIndexer:
     """Mixin for the *Indexer classes whose computations need the disk.
 
        It writes a content to a temporary file and removes it
        afterwards.
 
        Expects:
            self.working_directory variable defined at runtime.
 
     """
     def __init__(self):
         super().__init__()
 
     def write_to_temp(self, filename, data):
         """Write data to a file inside a fresh temporary directory.
 
         The temporary directory is created under
         self.working_directory, which is created first when missing.
 
         Args:
             filename (str): name to give to the file
             data (bytes): the content to write in the file
 
         Returns:
             The path to the file created, filled in with data.
 
         """
         workdir = self.working_directory
         os.makedirs(workdir, exist_ok=True)
         content_path = os.path.join(tempfile.mkdtemp(dir=workdir), filename)
 
         with open(content_path, 'wb') as out:
             out.write(data)
 
         return content_path
 
     def cleanup(self, content_path):
         """Remove the directory holding content_path, and all it contains.
 
         Args:
             content_path (str): path of the file to remove, as returned
             by write_to_temp
 
         """
         shutil.rmtree(os.path.dirname(content_path))
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
index 97fc605..5aec7d7 100644
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -1,153 +1,153 @@
-# Copyright (C) 2016  The Software Heritage developers
+# Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import click
 import subprocess
 
-from swh.core import hashutil
+from swh.model import hashutil
 from swh.scheduler.celery_backend.config import app
 
 from .indexer import BaseIndexer, DiskIndexer
 
 
 def compute_mimetype_encoding(path):
     """Determine mimetype and encoding from file at path.
 
     Args:
         path: filepath to determine the mime type
 
     Returns:
         None when the `file` command prints nothing, otherwise a dict
         with mimetype and encoding key and corresponding values
         (bytes).
 
     """
     cmd = ['file', '--mime', path]
     properties = subprocess.check_output(cmd)
     if properties:
         # Output reads b"<path>: <mimetype>; charset=<encoding>". Split
         # from the right so that a path which itself contains ": " is
         # not mistaken for the separator.
         description = properties.rsplit(b': ', 1)[1].strip()
         res = description.split(b'; ')
         mimetype = res[0]
         encoding = res[1].split(b'=')[1]
         return {
             'mimetype': mimetype,
             'encoding': encoding
         }
 
 
 class ContentMimetypeIndexer(BaseIndexer, DiskIndexer):
     """Indexer in charge of:
     - filtering out content already indexed
     - reading content from objstorage per the content's id (sha1)
     - computing {mimetype, encoding} from that content
     - store result in storage
     - sending the textual contents to the configured destination task
 
     """
     ADDITIONAL_CONFIG = {
         'workdir': ('str', '/tmp/swh/indexer.mimetype'),
         'destination_queue': (
             'str', 'swh.indexer.tasks.SWHOrchestratorTextContentsTask'),
         'tool': ('dict', {
             'name': 'file',
             'version': '5.22'
         }),
     }
 
     CONFIG_BASE_FILENAME = 'indexer/mimetype'
 
     def __init__(self):
         super().__init__()
         self.working_directory = self.config['workdir']
         destination_queue = self.config['destination_queue']
         self.task_destination = app.tasks[destination_queue]
         self.tool_name = self.config['tool']['name']
         self.tool_version = self.config['tool']['version']
 
     def filter_contents(self, sha1s):
         """Filter out known sha1s and return only missing ones.
 
         Args:
             sha1s ([bytes]): contents' identifiers to check
 
         Yields:
             whatever storage.content_mimetype_missing yields for the
             configured tool name and version
 
         """
         yield from self.storage.content_mimetype_missing((
             {
                 'id': sha1,
                 'tool_name': self.tool_name,
                 'tool_version': self.tool_version
             } for sha1 in sha1s
         ))
 
     def index_content(self, sha1, content):
         """Compute the mimetype and encoding of one content.
 
         Args:
             sha1 (bytes): content's identifier
             content (bytes): raw content in bytes
 
         Returns:
             A dict, representing a content_mimetype, with keys:
               - id (bytes): content's identifier (sha1)
               - mimetype (bytes): mimetype in bytes
               - encoding (bytes): encoding in bytes
               - tool_name (str): name of the tool used
               - tool_version (str): version of the tool used
 
         """
         filename = hashutil.hash_to_hex(sha1)
         content_path = self.write_to_temp(
             filename=filename,
             data=content)
 
         try:
             properties = compute_mimetype_encoding(content_path)
         finally:
             # Remove the temporary copy even when the tool fails, so that
             # failures do not pile up directories in the working directory.
             self.cleanup(content_path)
 
         properties.update({
             'id': sha1,
             'tool_name': self.tool_name,
             'tool_version': self.tool_version,
         })
         return properties
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_mimetype, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - mimetype (bytes): mimetype in bytes
               - encoding (bytes): encoding in bytes
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         self.storage.content_mimetype_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
     def _filter_text(self, results):
         """Filter sha1 whose raw content is text.
 
         A result is dropped when its encoding contains b'binary'.
 
         Yields:
             the id of each remaining result
 
         """
         for result in results:
             if b'binary' in result['encoding']:
                 continue
             yield result['id']
 
     def next_step(self, results):
         """When the computations is done, we'd like to send over only text
         contents to the text content orchestrator.
 
         Args:
             results ([dict]): List of content_mimetype results, dict
             with the following keys:
               - id (bytes): content's identifier (sha1)
               - mimetype (bytes): mimetype in bytes
               - encoding (bytes): encoding in bytes
 
         """
         self.task_destination.delay(list(self._filter_text(results)))
 
 
 @click.command()
 @click.option('--path', help="Path to execute index on")
 def main(path):
     # Command-line helper: print the mimetype and encoding found for `path`.
     detected = compute_mimetype_encoding(path)
     print(detected)
 
 
 if __name__ == '__main__':
     main()
diff --git a/swh/indexer/producer.py b/swh/indexer/producer.py
index 36a4ff3..57572f3 100755
--- a/swh/indexer/producer.py
+++ b/swh/indexer/producer.py
@@ -1,69 +1,70 @@
-# Copyright (C) 2016  The Software Heritage developers
+# Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import click
 import random
 import sys
 
-from swh.core import utils, hashutil
+from swh.core import utils
+from swh.model import hashutil
 from swh.scheduler.celery_backend.config import app
 
 
 def read_from_stdin():
     for sha1 in sys.stdin:
-        yield hashutil.hex_to_hash(sha1.strip())
+        yield hashutil.hash_to_bytes(sha1.strip())
 
 
 def gen_sha1(batch):
     """Generate batches of sha1s out of the ones read from stdin.
 
     Args:
         batch: group size handed over to utils.grouper
 
     Yields:
         the sha1s of one group, as a list in random order
 
     """
     for group in utils.grouper(read_from_stdin(), batch):
         shuffled = list(group)
         random.shuffle(shuffled)
         yield shuffled
 
 
 def run_with_limit(task, limit, batch):
     """Send batches of sha1s to task until at least limit sha1s are sent.
 
     Args:
         task: object whose delay method receives each batch
         limit: number of sha1s after which no more batch is sent
         batch: size of the batches
 
     """
     sent = 0
     for group in gen_sha1(batch):
         size = len(group)
         print('%s sent - [%s, ...]' % (size, group[0]))
         task.delay(group)
         sent += size
         if sent >= limit:
             break
 
 
 def run_no_limit(task, batch):
     """Send every batch of sha1s read from stdin to task.
 
     Args:
         task: object whose delay method receives each batch
         batch: size of the batches
 
     """
     for group in gen_sha1(batch):
         size = len(group)
         print('%s sent - [%s, ...]' % (size, group[0]))
         task.delay(group)
 
 
 @click.command(help='Read sha1 from stdin and send them for indexing')
 @click.option('--limit', default=None, help='Limit the number of data to read')
 @click.option('--batch', default='10', help='Group data by batch')
 @click.option('--task-name', default='orchestrator_all', help='')
 def main(limit, batch, task_name):
     # Options arrive as strings; convert the batch size first.
     batch_size = int(batch)
 
     from . import tasks, TASK_NAMES  # noqa
 
     # Reject unknown task names, listing the accepted ones.
     if task_name not in TASK_NAMES.keys():
         print('The task_name can only be one of %s' %
               ', '.join(TASK_NAMES.keys()))
         return
 
     task = app.tasks[TASK_NAMES[task_name]]
 
     if not limit:
         run_no_limit(task, batch_size)
         return
 
     run_with_limit(task, int(limit), batch_size)
 
 
 if __name__ == '__main__':
     main()