diff --git a/sql/swh-func.sql b/sql/swh-func.sql index e13f049..8f23740 100644 --- a/sql/swh-func.sql +++ b/sql/swh-func.sql @@ -1,514 +1,481 @@ -- Postgresql index helper function create or replace function hash_sha1(text) returns text language sql strict immutable as $$ select encode(public.digest($1, 'sha1'), 'hex') $$; comment on function hash_sha1(text) is 'Compute sha1 hash as text'; -- create a temporary table with a single "bytea" column for fast object lookup. create or replace function swh_mktemp_bytea() returns void language sql as $$ create temporary table tmp_bytea ( id bytea ) on commit drop; $$; -- create a temporary table called tmp_TBLNAME, mimicking existing table -- TBLNAME -- -- Args: -- tblname: name of the table to mimick create or replace function swh_mktemp(tblname regclass) returns void language plpgsql as $$ begin execute format(' create temporary table tmp_%1$I (like %1$I including defaults) on commit drop; alter table tmp_%1$I drop column if exists object_id; ', tblname); return; end $$; -- create a temporary table for content_mimetype tmp_content_mimetype, create or replace function swh_mktemp_content_mimetype() returns void language sql as $$ create temporary table tmp_content_mimetype ( like content_mimetype including defaults ) on commit drop; $$; comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information'; -- add tmp_content_mimetype entries to content_mimetype, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- If filtering duplicates is in order, the call to -- swh_content_mimetype_missing must take place before calling this -- function. -- -- -- operates in bulk: 0. swh_mktemp(content_mimetype), 1. COPY to tmp_content_mimetype, -- 2. call this function create or replace function swh_content_mimetype_add(conflict_update boolean) returns void language plpgsql as $$ begin if conflict_update then insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) select id, mimetype, encoding, indexer_configuration_id from tmp_content_mimetype tcm on conflict(id, indexer_configuration_id) do update set mimetype = excluded.mimetype, encoding = excluded.encoding; else insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id) select id, mimetype, encoding, indexer_configuration_id from tmp_content_mimetype tcm on conflict(id, indexer_configuration_id) do nothing; end if; return; end $$; comment on function swh_content_mimetype_add(boolean) IS 'Add new content mimetypes'; -- add tmp_content_language entries to content_language, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- If filtering duplicates is in order, the call to -- swh_content_language_missing must take place before calling this -- function. -- -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to -- tmp_content_language, 2. 
call this function create or replace function swh_content_language_add(conflict_update boolean) returns void language plpgsql as $$ begin if conflict_update then insert into content_language (id, lang, indexer_configuration_id) select id, lang, indexer_configuration_id from tmp_content_language tcl on conflict(id, indexer_configuration_id) do update set lang = excluded.lang; else insert into content_language (id, lang, indexer_configuration_id) select id, lang, indexer_configuration_id from tmp_content_language tcl on conflict(id, indexer_configuration_id) do nothing; end if; return; end $$; comment on function swh_content_language_add(boolean) IS 'Add new content languages'; -- create a temporary table for retrieving content_language create or replace function swh_mktemp_content_language() returns void language sql as $$ create temporary table tmp_content_language ( like content_language including defaults ) on commit drop; $$; comment on function swh_mktemp_content_language() is 'Helper table to add content language'; -- create a temporary table for content_ctags tmp_content_ctags, create or replace function swh_mktemp_content_ctags() returns void language sql as $$ create temporary table tmp_content_ctags ( like content_ctags including defaults ) on commit drop; $$; comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags'; -- add tmp_content_ctags entries to content_ctags, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags, -- 2. call this function create or replace function swh_content_ctags_add(conflict_update boolean) returns void language plpgsql as $$ begin if conflict_update then delete from content_ctags where id in (select tmp.id from tmp_content_ctags tmp inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); end if; insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id) select id, name, kind, line, lang, indexer_configuration_id from tmp_content_ctags tct on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id) do nothing; return; end $$; comment on function swh_content_ctags_add(boolean) IS 'Add new ctags symbols per content'; create type content_ctags_signature as ( id sha1, name text, kind text, line bigint, lang ctags_languages, tool_id integer, tool_name text, tool_version text, tool_configuration jsonb ); -- Search within ctags content. 
-- create or replace function swh_content_ctags_search( expression text, l integer default 10, last_sha1 sha1 default '\x0000000000000000000000000000000000000000') returns setof content_ctags_signature language sql as $$ select c.id, name, kind, line, lang, i.id as tool_id, tool_name, tool_version, tool_configuration from content_ctags c inner join indexer_configuration i on i.id = c.indexer_configuration_id where hash_sha1(name) = hash_sha1(expression) and c.id > last_sha1 order by id limit l; $$; comment on function swh_content_ctags_search(text, integer, sha1) IS 'Equality search through ctags'' symbols'; -- create a temporary table for content_fossology_license tmp_content_fossology_license, create or replace function swh_mktemp_content_fossology_license() returns void language sql as $$ create temporary table tmp_content_fossology_license ( id sha1, license text, indexer_configuration_id integer ) on commit drop; $$; comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license'; -- add tmp_content_fossology_license entries to content_fossology_license, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- operates in bulk: 0. swh_mktemp(content_fossology_license), 1. COPY to -- tmp_content_fossology_license, 2. call this function create or replace function swh_content_fossology_license_add(conflict_update boolean) returns void language plpgsql as $$ begin -- insert unknown licenses first insert into fossology_license (name) select distinct license from tmp_content_fossology_license tmp where not exists (select 1 from fossology_license where name=tmp.license) on conflict(name) do nothing; if conflict_update then -- delete from content_fossology_license c -- using tmp_content_fossology_license tmp, indexer_configuration i -- where c.id = tmp.id and i.id=tmp.indexer_configuration_id delete from content_fossology_license where id in (select tmp.id from tmp_content_fossology_license tmp inner join indexer_configuration i on i.id=tmp.indexer_configuration_id); end if; insert into content_fossology_license (id, license_id, indexer_configuration_id) select tcl.id, (select id from fossology_license where name = tcl.license) as license, indexer_configuration_id from tmp_content_fossology_license tcl on conflict(id, license_id, indexer_configuration_id) do nothing; return; end $$; comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses'; -- content_metadata functions --- --- create a temporary table for content_metadata tmp_content_metadata, -create or replace function swh_mktemp_content_metadata_missing() - returns void - language sql -as $$ - create temporary table tmp_content_metadata_missing ( - id sha1, - indexer_configuration_id integer - ) on commit drop; -$$; - -comment on function swh_mktemp_content_metadata_missing() is 'Helper table to filter missing metadata in content_metadata'; - --- check which entries of tmp_bytea are missing from content_metadata --- --- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, --- 2. 
call this function -create or replace function swh_content_metadata_missing() - returns setof sha1 - language plpgsql -as $$ -begin - return query - select id::sha1 from tmp_content_metadata_missing as tmp - where not exists - (select 1 from content_metadata as c - where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); - return; -end -$$; - -comment on function swh_content_metadata_missing() IS 'Filter missing content metadata'; -- add tmp_content_metadata entries to content_metadata, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- If filtering duplicates is in order, the call to -- swh_content_metadata_missing must take place before calling this -- function. -- -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to -- tmp_content_metadata, 2. call this function create or replace function swh_content_metadata_add(conflict_update boolean) returns void language plpgsql as $$ begin if conflict_update then insert into content_metadata (id, translated_metadata, indexer_configuration_id) select id, translated_metadata, indexer_configuration_id from tmp_content_metadata tcm on conflict(id, indexer_configuration_id) do update set translated_metadata = excluded.translated_metadata; else insert into content_metadata (id, translated_metadata, indexer_configuration_id) select id, translated_metadata, indexer_configuration_id from tmp_content_metadata tcm on conflict(id, indexer_configuration_id) do nothing; end if; return; end $$; comment on function swh_content_metadata_add(boolean) IS 'Add new content metadata'; -- create a temporary table for retrieving content_metadata create or replace function swh_mktemp_content_metadata() returns void language sql as $$ create temporary table tmp_content_metadata ( like content_metadata including defaults ) on commit drop; $$; comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata'; -- create type content_metadata_signature as ( id sha1, translated_metadata jsonb, tool_id integer, tool_name text, tool_version text, tool_configuration jsonb ); -- Retrieve list of content metadata from the temporary table. -- -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function create or replace function swh_content_metadata_get() returns setof content_metadata_signature language plpgsql as $$ begin return query select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration from tmp_bytea t inner join content_metadata c on c.id = t.id inner join indexer_configuration i on i.id=c.indexer_configuration_id; return; end $$; comment on function swh_content_metadata_get() is 'List content''s metadata'; -- end content_metadata functions -- revision_metadata functions -- -- create a temporary table for revision_metadata tmp_revision_metadata, create or replace function swh_mktemp_revision_metadata_missing() returns void language sql as $$ create temporary table tmp_revision_metadata_missing ( id sha1_git, indexer_configuration_id integer ) on commit drop; $$; comment on function swh_mktemp_revision_metadata_missing() is 'Helper table to filter missing metadata in revision_metadata'; -- check which entries of tmp_bytea are missing from revision_metadata -- -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea, -- 2. 
call this function create or replace function swh_revision_metadata_missing() returns setof sha1 language plpgsql as $$ begin return query select id::sha1 from tmp_revision_metadata_missing as tmp where not exists (select 1 from revision_metadata as c where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id); return; end $$; comment on function swh_revision_metadata_missing() IS 'Filter missing content metadata'; -- add tmp_revision_metadata entries to revision_metadata, overwriting -- duplicates if conflict_update is true, skipping duplicates otherwise. -- -- If filtering duplicates is in order, the call to -- swh_revision_metadata_missing must take place before calling this -- function. -- -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to -- tmp_revision_metadata, 2. call this function create or replace function swh_revision_metadata_add(conflict_update boolean) returns void language plpgsql as $$ begin if conflict_update then insert into revision_metadata (id, translated_metadata, indexer_configuration_id) select id, translated_metadata, indexer_configuration_id from tmp_revision_metadata tcm on conflict(id, indexer_configuration_id) do update set translated_metadata = excluded.translated_metadata; else insert into revision_metadata (id, translated_metadata, indexer_configuration_id) select id, translated_metadata, indexer_configuration_id from tmp_revision_metadata tcm on conflict(id, indexer_configuration_id) do nothing; end if; return; end $$; comment on function swh_revision_metadata_add(boolean) IS 'Add new revision metadata'; -- create a temporary table for retrieving revision_metadata create or replace function swh_mktemp_revision_metadata() returns void language sql as $$ create temporary table tmp_revision_metadata ( like revision_metadata including defaults ) on commit drop; $$; comment on function swh_mktemp_revision_metadata() is 'Helper table to add revision metadata'; -- create type revision_metadata_signature as ( id sha1_git, translated_metadata jsonb, tool_id integer, tool_name text, tool_version text, tool_configuration jsonb ); -- Retrieve list of revision metadata from the temporary table. -- -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function create or replace function swh_revision_metadata_get() returns setof revision_metadata_signature language plpgsql as $$ begin return query select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration from tmp_bytea t inner join revision_metadata c on c.id = t.id inner join indexer_configuration i on i.id=c.indexer_configuration_id; return; end $$; create or replace function swh_mktemp_indexer_configuration() returns void language sql as $$ create temporary table tmp_indexer_configuration ( like indexer_configuration including defaults ) on commit drop; alter table tmp_indexer_configuration drop column id; $$; -- add tmp_indexer_configuration entries to indexer_configuration, -- skipping duplicates if any. -- -- operates in bulk: 0. create temporary tmp_indexer_configuration, 1. COPY to -- it, 2. 
call this function to insert and filtering out duplicates
create or replace function swh_indexer_configuration_add()
    returns setof indexer_configuration
    language plpgsql
as $$
begin
      insert into indexer_configuration(tool_name, tool_version, tool_configuration)
      select tool_name, tool_version, tool_configuration
      from tmp_indexer_configuration tmp
      on conflict(tool_name, tool_version, tool_configuration) do nothing;

      return query
          select id, tool_name, tool_version, tool_configuration
          from tmp_indexer_configuration
          join indexer_configuration
              using(tool_name, tool_version, tool_configuration);
      return;
end
$$;
diff --git a/sql/upgrades/115.sql b/sql/upgrades/115.sql
index 80c6d98..89cb0d2 100644
--- a/sql/upgrades/115.sql
+++ b/sql/upgrades/115.sql
@@ -1,28 +1,31 @@
-- SWH Indexer DB schema upgrade
-- from_version: 114
-- to_version: 115
-- description: Remove temporary table use in reading api

insert into dbversion(version, release, description)
values(115, now(), 'Work In Progress');

drop function swh_mktemp_content_mimetype_missing();
drop function swh_content_mimetype_missing();
drop function swh_content_mimetype_get();
drop type content_mimetype_signature;

drop function swh_mktemp_content_language_missing();
drop function swh_content_language_missing();
drop function swh_content_language_get();
drop type content_language_signature;

drop function swh_mktemp_content_ctags_missing();
drop function swh_content_ctags_missing();
drop function swh_content_ctags_get();
--drop type content_ctags_signature; -- still used in swh_content_ctags_search

drop function swh_content_fossology_license_get();
drop type content_fossology_license_signature;
+
+drop function swh_mktemp_content_metadata_missing();
+drop function swh_content_metadata_missing();
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
index ccd6e8f..c71bd70 100644
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -1,556 +1,554 @@
# Copyright (C) 2015-2018  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import json
import dateutil.parser
import psycopg2

from swh.storage.common import db_transaction_generator, db_transaction
from swh.storage.exc import StorageDBError

from .db import Db
from . import converters


INDEXER_CFG_KEY = 'indexer_storage'


def get_indexer_storage(cls, args):
    """Get an indexer storage object of class `storage_class` with
    arguments `storage_args`.

    Args:
        args (dict): dictionary with keys:
        - cls (str): storage's class, either 'local' or 'remote'
        - args (dict): dictionary with keys

    Returns:
        an instance of swh.indexer's storage (either local or remote)

    Raises:
        ValueError if passed an unknown storage class.

    """
    if cls == 'remote':
        from .api.client import RemoteStorage as IndexerStorage
    elif cls == 'local':
        from .
import IndexerStorage else: raise ValueError('Unknown indexer storage class `%s`' % cls) return IndexerStorage(**args) class IndexerStorage(): """SWH Indexer Storage """ def __init__(self, db, min_pool_conns=1, max_pool_conns=10): """ Args: db_conn: either a libpq connection string, or a psycopg2 connection """ try: if isinstance(db, psycopg2.extensions.connection): self._pool = None self._db = Db(db) else: self._pool = psycopg2.pool.ThreadedConnectionPool( min_pool_conns, max_pool_conns, db ) self._db = None except psycopg2.OperationalError as e: raise StorageDBError(e) def get_db(self): if self._db: return self._db return Db.from_pool(self._pool) def check_config(self, *, check_write): """Check that the storage is configured and ready to go.""" # Check permissions on one of the tables with self.get_db().transaction() as cur: if check_write: check = 'INSERT' else: check = 'SELECT' cur.execute( "select has_table_privilege(current_user, 'content_mimetype', %s)", # noqa (check,) ) return cur.fetchone()[0] return True @db_transaction_generator() def content_mimetype_missing(self, mimetypes, db=None, cur=None): """List mimetypes missing from storage. Args: mimetypes (iterable): iterable of dict with keys: - id (bytes): sha1 identifier - indexer_configuration_id (int): tool used to compute the results Yields: an iterable of missing id for the tuple (id, indexer_configuration_id) """ for obj in db.content_mimetype_missing_from_list(mimetypes, cur): yield obj[0] @db_transaction() def content_mimetype_add(self, mimetypes, conflict_update=False, db=None, cur=None): """Add mimetypes not present in storage. Args: mimetypes (iterable): dictionaries with keys: - id (bytes): sha1 identifier - mimetype (bytes): raw content's mimetype - encoding (bytes): raw content's encoding - indexer_configuration_id (int): tool's id used to compute the results - conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ db.mktemp_content_mimetype(cur) db.copy_to(mimetypes, 'tmp_content_mimetype', ['id', 'mimetype', 'encoding', 'indexer_configuration_id'], cur) db.content_mimetype_add_from_temp(conflict_update, cur) @db_transaction_generator() def content_mimetype_get(self, ids, db=None, cur=None): """Retrieve full content mimetype per ids. Args: ids (iterable): sha1 identifier Yields: mimetypes (iterable): dictionaries with keys: id (bytes): sha1 identifier mimetype (bytes): raw content's mimetype encoding (bytes): raw content's encoding tool_id (id): tool's id used to compute the results tool_name (str): tool's name tool_version (str): tool's version tool_configuration: tool's configuration """ for c in db.content_mimetype_get_from_list(ids, cur): yield converters.db_to_mimetype( dict(zip(db.content_mimetype_cols, c))) @db_transaction_generator() def content_language_missing(self, languages, db=None, cur=None): """List languages missing from storage. Args: languages (iterable): dictionaries with keys: id (bytes): sha1 identifier indexer_configuration_id (int): tool used to compute the results Yields: an iterable of missing id for the tuple (id, indexer_configuration_id) """ for obj in db.content_language_missing_from_list(languages, cur): yield obj[0] @db_transaction_generator() def content_language_get(self, ids, db=None, cur=None): """Retrieve full content language per ids. 
Args: ids (iterable): sha1 identifier Yields: languages (iterable): dictionaries with keys: id (bytes): sha1 identifier lang (bytes): raw content's language tool_id (id): tool's id used to compute the results tool_name (str): tool's name tool_version (str): tool's version tool_configuration: tool's configuration """ for c in db.content_language_get_from_list(ids, cur): yield converters.db_to_language( dict(zip(db.content_language_cols, c))) @db_transaction() def content_language_add(self, languages, conflict_update=False, db=None, cur=None): """Add languages not present in storage. Args: languages (iterable): dictionaries with keys: - id: sha1 - lang: bytes conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ db.mktemp_content_language(cur) # empty language is mapped to 'unknown' db.copy_to( ({ 'id': l['id'], 'lang': 'unknown' if not l['lang'] else l['lang'], 'indexer_configuration_id': l['indexer_configuration_id'], } for l in languages), 'tmp_content_language', ['id', 'lang', 'indexer_configuration_id'], cur) db.content_language_add_from_temp(conflict_update, cur) @db_transaction_generator() def content_ctags_missing(self, ctags, db=None, cur=None): """List ctags missing from storage. Args: ctags (iterable): dicts with keys: id (bytes): sha1 identifier indexer_configuration_id (int): tool used to compute the results Yields: an iterable of missing id for the tuple (id, indexer_configuration_id) """ for obj in db.content_ctags_missing_from_list(ctags, cur): yield obj[0] @db_transaction_generator() def content_ctags_get(self, ids, db=None, cur=None): """Retrieve ctags per id. Args: ids (iterable): sha1 checksums """ for c in db.content_ctags_get_from_list(ids, cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c))) @db_transaction() def content_ctags_add(self, ctags, conflict_update=False, db=None, cur=None): """Add ctags not present in storage Args: ctags (iterable): dictionaries with keys: - id (bytes): sha1 - ctags ([list): List of dictionary with keys: name, kind, line, language """ def _convert_ctags(__ctags): """Convert ctags dict to list of ctags. """ for ctags in __ctags: yield from converters.ctags_to_db(ctags) db.mktemp_content_ctags(cur) db.copy_to(list(_convert_ctags(ctags)), tblname='tmp_content_ctags', columns=['id', 'name', 'kind', 'line', 'lang', 'indexer_configuration_id'], cur=cur) db.content_ctags_add_from_temp(conflict_update, cur) @db_transaction_generator() def content_ctags_search(self, expression, limit=10, last_sha1=None, db=None, cur=None): """Search through content's raw ctags symbols. Args: expression (str): Expression to search for limit (int): Number of rows to return (default to 10). last_sha1 (str): Offset from which retrieving data (default to ''). Yields: rows of ctags including id, name, lang, kind, line, etc... """ for obj in db.content_ctags_search(expression, last_sha1, limit, cur=cur): yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj))) @db_transaction_generator() def content_fossology_license_get(self, ids, db=None, cur=None): """Retrieve licenses per id. 
        Args:
            ids (iterable): sha1 checksums

        Yields:
            list: dictionaries with the following keys:

            - id (bytes)
            - licenses ([str]): associated licenses for that content

        """
        for c in db.content_fossology_license_get_from_list(ids, cur):
            license = dict(zip(db.content_fossology_license_cols, c))
            yield converters.db_to_fossology_license(license)

    @db_transaction()
    def content_fossology_license_add(self, licenses, conflict_update=False,
                                      db=None, cur=None):
        """Add licenses not present in storage.

        Args:
            licenses (iterable): dictionaries with keys:

            - id: sha1
            - license ([bytes]): List of licenses associated to sha1
            - tool (str): nomossa

            conflict_update: Flag to determine if we want to overwrite (true)
                or skip duplicates (false, the default)

        Returns:
            list: content_license entries which failed due to unknown licenses

        """
        # Then, we add the correct ones
        db.mktemp_content_fossology_license(cur)
        db.copy_to(
            ({
                'id': sha1['id'],
                'indexer_configuration_id': sha1['indexer_configuration_id'],
                'license': license,
              } for sha1 in licenses
                for license in sha1['licenses']),
            tblname='tmp_content_fossology_license',
            columns=['id', 'license', 'indexer_configuration_id'],
            cur=cur)
        db.content_fossology_license_add_from_temp(conflict_update, cur)

    @db_transaction_generator()
    def content_metadata_missing(self, metadatas, db=None, cur=None):
        """List metadatas missing from storage.

        Args:
            metadatas (iterable): dictionaries with keys:

-               - id (bytes): sha1 identifier
-               - tool_name (str): tool used to compute the results
-               - tool_version (str): associated tool's version
+               id (bytes): sha1 identifier
+               indexer_configuration_id (int): tool used to compute
+                   the results

-        Returns:
-            iterable: missing ids
+        Yields:
+            an iterable of missing id for the tuple (id,
+            indexer_configuration_id)

        """
-        db.mktemp_content_metadata_missing(cur)
-        db.copy_to(metadatas, 'tmp_content_metadata_missing',
-                   ['id', 'indexer_configuration_id'], cur)
-        for obj in db.content_metadata_missing_from_temp(cur):
+        for obj in db.content_metadata_missing_from_list(metadatas, cur):
            yield obj[0]

    @db_transaction_generator()
    def content_metadata_get(self, ids, db=None, cur=None):
        db.store_tmp_bytea(ids, cur)
        for c in db.content_metadata_get_from_temp():
            yield converters.db_to_metadata(
                dict(zip(db.content_metadata_cols, c)))

    @db_transaction()
    def content_metadata_add(self, metadatas, conflict_update=False, db=None,
                             cur=None):
        """Add metadatas not present in storage.

        Args:
            metadatas (iterable): dictionaries with keys:

            - id: sha1
            - translated_metadata: bytes / jsonb ?

            conflict_update: Flag to determine if we want to overwrite (true)
                or skip duplicates (false, the default)

        """
        db.mktemp_content_metadata(cur)
        # empty metadata is mapped to 'unknown'
        db.copy_to(metadatas, 'tmp_content_metadata',
                   ['id', 'translated_metadata', 'indexer_configuration_id'],
                   cur)
        db.content_metadata_add_from_temp(conflict_update, cur)

    @db_transaction_generator()
    def revision_metadata_missing(self, metadatas, db=None, cur=None):
        """List metadatas missing from storage.
Args: metadatas (iterable): dictionaries with keys: - id (bytes): sha1_git revision identifier - tool_name (str): tool used to compute the results - tool_version (str): associated tool's version Returns: iterable: missing ids """ db.mktemp_revision_metadata_missing(cur) db.copy_to(metadatas, 'tmp_revision_metadata_missing', ['id', 'indexer_configuration_id'], cur) for obj in db.revision_metadata_missing_from_temp(cur): yield obj[0] @db_transaction_generator() def revision_metadata_get(self, ids, db=None, cur=None): db.store_tmp_bytea(ids, cur) for c in db.revision_metadata_get_from_temp(): yield converters.db_to_metadata( dict(zip(db.revision_metadata_cols, c))) @db_transaction() def revision_metadata_add(self, metadatas, conflict_update=False, db=None, cur=None): """Add metadatas not present in storage. Args: metadatas (iterable): dictionaries with keys: - id: sha1_git of revision - translated_metadata: bytes / jsonb ? conflict_update: Flag to determine if we want to overwrite (true) or skip duplicates (false, the default) """ db.mktemp_revision_metadata(cur) # empty metadata is mapped to 'unknown' db.copy_to(metadatas, 'tmp_revision_metadata', ['id', 'translated_metadata', 'indexer_configuration_id'], cur) db.revision_metadata_add_from_temp(conflict_update, cur) @db_transaction() def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, db=None, cur=None): """ Add an origin_metadata for the origin at ts with provenance and metadata. Args: origin_id (int): the origin's id for which the metadata is added ts (datetime): timestamp of the found metadata provider (int): the provider of metadata (ex:'hal') tool (int): tool used to extract metadata metadata (jsonb): the metadata retrieved at the time and location Returns: id (int): the origin_metadata unique id """ if isinstance(ts, str): ts = dateutil.parser.parse(ts) return db.origin_metadata_add(origin_id, ts, provider, tool, metadata, cur) @db_transaction_generator() def origin_metadata_get_by(self, origin_id, provider_type=None, db=None, cur=None): """Retrieve list of all origin_metadata entries for the origin_id Args: origin_id (int): the unique origin identifier provider_type (str): (optional) type of provider Returns: list of dicts: the origin_metadata dictionary with the keys: - id (int): origin_metadata's id - origin_id (int): origin's id - discovery_date (datetime): timestamp of discovery - tool_id (int): metadata's extracting tool - metadata (jsonb) - provider_id (int): metadata's provider - provider_name (str) - provider_type (str) - provider_url (str) """ for line in db.origin_metadata_get_by(origin_id, provider_type, cur): yield dict(zip(db.origin_metadata_get_cols, line)) @db_transaction_generator() def indexer_configuration_add(self, tools, db=None, cur=None): """Add new tools to the storage. Args: tools ([dict]): List of dictionary representing tool to insert in the db. Dictionary with the following keys:: tool_name (str): tool's name tool_version (str): tool's version tool_configuration (dict): tool's configuration (free form dict) Returns: List of dict inserted in the db (holding the id key as well). The order of the list is not guaranteed to match the order of the initial list. 
""" db.mktemp_indexer_configuration(cur) db.copy_to(tools, 'tmp_indexer_configuration', ['tool_name', 'tool_version', 'tool_configuration'], cur) tools = db.indexer_configuration_add_from_temp(cur) for line in tools: yield dict(zip(db.indexer_configuration_cols, line)) @db_transaction() def indexer_configuration_get(self, tool, db=None, cur=None): """Retrieve tool information. Args: tool (dict): Dictionary representing a tool with the following keys:: tool_name (str): tool's name tool_version (str): tool's version tool_configuration (dict): tool's configuration (free form dict) Returns: The identifier of the tool if it exists, None otherwise. """ tool_conf = tool['tool_configuration'] if isinstance(tool_conf, dict): tool_conf = json.dumps(tool_conf) idx = db.indexer_configuration_get(tool['tool_name'], tool['tool_version'], tool_conf) if not idx: return None return dict(zip(db.indexer_configuration_cols, idx)) diff --git a/swh/indexer/storage/db.py b/swh/indexer/storage/db.py index 0e9f1ab..22fe33f 100644 --- a/swh/indexer/storage/db.py +++ b/swh/indexer/storage/db.py @@ -1,338 +1,351 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model import hashutil from swh.storage.db import BaseDb, stored_procedure, cursor_to_bytes from swh.storage.db import line_to_bytes, execute_values_to_bytes class Db(BaseDb): """Proxy to the SWH Indexer DB, with wrappers around stored procedures """ @stored_procedure('swh_mktemp_bytea') def mktemp_bytea(self, cur=None): pass def store_tmp_bytea(self, ids, cur=None): """Store the given identifiers in a new tmp_bytea table""" cur = self._cursor(cur) self.mktemp_bytea(cur) self.copy_to(({'id': elem} for elem in ids), 'tmp_bytea', ['id'], cur) content_mimetype_hash_keys = ['id', 'indexer_configuration_id'] def content_mimetype_missing_from_list(self, mimetypes, cur=None): """List missing mimetypes. """ cur = self._cursor(cur) keys = ', '.join(self.content_mimetype_hash_keys) equality = ' AND '.join( ('t.%s = c.%s' % (key, key)) for key in self.content_mimetype_hash_keys ) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(%s) where not exists ( select 1 from content_mimetype c where %s ) """ % (keys, keys, equality), (tuple(m[k] for k in self.content_mimetype_hash_keys) for m in mimetypes) ) content_mimetype_cols = [ 'id', 'mimetype', 'encoding', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_mimetype') def mktemp_content_mimetype(self, cur=None): pass def content_mimetype_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_mimetype_add(%s)", (conflict_update, )) def _convert_key(self, key): """Convert keys according to specific use in the module. Expected: Tables content_{something} being aliased as 'c' (something in {language, mimetype, ...}), table indexer_configuration being aliased as 'i'. 
""" if key == 'id': return 'c.id' elif key == 'tool_id': return 'i.id as tool_id' elif key == 'licenses': return '''array(select name from fossology_license where id = ANY( array_agg(c.license_id))) as licenses''' return key def content_mimetype_get_from_list(self, ids, cur=None): cur = self._cursor(cur) keys = map(self._convert_key, self.content_mimetype_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_mimetype c on c.id=t.id inner join indexer_configuration i on c.indexer_configuration_id=i.id; """ % ', '.join(keys), ((_id,) for _id in ids) ) content_language_hash_keys = ['id', 'indexer_configuration_id'] def content_language_missing_from_list(self, languages, cur=None): """List missing languages. """ cur = self._cursor(cur) keys = ', '.join(self.content_language_hash_keys) equality = ' AND '.join( ('t.%s = c.%s' % (key, key)) for key in self.content_language_hash_keys ) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(%s) where not exists ( select 1 from content_language c where %s ) """ % (keys, keys, equality), (tuple(l[k] for k in self.content_language_hash_keys) for l in languages) ) content_language_cols = [ 'id', 'lang', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_language') def mktemp_content_language(self, cur=None): pass def content_language_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_language_add(%s)", (conflict_update, )) def content_language_get_from_list(self, ids, cur=None): cur = self._cursor(cur) keys = map(self._convert_key, self.content_language_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_language c on c.id=t.id inner join indexer_configuration i on c.indexer_configuration_id=i.id; """ % ', '.join(keys), ((_id,) for _id in ids) ) content_ctags_hash_keys = ['id', 'indexer_configuration_id'] def content_ctags_missing_from_list(self, ctags, cur=None): """List missing ctags. 
""" cur = self._cursor(cur) keys = ', '.join(self.content_ctags_hash_keys) equality = ' AND '.join( ('t.%s = c.%s' % (key, key)) for key in self.content_ctags_hash_keys ) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(%s) where not exists ( select 1 from content_ctags c where %s ) """ % (keys, keys, equality), (tuple(c[k] for k in self.content_ctags_hash_keys) for c in ctags) ) content_ctags_cols = [ 'id', 'name', 'kind', 'line', 'lang', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration'] @stored_procedure('swh_mktemp_content_ctags') def mktemp_content_ctags(self, cur=None): pass def content_ctags_add_from_temp(self, conflict_update, cur=None): self._cursor(cur).execute("SELECT swh_content_ctags_add(%s)", (conflict_update, )) def content_ctags_get_from_list(self, ids, cur=None): cur = self._cursor(cur) keys = map(self._convert_key, self.content_ctags_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_ctags c on c.id=t.id inner join indexer_configuration i on c.indexer_configuration_id=i.id order by line """ % ', '.join(keys), ((_id,) for _id in ids) ) def content_ctags_search(self, expression, last_sha1, limit, cur=None): cur = self._cursor(cur) if not last_sha1: query = """SELECT %s FROM swh_content_ctags_search(%%s, %%s)""" % ( ','.join(self.content_ctags_cols)) cur.execute(query, (expression, limit)) else: if last_sha1 and isinstance(last_sha1, bytes): last_sha1 = '\\x%s' % hashutil.hash_to_hex(last_sha1) elif last_sha1: last_sha1 = '\\x%s' % last_sha1 query = """SELECT %s FROM swh_content_ctags_search(%%s, %%s, %%s)""" % ( ','.join(self.content_ctags_cols)) cur.execute(query, (expression, limit, last_sha1)) yield from cursor_to_bytes(cur) content_fossology_license_cols = [ 'id', 'tool_id', 'tool_name', 'tool_version', 'tool_configuration', 'licenses'] @stored_procedure('swh_mktemp_content_fossology_license') def mktemp_content_fossology_license(self, cur=None): pass def content_fossology_license_add_from_temp(self, conflict_update, cur=None): """Add new licenses per content. """ self._cursor(cur).execute( "SELECT swh_content_fossology_license_add(%s)", (conflict_update, )) def content_fossology_license_get_from_list(self, ids, cur=None): """Retrieve licenses per id. """ cur = self._cursor(cur) keys = map(self._convert_key, self.content_fossology_license_cols) yield from execute_values_to_bytes( cur, """ select %s from (values %%s) as t(id) inner join content_fossology_license c on t.id=c.id inner join indexer_configuration i on i.id=c.indexer_configuration_id group by c.id, i.id, i.tool_name, i.tool_version, i.tool_configuration; """ % ', '.join(keys), ((_id,) for _id in ids) ) + content_metadata_hash_keys = ['id', 'indexer_configuration_id'] + + def content_metadata_missing_from_list(self, metadata, cur=None): + """List missing metadata. 
+
+        """
+        cur = self._cursor(cur)
+        keys = ', '.join(self.content_metadata_hash_keys)
+        equality = ' AND '.join(
+            ('t.%s = c.%s' % (key, key))
+            for key in self.content_metadata_hash_keys
+        )
+        yield from execute_values_to_bytes(
+            cur, """
+            select %s from (values %%s) as t(%s)
+            where not exists (
+                select 1 from content_metadata c
+                where %s
+            )
+            """ % (keys, keys, equality),
+            (tuple(m[k] for k in self.content_metadata_hash_keys)
+             for m in metadata)
+        )
+
    content_metadata_cols = [
        'id', 'translated_metadata',
        'tool_id', 'tool_name', 'tool_version', 'tool_configuration']

    @stored_procedure('swh_mktemp_content_metadata')
    def mktemp_content_metadata(self, cur=None): pass

-    @stored_procedure('swh_mktemp_content_metadata_missing')
-    def mktemp_content_metadata_missing(self, cur=None): pass
-
-    def content_metadata_missing_from_temp(self, cur=None):
-        """List missing metadatas.
-
-        """
-        cur = self._cursor(cur)
-        cur.execute("SELECT * FROM swh_content_metadata_missing()")
-        yield from cursor_to_bytes(cur)
-
    def content_metadata_add_from_temp(self, conflict_update, cur=None):
        self._cursor(cur).execute("SELECT swh_content_metadata_add(%s)",
                                  (conflict_update, ))

    def content_metadata_get_from_temp(self, cur=None):
        cur = self._cursor(cur)
        query = "SELECT %s FROM swh_content_metadata_get()" % (
            ','.join(self.content_metadata_cols))
        cur.execute(query)
        yield from cursor_to_bytes(cur)

    revision_metadata_cols = [
        'id', 'translated_metadata',
        'tool_id', 'tool_name', 'tool_version', 'tool_configuration']

    @stored_procedure('swh_mktemp_revision_metadata')
    def mktemp_revision_metadata(self, cur=None): pass

    @stored_procedure('swh_mktemp_revision_metadata_missing')
    def mktemp_revision_metadata_missing(self, cur=None): pass

    def revision_metadata_missing_from_temp(self, cur=None):
        """List missing metadatas.

        """
        cur = self._cursor(cur)
        cur.execute("SELECT * FROM swh_revision_metadata_missing()")
        yield from cursor_to_bytes(cur)

    def revision_metadata_add_from_temp(self, conflict_update, cur=None):
        self._cursor(cur).execute("SELECT swh_revision_metadata_add(%s)",
                                  (conflict_update, ))

    def revision_metadata_get_from_temp(self, cur=None):
        cur = self._cursor(cur)
        query = "SELECT %s FROM swh_revision_metadata_get()" % (
            ','.join(self.revision_metadata_cols))
        cur.execute(query)
        yield from cursor_to_bytes(cur)

    indexer_configuration_cols = ['id', 'tool_name', 'tool_version',
                                  'tool_configuration']

    @stored_procedure('swh_mktemp_indexer_configuration')
    def mktemp_indexer_configuration(self, cur=None): pass

    def indexer_configuration_add_from_temp(self, cur=None):
        cur = self._cursor(cur)
        cur.execute("SELECT %s from swh_indexer_configuration_add()" % (
            ','.join(self.indexer_configuration_cols), ))
        yield from cursor_to_bytes(cur)

    def indexer_configuration_get(self, tool_name, tool_version,
                                  tool_configuration, cur=None):
        cur = self._cursor(cur)
        cur.execute('''select %s
                       from indexer_configuration
                       where tool_name=%%s and
                             tool_version=%%s and
                             tool_configuration=%%s''' % (
                                 ','.join(self.indexer_configuration_cols)),
                    (tool_name, tool_version, tool_configuration))

        data = cur.fetchone()
        if not data:
            return None
        return line_to_bytes(data)
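
The stored procedures in this patch all share one bulk protocol on the write side (create a temporary table, COPY the rows into it, call the swh_*_add function), while the read-side "missing" check for content_metadata now goes through an execute_values query instead of a temp table. As an illustration only, here is a minimal sketch of how a client might drive the refactored content_metadata API end to end. The connection string, sha1, tool description and metadata payload are made-up placeholders, and it assumes a PostgreSQL database already migrated to schema version 115.

from swh.indexer.storage import get_indexer_storage

# Placeholder DSN; any libpq connection string (or a psycopg2 connection) works.
storage = get_indexer_storage('local',
                              {'db': 'dbname=softwareheritage-indexer-dev'})

# Register (or look up) the indexing tool; the insert is idempotent thanks to
# the ON CONFLICT DO NOTHING in swh_indexer_configuration_add(). The tool
# shown here is a hypothetical example.
tool = list(storage.indexer_configuration_add([{
    'tool_name': 'swh-metadata-translator',
    'tool_version': '0.0.1',
    'tool_configuration': {'type': 'local', 'context': 'npm'},
}]))[0]

sha1 = bytes.fromhex('34973274ccef6ab4dfaaf86599792fa9c3fe4689')  # example id

# 1. Filter out contents whose metadata is already stored for this tool
#    (this runs the execute_values query added in db.py above).
missing = set(storage.content_metadata_missing([
    {'id': sha1, 'indexer_configuration_id': tool['id']},
]))

# 2. Write the computed metadata for the missing ids only; with
#    conflict_update=True existing rows would be overwritten instead.
if sha1 in missing:
    storage.content_metadata_add([{
        'id': sha1,
        'translated_metadata': {'name': 'example', 'version': '1.0.0'},
        'indexer_configuration_id': tool['id'],
    }], conflict_update=False)

# 3. Read everything back.
for row in storage.content_metadata_get([sha1]):
    print(row)

Checking missing ids first keeps the COPY small, but even without it the (id, indexer_configuration_id) uniqueness makes the add safe: with conflict_update left at False, duplicates are skipped rather than rejected.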