diff --git a/PKG-INFO b/PKG-INFO
index 7f5912a..a9e83c6 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.indexer
-Version: 0.0.47
+Version: 0.0.48
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/debian/control b/debian/control
index c4b42bc..554619c 100644
--- a/debian/control
+++ b/debian/control
@@ -1,47 +1,47 @@
 Source: swh-indexer
 Maintainer: Software Heritage developers <swh-devel@inria.fr>
 Section: python
 Priority: optional
 Build-Depends: debhelper (>= 9),
                dh-python (>= 2),
                python3-all,
                python3-chardet (>= 2.3.0~),
                python3-click,
                python3-nose,
                python3-pygments,
                python3-magic,
                python3-setuptools,
-               python3-swh.core (>= 0.0.27~),
+               python3-swh.core (>= 0.0.37~),
                python3-swh.model (>= 0.0.15~),
                python3-swh.objstorage (>= 0.0.13~),
                python3-swh.scheduler (>= 0.0.14~),
-               python3-swh.storage (>= 0.0.93~),
+               python3-swh.storage (>= 0.0.100~),
                python3-vcversioner
 Standards-Version: 3.9.6
 Homepage: https://forge.softwareheritage.org/diffusion/78/
 
 Package: python3-swh.indexer.storage
 Architecture: all
-Depends: python3-swh.core (>= 0.0.27~),
+Depends: python3-swh.core (>= 0.0.37~),
          python3-swh.model (>= 0.0.15~),
          python3-swh.objstorage (>= 0.0.13~),
          python3-swh.scheduler (>= 0.0.14~),
-         python3-swh.storage (>= 0.0.93~),
+         python3-swh.storage (>= 0.0.100~),
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage Content Indexer Storage
 
 Package: python3-swh.indexer
 Architecture: all
 Depends: python3-swh.scheduler (>= 0.0.14~),
-         python3-swh.core (>= 0.0.27~),
+         python3-swh.core (>= 0.0.37~),
          python3-swh.model (>= 0.0.15~),
          python3-swh.objstorage (>= 0.0.13~),
          python3-swh.scheduler (>= 0.0.14~),
-         python3-swh.storage (>= 0.0.93~),
+         python3-swh.storage (>= 0.0.100~),
          python3-swh.indexer.storage (= ${binary:Version}),
          universal-ctags (>= 0.8~),
          fossology-nomossa (>= 3.1~),
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage Content Indexer
diff --git a/requirements-swh.txt b/requirements-swh.txt
index 1c8f7d6..4fc505d 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,5 +1,5 @@
-swh.core >= 0.0.27
+swh.core >= 0.0.37
 swh.model >= 0.0.15
 swh.objstorage >= 0.0.13
 swh.scheduler >= 0.0.14
-swh.storage >= 0.0.93
+swh.storage >= 0.0.100
diff --git a/sql/swh-func.sql b/sql/swh-func.sql
index 62df8fa..82d0955 100644
--- a/sql/swh-func.sql
+++ b/sql/swh-func.sql
@@ -1,721 +1,731 @@
+-- Postgresql index helper function
+create or replace function hash_sha1(text)
+    returns text
+    language sql strict immutable
+as $$
+    select encode(public.digest($1, 'sha1'), 'hex')
+$$;
+
+comment on function hash_sha1(text) is 'Compute sha1 hash as text';
+
 -- create a temporary table with a single "bytea" column for fast object lookup.
 create or replace function swh_mktemp_bytea()
     returns void
     language sql
 as $$
     create temporary table tmp_bytea (
       id bytea
     ) on commit drop;
 $$;
 
 -- create a temporary table called tmp_TBLNAME, mimicking existing table
 -- TBLNAME
 --
 -- Args:
 --     tblname: name of the table to mimick
 create or replace function swh_mktemp(tblname regclass)
     returns void
     language plpgsql
 as $$
 begin
     execute format('
 	create temporary table tmp_%1$I
 	    (like %1$I including defaults)
 	    on commit drop;
       alter table tmp_%1$I drop column if exists object_id;
 	', tblname);
     return;
 end
 $$;
 
 -- create a temporary table for content_ctags tmp_content_mimetype_missing,
 create or replace function swh_mktemp_content_mimetype_missing()
     returns void
     language sql
 as $$
   create temporary table tmp_content_mimetype_missing (
     id sha1,
     indexer_configuration_id bigint
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_mimetype_missing() IS 'Helper table to filter existing mimetype information';
 
 -- check which entries of tmp_bytea are missing from content_mimetype
 --
 -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_mimetype_missing()
     returns setof sha1
     language plpgsql
 as $$
 begin
     return query
 	(select id::sha1 from tmp_content_mimetype_missing as tmp
 	 where not exists
 	     (select 1 from content_mimetype as c
               where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id));
     return;
 end
 $$;
 
 comment on function swh_content_mimetype_missing() is 'Filter existing mimetype information';
 
 -- create a temporary table for content_mimetype tmp_content_mimetype,
 create or replace function swh_mktemp_content_mimetype()
     returns void
     language sql
 as $$
   create temporary table tmp_content_mimetype (
     like content_mimetype including defaults
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_mimetype() IS 'Helper table to add mimetype information';
 
 -- add tmp_content_mimetype entries to content_mimetype, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- If filtering duplicates is in order, the call to
 -- swh_content_mimetype_missing must take place before calling this
 -- function.
 --
 --
 -- operates in bulk: 0. swh_mktemp(content_mimetype), 1. COPY to tmp_content_mimetype,
 -- 2. call this function
 create or replace function swh_content_mimetype_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     if conflict_update then
         insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id)
         select id, mimetype, encoding, indexer_configuration_id
         from tmp_content_mimetype tcm
             on conflict(id, indexer_configuration_id)
                 do update set mimetype = excluded.mimetype,
                               encoding = excluded.encoding;
 
     else
         insert into content_mimetype (id, mimetype, encoding, indexer_configuration_id)
         select id, mimetype, encoding, indexer_configuration_id
         from tmp_content_mimetype tcm
             on conflict(id, indexer_configuration_id) do nothing;
     end if;
     return;
 end
 $$;
 
 comment on function swh_content_mimetype_add(boolean) IS 'Add new content mimetypes';
 
 create type content_mimetype_signature as(
     id sha1,
     mimetype bytea,
     encoding bytea,
     tool_id integer,
     tool_name text,
     tool_version text,
     tool_configuration jsonb
 );
 
 -- Retrieve list of content mimetype from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_mimetype_get()
     returns setof content_mimetype_signature
     language plpgsql
 as $$
 begin
     return query
         select c.id, mimetype, encoding,
                i.id as tool_id, tool_name, tool_version, tool_configuration
         from tmp_bytea t
         inner join content_mimetype c on c.id=t.id
         inner join indexer_configuration i on c.indexer_configuration_id=i.id;
     return;
 end
 $$;
 
 comment on function swh_content_mimetype_get() IS 'List content''s mimetypes';
 
 -- create a temporary table for content_language tmp_content_language,
 create or replace function swh_mktemp_content_language_missing()
     returns void
     language sql
 as $$
   create temporary table tmp_content_language_missing (
     id sha1,
     indexer_configuration_id integer
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_language_missing() is 'Helper table to filter missing language';
 
 -- check which entries of tmp_bytea are missing from content_language
 --
 -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_language_missing()
     returns setof sha1
     language plpgsql
 as $$
 begin
     return query
 	select id::sha1 from tmp_content_language_missing as tmp
 	where not exists
 	    (select 1 from content_language as c
              where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id);
     return;
 end
 $$;
 
 comment on function swh_content_language_missing() IS 'Filter missing content languages';
 
 -- add tmp_content_language entries to content_language, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- If filtering duplicates is in order, the call to
 -- swh_content_language_missing must take place before calling this
 -- function.
 --
 -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
 -- tmp_content_language, 2. call this function
 create or replace function swh_content_language_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     if conflict_update then
       insert into content_language (id, lang, indexer_configuration_id)
       select id, lang, indexer_configuration_id
     	from tmp_content_language tcl
             on conflict(id, indexer_configuration_id)
                 do update set lang = excluded.lang;
 
     else
         insert into content_language (id, lang, indexer_configuration_id)
         select id, lang, indexer_configuration_id
     	  from tmp_content_language tcl
             on conflict(id, indexer_configuration_id)
             do nothing;
     end if;
     return;
 end
 $$;
 
 comment on function swh_content_language_add(boolean) IS 'Add new content languages';
 
 -- create a temporary table for retrieving content_language
 create or replace function swh_mktemp_content_language()
     returns void
     language sql
 as $$
   create temporary table tmp_content_language (
     like content_language including defaults
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_language() is 'Helper table to add content language';
 
 create type content_language_signature as (
     id sha1,
     lang languages,
     tool_id integer,
     tool_name text,
     tool_version text,
     tool_configuration jsonb
 );
 
 -- Retrieve list of content language from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function
 create or replace function swh_content_language_get()
     returns setof content_language_signature
     language plpgsql
 as $$
 begin
     return query
         select c.id, lang, i.id as tool_id, tool_name, tool_version, tool_configuration
         from tmp_bytea t
         inner join content_language c on c.id = t.id
         inner join indexer_configuration i on i.id=c.indexer_configuration_id;
     return;
 end
 $$;
 
 comment on function swh_content_language_get() is 'List content''s language';
 
 
 -- create a temporary table for content_ctags tmp_content_ctags,
 create or replace function swh_mktemp_content_ctags()
     returns void
     language sql
 as $$
   create temporary table tmp_content_ctags (
     like content_ctags including defaults
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_ctags() is 'Helper table to add content ctags';
 
 
 -- add tmp_content_ctags entries to content_ctags, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- operates in bulk: 0. swh_mktemp(content_ctags), 1. COPY to tmp_content_ctags,
 -- 2. call this function
 create or replace function swh_content_ctags_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     if conflict_update then
         delete from content_ctags
         where id in (select tmp.id
                      from tmp_content_ctags tmp
                      inner join indexer_configuration i on i.id=tmp.indexer_configuration_id);
     end if;
 
     insert into content_ctags (id, name, kind, line, lang, indexer_configuration_id)
     select id, name, kind, line, lang, indexer_configuration_id
     from tmp_content_ctags tct
         on conflict(id, hash_sha1(name), kind, line, lang, indexer_configuration_id)
         do nothing;
     return;
 end
 $$;
 
 comment on function swh_content_ctags_add(boolean) IS 'Add new ctags symbols per content';
 
 -- create a temporary table for content_ctags missing routine
 create or replace function swh_mktemp_content_ctags_missing()
     returns void
     language sql
 as $$
   create temporary table tmp_content_ctags_missing (
     id           sha1,
     indexer_configuration_id    integer
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_ctags_missing() is 'Helper table to filter missing content ctags';
 
 -- check which entries of tmp_bytea are missing from content_ctags
 --
 -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_ctags_missing()
     returns setof sha1
     language plpgsql
 as $$
 begin
     return query
 	(select id::sha1 from tmp_content_ctags_missing as tmp
 	 where not exists
 	     (select 1 from content_ctags as c
               where c.id = tmp.id and c.indexer_configuration_id=tmp.indexer_configuration_id
               limit 1));
     return;
 end
 $$;
 
 comment on function swh_content_ctags_missing() IS 'Filter missing content ctags';
 
 create type content_ctags_signature as (
   id sha1,
   name text,
   kind text,
   line bigint,
   lang ctags_languages,
   tool_id integer,
   tool_name text,
   tool_version text,
   tool_configuration jsonb
 );
 
 -- Retrieve list of content ctags from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function
 create or replace function swh_content_ctags_get()
     returns setof content_ctags_signature
     language plpgsql
 as $$
 begin
     return query
         select c.id, c.name, c.kind, c.line, c.lang,
                i.id as tool_id, i.tool_name, i.tool_version, i.tool_configuration
         from tmp_bytea t
         inner join content_ctags c using(id)
         inner join indexer_configuration i on i.id = c.indexer_configuration_id
         order by line;
     return;
 end
 $$;
 
 comment on function swh_content_ctags_get() IS 'List content ctags';
 
 -- Search within ctags content.
 --
 create or replace function swh_content_ctags_search(
        expression text,
        l integer default 10,
        last_sha1 sha1 default '\x0000000000000000000000000000000000000000')
     returns setof content_ctags_signature
     language sql
 as $$
     select c.id, name, kind, line, lang,
            i.id as tool_id, tool_name, tool_version, tool_configuration
     from content_ctags c
     inner join indexer_configuration i on i.id = c.indexer_configuration_id
     where hash_sha1(name) = hash_sha1(expression)
     and c.id > last_sha1
     order by id
     limit l;
 $$;
 
 comment on function swh_content_ctags_search(text, integer, sha1) IS 'Equality search through ctags'' symbols';
 
 
 -- create a temporary table for content_fossology_license tmp_content_fossology_license,
 create or replace function swh_mktemp_content_fossology_license()
     returns void
     language sql
 as $$
   create temporary table tmp_content_fossology_license (
     id                       sha1,
     license                  text,
     indexer_configuration_id integer
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_fossology_license() is 'Helper table to add content license';
 
 -- add tmp_content_fossology_license entries to content_fossology_license, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- operates in bulk: 0. swh_mktemp(content_fossology_license), 1. COPY to
 -- tmp_content_fossology_license, 2. call this function
 create or replace function swh_content_fossology_license_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     -- insert unknown licenses first
     insert into fossology_license (name)
     select distinct license from tmp_content_fossology_license tmp
     where not exists (select 1 from fossology_license where name=tmp.license)
     on conflict(name) do nothing;
 
     if conflict_update then
         -- delete from content_fossology_license c
         --   using tmp_content_fossology_license tmp, indexer_configuration i
         --   where c.id = tmp.id and i.id=tmp.indexer_configuration_id
         delete from content_fossology_license
         where id in (select tmp.id
                      from tmp_content_fossology_license tmp
                      inner join indexer_configuration i on i.id=tmp.indexer_configuration_id);
     end if;
 
     insert into content_fossology_license (id, license_id, indexer_configuration_id)
     select tcl.id,
           (select id from fossology_license where name = tcl.license) as license,
           indexer_configuration_id
     from tmp_content_fossology_license tcl
         on conflict(id, license_id, indexer_configuration_id)
         do nothing;
     return;
 end
 $$;
 
 comment on function swh_content_fossology_license_add(boolean) IS 'Add new content licenses';
 
 create type content_fossology_license_signature as (
   id                 sha1,
   tool_id            integer,
   tool_name          text,
   tool_version       text,
   tool_configuration jsonb,
   licenses           text[]
 );
 
 -- Retrieve list of content license from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_fossology_license_get()
     returns setof content_fossology_license_signature
     language plpgsql
 as $$
 begin
     return query
       select cl.id,
              ic.id as tool_id,
              ic.tool_name,
              ic.tool_version,
              ic.tool_configuration,
              array(select name
                    from fossology_license
                    where id = ANY(array_agg(cl.license_id))) as licenses
       from tmp_bytea tcl
       inner join content_fossology_license cl using(id)
       inner join indexer_configuration ic on ic.id=cl.indexer_configuration_id
       group by cl.id, ic.id, ic.tool_name, ic.tool_version, ic.tool_configuration;
     return;
 end
 $$;
 
 comment on function swh_content_fossology_license_get() IS 'List content licenses';
 
 -- content_metadata functions
 --
 -- create a temporary table for content_metadata tmp_content_metadata,
 create or replace function swh_mktemp_content_metadata_missing()
     returns void
     language sql
 as $$
   create temporary table tmp_content_metadata_missing (
     id sha1,
     indexer_configuration_id integer
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_metadata_missing() is 'Helper table to filter missing metadata in content_metadata';
 
 -- check which entries of tmp_bytea are missing from content_metadata
 --
 -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_content_metadata_missing()
     returns setof sha1
     language plpgsql
 as $$
 begin
     return query
 	select id::sha1 from tmp_content_metadata_missing as tmp
 	where not exists
 	    (select 1 from content_metadata as c
              where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id);
     return;
 end
 $$;
 
 comment on function swh_content_metadata_missing() IS 'Filter missing content metadata';
 
 -- add tmp_content_metadata entries to content_metadata, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- If filtering duplicates is in order, the call to
 -- swh_content_metadata_missing must take place before calling this
 -- function.
 --
 -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
 -- tmp_content_metadata, 2. call this function
 create or replace function swh_content_metadata_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     if conflict_update then
       insert into content_metadata (id, translated_metadata, indexer_configuration_id)
       select id, translated_metadata, indexer_configuration_id
     	from tmp_content_metadata tcm
             on conflict(id, indexer_configuration_id)
                 do update set translated_metadata = excluded.translated_metadata;
 
     else
         insert into content_metadata (id, translated_metadata, indexer_configuration_id)
         select id, translated_metadata, indexer_configuration_id
     	from tmp_content_metadata tcm
             on conflict(id, indexer_configuration_id)
             do nothing;
     end if;
     return;
 end
 $$;
 
 comment on function swh_content_metadata_add(boolean) IS 'Add new content metadata';
 
 -- create a temporary table for retrieving content_metadata
 create or replace function swh_mktemp_content_metadata()
     returns void
     language sql
 as $$
   create temporary table tmp_content_metadata (
     like content_metadata including defaults
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_content_metadata() is 'Helper table to add content metadata';
 
 --
 create type content_metadata_signature as (
     id sha1,
     translated_metadata jsonb,
     tool_id integer,
     tool_name text,
     tool_version text,
     tool_configuration jsonb
 );
 
 -- Retrieve list of content metadata from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function
 create or replace function swh_content_metadata_get()
     returns setof content_metadata_signature
     language plpgsql
 as $$
 begin
     return query
         select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration
         from tmp_bytea t
         inner join content_metadata c on c.id = t.id
         inner join indexer_configuration i on i.id=c.indexer_configuration_id;
     return;
 end
 $$;
 
 comment on function swh_content_metadata_get() is 'List content''s metadata';
 -- end content_metadata functions
 
 -- revision_metadata functions
 --
 -- create a temporary table for revision_metadata tmp_revision_metadata,
 create or replace function swh_mktemp_revision_metadata_missing()
     returns void
     language sql
 as $$
   create temporary table tmp_revision_metadata_missing (
     id sha1_git,
     indexer_configuration_id integer
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_revision_metadata_missing() is 'Helper table to filter missing metadata in revision_metadata';
 
 -- check which entries of tmp_bytea are missing from revision_metadata
 --
 -- operates in bulk: 0. swh_mktemp_bytea(), 1. COPY to tmp_bytea,
 -- 2. call this function
 create or replace function swh_revision_metadata_missing()
     returns setof sha1
     language plpgsql
 as $$
 begin
     return query
 	select id::sha1 from tmp_revision_metadata_missing as tmp
 	where not exists
 	    (select 1 from revision_metadata as c
              where c.id = tmp.id and c.indexer_configuration_id = tmp.indexer_configuration_id);
     return;
 end
 $$;
 
 comment on function swh_revision_metadata_missing() IS 'Filter missing content metadata';
 
 -- add tmp_revision_metadata entries to revision_metadata, overwriting
 -- duplicates if conflict_update is true, skipping duplicates otherwise.
 --
 -- If filtering duplicates is in order, the call to
 -- swh_revision_metadata_missing must take place before calling this
 -- function.
 --
 -- operates in bulk: 0. swh_mktemp(content_language), 1. COPY to
 -- tmp_revision_metadata, 2. call this function
 create or replace function swh_revision_metadata_add(conflict_update boolean)
     returns void
     language plpgsql
 as $$
 begin
     if conflict_update then
       insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
       select id, translated_metadata, indexer_configuration_id
     	from tmp_revision_metadata tcm
             on conflict(id, indexer_configuration_id)
                 do update set translated_metadata = excluded.translated_metadata;
 
     else
         insert into revision_metadata (id, translated_metadata, indexer_configuration_id)
         select id, translated_metadata, indexer_configuration_id
     	from tmp_revision_metadata tcm
             on conflict(id, indexer_configuration_id)
             do nothing;
     end if;
     return;
 end
 $$;
 
 comment on function swh_revision_metadata_add(boolean) IS 'Add new revision metadata';
 
 -- create a temporary table for retrieving revision_metadata
 create or replace function swh_mktemp_revision_metadata()
     returns void
     language sql
 as $$
   create temporary table tmp_revision_metadata (
     like revision_metadata including defaults
   ) on commit drop;
 $$;
 
 comment on function swh_mktemp_revision_metadata() is 'Helper table to add revision metadata';
 
 --
 create type revision_metadata_signature as (
     id sha1_git,
     translated_metadata jsonb,
     tool_id integer,
     tool_name text,
     tool_version text,
     tool_configuration jsonb
 );
 
 -- Retrieve list of revision metadata from the temporary table.
 --
 -- operates in bulk: 0. mktemp(tmp_bytea), 1. COPY to tmp_bytea, 2. call this function
 create or replace function swh_revision_metadata_get()
     returns setof revision_metadata_signature
     language plpgsql
 as $$
 begin
     return query
         select c.id, translated_metadata, i.id as tool_id, tool_name, tool_version, tool_configuration
         from tmp_bytea t
         inner join revision_metadata c on c.id = t.id
         inner join indexer_configuration i on i.id=c.indexer_configuration_id;
     return;
 end
 $$;
 
 create or replace function swh_mktemp_indexer_configuration()
     returns void
     language sql
 as $$
     create temporary table tmp_indexer_configuration (
       like indexer_configuration including defaults
     ) on commit drop;
     alter table tmp_indexer_configuration drop column id;
 $$;
 
 
 -- add tmp_indexer_configuration entries to indexer_configuration,
 -- skipping duplicates if any.
 --
 -- operates in bulk: 0. create temporary tmp_indexer_configuration, 1. COPY to
 -- it, 2. call this function to insert and filtering out duplicates
 create or replace function swh_indexer_configuration_add()
     returns setof indexer_configuration
     language plpgsql
 as $$
 begin
       insert into indexer_configuration(tool_name, tool_version, tool_configuration)
       select tool_name, tool_version, tool_configuration from tmp_indexer_configuration tmp
       on conflict(tool_name, tool_version, tool_configuration) do nothing;
 
       return query
           select id, tool_name, tool_version, tool_configuration
           from tmp_indexer_configuration join indexer_configuration
               using(tool_name, tool_version, tool_configuration);
 
       return;
 end
 $$;
diff --git a/sql/swh-init.sql b/sql/swh-init.sql
index e78ac3c..43774e3 100644
--- a/sql/swh-init.sql
+++ b/sql/swh-init.sql
@@ -1,13 +1,4 @@
 create extension if not exists btree_gist;
 create extension if not exists pgcrypto;
 
 create or replace language plpgsql;
-create or replace language plpython3u;
-
-create or replace function hash_sha1(text)
-returns text
-as $$
-select encode(digest($1, 'sha1'), 'hex')
-$$ language sql strict immutable;
-
-comment on function hash_sha1(text) is 'Compute sha1 hash as text';
diff --git a/sql/upgrades/114.sql b/sql/upgrades/114.sql
deleted file mode 100644
index 7699a6d..0000000
--- a/sql/upgrades/114.sql
+++ /dev/null
@@ -1,8 +0,0 @@
-create sequence origin_metadata_translation_id_seq
-	start with 1
-	increment by 1
-	no maxvalue
-	no minvalue
-	cache 1;
-
-select setval('fossology_license_id_seq', 833, true);
diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO
index 7f5912a..a9e83c6 100644
--- a/swh.indexer.egg-info/PKG-INFO
+++ b/swh.indexer.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 1.0
 Name: swh.indexer
-Version: 0.0.47
+Version: 0.0.48
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Description: UNKNOWN
 Platform: UNKNOWN
diff --git a/swh.indexer.egg-info/SOURCES.txt b/swh.indexer.egg-info/SOURCES.txt
index e096619..ac07ca8 100644
--- a/swh.indexer.egg-info/SOURCES.txt
+++ b/swh.indexer.egg-info/SOURCES.txt
@@ -1,72 +1,71 @@
 .gitignore
 AUTHORS
 LICENSE
 MANIFEST.in
 Makefile
 README
 codemeta.json
 requirements-swh.txt
 requirements.txt
 setup.py
 version.txt
 debian/changelog
 debian/compat
 debian/control
 debian/copyright
 debian/rules
 debian/source/format
 docs/.gitignore
 docs/Makefile
 docs/conf.py
 docs/index.rst
 docs/_static/.placeholder
 docs/_templates/.placeholder
 sql/Makefile
 sql/swh-data.sql
 sql/swh-enums.sql
 sql/swh-func.sql
 sql/swh-indexes.sql
 sql/swh-init.sql
 sql/swh-schema.sql
 sql/bin/db-upgrade
 sql/bin/dot_add_content
 sql/doc/json
 sql/json/.gitignore
 sql/json/Makefile
 sql/json/indexer_configuration.tool_configuration.schema.json
 sql/json/revision_metadata.translated_metadata.json
-sql/upgrades/114.sql
 swh/__init__.py
 swh.indexer.egg-info/PKG-INFO
 swh.indexer.egg-info/SOURCES.txt
 swh.indexer.egg-info/dependency_links.txt
 swh.indexer.egg-info/requires.txt
 swh.indexer.egg-info/top_level.txt
 swh/indexer/__init__.py
 swh/indexer/ctags.py
 swh/indexer/fossology_license.py
 swh/indexer/indexer.py
 swh/indexer/language.py
 swh/indexer/metadata.py
 swh/indexer/metadata_detector.py
 swh/indexer/metadata_dictionary.py
 swh/indexer/mimetype.py
 swh/indexer/orchestrator.py
 swh/indexer/producer.py
 swh/indexer/rehash.py
 swh/indexer/tasks.py
 swh/indexer/storage/__init__.py
 swh/indexer/storage/converters.py
 swh/indexer/storage/db.py
 swh/indexer/storage/api/__init__.py
 swh/indexer/storage/api/client.py
 swh/indexer/storage/api/server.py
 swh/indexer/tests/__init__.py
 swh/indexer/tests/test_language.py
 swh/indexer/tests/test_metadata.py
 swh/indexer/tests/test_mimetype.py
 swh/indexer/tests/test_utils.py
 swh/indexer/tests/storage/__init__.py
 swh/indexer/tests/storage/test_api_client.py
 swh/indexer/tests/storage/test_converters.py
 swh/indexer/tests/storage/test_storage.py
\ No newline at end of file
diff --git a/swh.indexer.egg-info/requires.txt b/swh.indexer.egg-info/requires.txt
index a992fa1..383786d 100644
--- a/swh.indexer.egg-info/requires.txt
+++ b/swh.indexer.egg-info/requires.txt
@@ -1,10 +1,10 @@
 chardet
 click
 file_magic
 pygments
-swh.core>=0.0.27
+swh.core>=0.0.37
 swh.model>=0.0.15
 swh.objstorage>=0.0.13
 swh.scheduler>=0.0.14
-swh.storage>=0.0.93
+swh.storage>=0.0.100
 vcversioner
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
index 5231a12..6950a07 100644
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -1,418 +1,418 @@
 # Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import abc
 import os
 import logging
 import shutil
 import tempfile
 
 from swh.core.config import SWHConfig
 from swh.objstorage import get_objstorage
 from swh.objstorage.exc import ObjNotFoundError
 from swh.model import hashutil
 from swh.scheduler.utils import get_task
 from swh.indexer.storage import get_indexer_storage, INDEXER_CFG_KEY
 
 
 class DiskIndexer:
     """Mixin intended to be used with other SomethingIndexer classes.
 
        Indexers inheriting from this class are a category of indexers
        which needs the disk for their computations.
 
        Note:
            This expects `self.working_directory` variable defined at
            runtime.
 
     """
     def write_to_temp(self, filename, data):
         """Write the sha1's content in a temporary file.
 
         Args:
             sha1 (str): the sha1 name
             filename (str): one of sha1's many filenames
             data (bytes): the sha1's content to write in temporary
             file
 
         Returns:
             The path to the temporary file created. That file is
             filled in with the raw content's data.
 
         """
         os.makedirs(self.working_directory, exist_ok=True)
         temp_dir = tempfile.mkdtemp(dir=self.working_directory)
         content_path = os.path.join(temp_dir, filename)
 
         with open(content_path, 'wb') as f:
             f.write(data)
 
         return content_path
 
     def cleanup(self, content_path):
         """Remove content_path from working directory.
 
         Args:
             content_path (str): the file to remove
 
         """
         temp_dir = os.path.dirname(content_path)
         shutil.rmtree(temp_dir)
 
 
 class BaseIndexer(SWHConfig,
                   metaclass=abc.ABCMeta):
     """Base class for indexers to inherit from.
 
     The main entry point is the :func:`run` function which is in
     charge of triggering the computations on the batch dict/ids
     received.
 
     Indexers can:
 
     - filter out ids whose data has already been indexed.
     - retrieve ids data from storage or objstorage
     - index this data depending on the object and store the result in
       storage.
 
     To implement a new object type indexer, inherit from the
     BaseIndexer and implement the process of indexation:
 
     :func:`run`:
       object_ids are different depending on object. For example: sha1 for
       content, sha1_git for revision, directory, release, and id for origin
 
     To implement a new concrete indexer, inherit from the object level
     classes: :class:`ContentIndexer`, :class:`RevisionIndexer` (later
     on :class:`OriginIndexer` will also be available)
 
     Then you need to implement the following functions:
 
     :func:`filter`:
       filter out data already indexed (in storage). This function is used by
       the orchestrator and not directly by the indexer
       (cf. swh.indexer.orchestrator.BaseOrchestratorIndexer).
 
     :func:`index_object`:
       compute index on id with data (retrieved from the storage or the
       objstorage by the id key) and return the resulting index computation.
 
     :func:`persist_index_computations`:
       persist the results of multiple index computations in the storage.
 
     The new indexer implementation can also override the following functions:
 
     :func:`prepare`:
       Configuration preparation for the indexer.  When overriding, this must
       call the `super().prepare()` instruction.
 
     :func:`check`:
       Configuration check for the indexer.  When overriding, this must call the
       `super().check()` instruction.
 
     :func:`register_tools`:
       This should return a dict of the tool(s) to use when indexing or
       filtering.
 
     """
     CONFIG = 'indexer/base'
 
     DEFAULT_CONFIG = {
         INDEXER_CFG_KEY: ('dict', {
             'cls': 'remote',
             'args': {
                 'db': 'service=swh-indexer-dev'
             }
         }),
 
         # queue to reschedule if problem (none for no rescheduling,
         # the default)
         'rescheduling_task': ('str', None),
         'objstorage': ('dict', {
             'cls': 'multiplexer',
             'args': {
                 'objstorages': [{
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '0euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '0'}
                         ]
                     }
                 }, {
                     'cls': 'filtered',
                     'args': {
                         'storage_conf': {
                             'cls': 'azure-storage',
                             'args': {
                                 'account_name': '1euwestswh',
                                 'api_secret_key': 'secret',
                                 'container_name': 'contents'
                             }
                         },
                         'filters_conf': [
                             {'type': 'readonly'},
                             {'type': 'prefix', 'prefix': '1'}
                         ]
                     }
                 }]
             },
         }),
     }
 
     ADDITIONAL_CONFIG = {}
 
     def __init__(self):
         """Prepare and check that the indexer is ready to run.
 
         """
         super().__init__()
         self.prepare()
         self.check()
 
     def prepare(self):
         """Prepare the indexer's needed runtime configuration.
            Without this step, the indexer cannot possibly run.
 
         """
         self.config = self.parse_config_file(
             additional_configs=[self.ADDITIONAL_CONFIG])
         objstorage = self.config['objstorage']
         self.objstorage = get_objstorage(objstorage['cls'], objstorage['args'])
         idx_storage = self.config[INDEXER_CFG_KEY]
         self.idx_storage = get_indexer_storage(**idx_storage)
         rescheduling_task = self.config['rescheduling_task']
         if rescheduling_task:
             self.rescheduling_task = get_task(rescheduling_task)
         else:
             self.rescheduling_task = None
 
-        l = logging.getLogger('requests.packages.urllib3.connectionpool')
-        l.setLevel(logging.WARN)
+        _log = logging.getLogger('requests.packages.urllib3.connectionpool')
+        _log.setLevel(logging.WARN)
         self.log = logging.getLogger('swh.indexer')
         self.tools = list(self.register_tools(self.config['tools']))
 
     def check(self):
         """Check the indexer's configuration is ok before proceeding.
            If ok, does nothing. If not raise error.
 
         """
         if not self.tools:
             raise ValueError('Tools %s is unknown, cannot continue' %
                              self.tools)
 
     def _prepare_tool(self, tool):
         """Prepare the tool dict to be compliant with the storage api.
 
         """
         return {'tool_%s' % key: value for key, value in tool.items()}
 
     def register_tools(self, tools):
         """Permit to register tools to the storage.
 
            Add a sensible default which can be overridden if not
            sufficient.  (For now, all indexers use only one tool)
 
            Expects the self.config['tools'] property to be set with
            one or more tools.
 
         Args:
             tools (dict/[dict]): Either a dict or a list of dict.
 
         Returns:
             List of dict with additional id key.
 
         Raises:
             ValueError if not a list nor a dict.
 
         """
         tools = self.config['tools']
         if isinstance(tools, list):
             tools = map(self._prepare_tool, tools)
         elif isinstance(tools, dict):
             tools = [self._prepare_tool(tools)]
         else:
             raise ValueError('Configuration tool(s) must be a dict or list!')
 
         return self.idx_storage.indexer_configuration_add(tools)
 
     @abc.abstractmethod
     def filter(self, ids):
         """Filter missing ids for that particular indexer.
 
         Args:
             ids ([bytes]): list of ids
 
         Yields:
             iterator of missing ids
 
         """
         pass
 
     @abc.abstractmethod
     def index(self, id, data):
         """Index computation for the id and associated raw data.
 
         Args:
             id (bytes): identifier
             data (bytes): id's data from storage or objstorage depending on
                              object type
 
         Returns:
             a dict that makes sense for the persist_index_computations
         function.
 
         """
         pass
 
     @abc.abstractmethod
     def persist_index_computations(self, results, policy_update):
         """Persist the computation resulting from the index.
 
         Args:
 
             results ([result]): List of results. One result is the
                                 result of the index function.
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them
 
         Returns:
             None
 
         """
         pass
 
     def next_step(self, results):
         """Do something else with computations results (e.g. send to another
         queue, ...).
 
         (This is not an abstractmethod since it is optional).
 
         Args:
             results ([result]): List of results (dict) as returned
                                 by index function.
 
         Returns:
             None
 
         """
         pass
 
     @abc.abstractmethod
     def run(self, ids, policy_update):
         """Given a list of ids:
 
         - retrieves the data from the storage
         - executes the indexing computations
         - stores the results (according to policy_update)
 
         Args:
             ids ([bytes]): id's identifier list
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         pass
 
 
 class ContentIndexer(BaseIndexer):
     """An object type indexer, inherits from the :class:`BaseIndexer` and
     implements the process of indexation for Contents using the run
     method
 
     Note: the :class:`ContentIndexer` is not an instantiable
     object. To use it in another context, one should inherit from this
     class and override the methods mentioned in the
     :class:`BaseIndexer` class.
 
     """
 
     def run(self, ids, policy_update):
         """Given a list of ids:
 
         - retrieve the content from the storage
         - execute the indexing computations
         - store the results (according to policy_update)
 
         Args:
             ids ([bytes]): sha1's identifier list
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them
 
         """
         results = []
         try:
             for sha1 in ids:
                 try:
                     raw_content = self.objstorage.get(sha1)
                 except ObjNotFoundError:
                     self.log.warn('Content %s not found in objstorage' %
                                   hashutil.hash_to_hex(sha1))
                     continue
                 res = self.index(sha1, raw_content)
                 if res:  # If no results, skip it
                     results.append(res)
 
             self.persist_index_computations(results, policy_update)
             self.next_step(results)
         except Exception:
             self.log.exception(
                 'Problem when reading contents metadata.')
             if self.rescheduling_task:
                 self.log.warn('Rescheduling batch')
                 self.rescheduling_task.delay(ids, policy_update)
 
 
 class RevisionIndexer(BaseIndexer):
     """An object type indexer, inherits from the :class:`BaseIndexer` and
     implements the process of indexation for Revisions using the run
     method
 
     Note: the :class:`RevisionIndexer` is not an instantiable object.
     To use it in another context one should inherit from this class
     and override the methods mentioned in the :class:`BaseIndexer`
     class.
 
     """
     def run(self, ids, policy_update):
         """Given a list of sha1_gits:
 
         - retrieve revisions from storage
         - execute the indexing computations
         - store the results (according to policy_update)
 
         Args:
             ids ([bytes]): sha1_git's identifier list
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
                                    respectively update duplicates or ignore
                                    them
 
         """
         results = []
         revs = self.storage.revision_get(ids)
 
         for rev in revs:
             if not rev:
                 self.log.warn('Revisions %s not found in storage' %
                               list(map(hashutil.hash_to_hex, ids)))
                 continue
             try:
                 res = self.index(rev)
                 if res:  # If no results, skip it
                     results.append(res)
             except Exception:
                 self.log.exception(
                         'Problem when processing revision')
         self.persist_index_computations(results, policy_update)
diff --git a/swh/indexer/language.py b/swh/indexer/language.py
index 6433977..044d80a 100644
--- a/swh/indexer/language.py
+++ b/swh/indexer/language.py
@@ -1,208 +1,208 @@
 # Copyright (C) 2016-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 import io
 
 from pygments.lexers import guess_lexer
 from pygments.util import ClassNotFound
 from chardet.universaldetector import UniversalDetector
 
 from .indexer import ContentIndexer
 
 
 def _cleanup_classname(classname):
     """Determine the language from the pygments' lexer names.
 
     """
     return classname.lower().replace(' ', '-')
 
 
 def _read_raw(raw_content, size=2048):
     """Read raw content in chunk.
 
     """
     bs = io.BytesIO(raw_content)
     while True:
         chunk = bs.read(size)
         if not chunk:
             break
         yield chunk
 
 
 def _detect_encoding(raw_content):
     """Given a raw content, try and detect its encoding.
 
     """
     detector = UniversalDetector()
     for chunk in _read_raw(raw_content):
         detector.feed(chunk)
         if detector.done:
             break
     detector.close()
     return detector.result['encoding']
 
 
 def compute_language_from_chunk(encoding, length, raw_content, max_size,
                                 log=None):
     """Determine the raw content's language.
 
     Args:
         encoding (str): Encoding to use to decode the content
         length (int): raw_content's length
         raw_content (bytes): raw content to work with
         max_size (int): max size to split the raw content at
 
     Returns:
         Dict with keys:
         - lang: None if nothing found or the possible language
 
     """
     try:
         if max_size <= length:
             raw_content = raw_content[0:max_size]
 
         content = raw_content.decode(encoding)
         lang = _cleanup_classname(
             guess_lexer(content).name)
     except ClassNotFound:
         lang = None
     except UnicodeDecodeError:
         raise
     except Exception:
         if log:
             log.exception('Problem during language detection, skipping')
         lang = None
     return {
         'lang': lang
     }
 
 
 def compute_language(raw_content, encoding=None, log=None):
     """Determine the raw content's language.
 
     Args:
         raw_content (bytes): raw content to work with
 
     Returns:
         Dict with keys:
         - lang: None if nothing found or the possible language
 
     """
     try:
         encoding = _detect_encoding(raw_content)
         content = raw_content.decode(encoding)
         lang = _cleanup_classname(
             guess_lexer(content).name)
     except ClassNotFound:
         lang = None
     except Exception:
         if log:
             log.exception('Problem during language detection, skipping')
         lang = None
     return {
         'lang': lang
     }
 
 
 class ContentLanguageIndexer(ContentIndexer):
     """Indexer in charge of:
 
     - filtering out content already indexed
     - reading content from objstorage per the content's id (sha1)
     - computing {mimetype, encoding} from that content
     - store result in storage
 
     """
     CONFIG_BASE_FILENAME = 'indexer/language'
 
     ADDITIONAL_CONFIG = {
         'tools': ('dict', {
             'name': 'pygments',
             'version': '2.0.1+dfsg-1.1+deb8u1',
             'configuration': {
                 'type': 'library',
                 'debian-package': 'python3-pygments',
                 'max_content_size': 10240,
             },
         }),
     }
 
     def prepare(self):
         super().prepare()
         c = self.config
         self.max_content_size = c['tools']['configuration']['max_content_size']
         self.tool = self.tools[0]
 
     def filter(self, ids):
         """Filter out known sha1s and return only missing ones.
 
         """
         yield from self.idx_storage.content_language_missing((
             {
                 'id': sha1,
                 'indexer_configuration_id': self.tool['id']
             } for sha1 in ids
         ))
 
     def index(self, id, data):
         """Index sha1s' content and store result.
 
         Args:
             id (bytes): content's identifier
             data (bytes): raw content in bytes
 
         Returns:
             A dict, representing a content_mimetype, with keys:
               - id (bytes): content's identifier (sha1)
               - lang (bytes): detected language
 
         """
         result = {
             'id': id,
             'indexer_configuration_id': self.tool['id'],
             'lang': None,
         }
 
         encoding = _detect_encoding(data)
 
         if not encoding:
             return result
 
-        l = len(data)
+        _len = len(data)
         for i in range(0, 9):
             max_size = self.max_content_size + i
 
             try:
                 result = compute_language_from_chunk(
-                    encoding, l, data, max_size, log=self.log)
+                    encoding, _len, data, max_size, log=self.log)
             except UnicodeDecodeError:
                 self.log.warn('Decoding failed on wrong byte chunk at [0-%s]'
                               ', trying again at next ending byte.' % max_size)
                 continue
 
             # we found something, so we return it
             result.update({
                 'id': id,
                 'indexer_configuration_id': self.tool['id'],
             })
             break
 
         return result
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_mimetype, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - lang (bytes): detected language
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         self.idx_storage.content_language_add(
             results, conflict_update=(policy_update == 'update-dups'))
diff --git a/swh/indexer/metadata.py b/swh/indexer/metadata.py
index fd44a5f..25d2332 100644
--- a/swh/indexer/metadata.py
+++ b/swh/indexer/metadata.py
@@ -1,300 +1,300 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 import click
 import logging
 
 from swh.indexer.indexer import ContentIndexer, RevisionIndexer
 from swh.indexer.metadata_dictionary import compute_metadata
 from swh.indexer.metadata_detector import detect_metadata
 from swh.indexer.metadata_detector import extract_minimal_metadata_dict
 from swh.indexer.storage import INDEXER_CFG_KEY
 
 from swh.model import hashutil
 
 
 class ContentMetadataIndexer(ContentIndexer):
     """Content-level indexer
 
     This indexer is in charge of:
 
     - filtering out content already indexed in content_metadata
     - reading content from objstorage with the content's id sha1
     - computing translated_metadata by given context
     - using the metadata_dictionary as the 'swh-metadata-translator' tool
     - store result in content_metadata table
 
     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'
 
     def __init__(self, tool, config):
         # twisted way to use the exact same config of RevisionMetadataIndexer
         # object that uses internally ContentMetadataIndexer
         self.config = config
         self.config['tools'] = tool
         super().__init__()
 
     def prepare(self):
         self.results = []
         if self.config[INDEXER_CFG_KEY]:
             self.idx_storage = self.config[INDEXER_CFG_KEY]
         if self.config['objstorage']:
             self.objstorage = self.config['objstorage']
-        l = logging.getLogger('requests.packages.urllib3.connectionpool')
-        l.setLevel(logging.WARN)
+        _log = logging.getLogger('requests.packages.urllib3.connectionpool')
+        _log.setLevel(logging.WARN)
         self.log = logging.getLogger('swh.indexer')
         self.tools = self.register_tools(self.config['tools'])
         # NOTE: only one tool so far, change when no longer true
         self.tool = self.tools[0]
 
     def filter(self, ids):
         """Filter out known sha1s and return only missing ones.
         """
         yield from self.idx_storage.content_metadata_missing((
             {
                 'id': sha1,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1 in ids
         ))
 
     def index(self, id, data):
         """Index sha1s' content and store result.
 
         Args:
             id (bytes): content's identifier
             data (bytes): raw content in bytes
 
         Returns:
             dict: dictionary representing a content_metadata. If the
             translation wasn't successful the translated_metadata keys will
             be returned as None
 
         """
         result = {
             'id': id,
             'indexer_configuration_id': self.tool['id'],
             'translated_metadata': None
         }
         try:
             context = self.tool['tool_configuration']['context']
             result['translated_metadata'] = compute_metadata(context, data)
             # a twisted way to keep result with indexer object for get_results
             self.results.append(result)
-        except:
+        except Exception:
             self.log.exception(
                 "Problem during tool retrieval of metadata translation")
         return result
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_metadata, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - translated_metadata (jsonb): detected metadata
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         self.idx_storage.content_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
     def get_results(self):
         """can be called only if run method was called before
 
         Returns:
             list: list of content_metadata entries calculated by
                   current indexer
 
         """
         return self.results
 
 
 class RevisionMetadataIndexer(RevisionIndexer):
     """Revision-level indexer
 
     This indexer is in charge of:
 
     - filtering revisions already indexed in revision_metadata table with
       defined computation tool
     - retrieve all entry_files in root directory
     - use metadata_detector for file_names containig metadata
     - compute metadata translation if necessary and possible (depends on tool)
     - send sha1s to content indexing if possible
     - store the results for revision
 
     """
     CONFIG_BASE_FILENAME = 'indexer/metadata'
 
     ADDITIONAL_CONFIG = {
         'storage': ('dict', {
             'cls': 'remote',
             'args': {
                 'url': 'http://localhost:5002/',
             }
         }),
         'tools': ('dict', {
             'name': 'swh-metadata-detector',
             'version': '0.0.1',
             'configuration': {
                 'type': 'local',
                 'context': ['npm', 'codemeta']
             },
         }),
     }
 
     def prepare(self):
         super().prepare()
         self.tool = self.tools[0]
 
     def filter(self, sha1_gits):
         """Filter out known sha1s and return only missing ones.
 
         """
         yield from self.idx_storage.revision_metadata_missing((
             {
                 'id': sha1_git,
                 'indexer_configuration_id': self.tool['id'],
             } for sha1_git in sha1_gits
         ))
 
     def index(self, rev):
         """Index rev by processing it and organizing result.
 
         use metadata_detector to iterate on filenames
 
         - if one filename detected -> sends file to content indexer
         - if multiple file detected -> translation needed at revision level
 
         Args:
           rev (bytes): revision artifact from storage
 
         Returns:
             dict: dictionary representing a revision_metadata, with keys:
 
                 - id (bytes): rev's identifier (sha1_git)
                 - indexer_configuration_id (bytes): tool used
                 - translated_metadata (bytes): dict of retrieved metadata
 
         """
         try:
             result = {
                 'id': rev['id'],
                 'indexer_configuration_id': self.tool['id'],
                 'translated_metadata': None
             }
 
             root_dir = rev['directory']
             dir_ls = self.storage.directory_ls(root_dir, recursive=False)
             files = (entry for entry in dir_ls if entry['type'] == 'file')
             detected_files = detect_metadata(files)
             result['translated_metadata'] = self.translate_revision_metadata(
                                                                 detected_files)
         except Exception as e:
             self.log.exception(
                 'Problem when indexing rev')
         return result
 
     def persist_index_computations(self, results, policy_update):
         """Persist the results in storage.
 
         Args:
             results ([dict]): list of content_mimetype, dict with the
             following keys:
               - id (bytes): content's identifier (sha1)
               - mimetype (bytes): mimetype in bytes
               - encoding (bytes): encoding in bytes
             policy_update ([str]): either 'update-dups' or 'ignore-dups' to
             respectively update duplicates or ignore them
 
         """
         # TODO: add functions in storage to keep data in revision_metadata
         self.idx_storage.revision_metadata_add(
             results, conflict_update=(policy_update == 'update-dups'))
 
     def translate_revision_metadata(self, detected_files):
         """
         Determine plan of action to translate metadata when containing
         one or multiple detected files:
 
         Args:
             detected_files (dict): dictionary mapping context names (e.g.,
               "npm", "authors") to list of sha1
 
         Returns:
             dict: dict with translated metadata according to the CodeMeta
             vocabulary
 
         """
         translated_metadata = []
         tool = {
                 'name': 'swh-metadata-translator',
                 'version': '0.0.1',
                 'configuration': {
                     'type': 'local',
                     'context': None
                 },
             }
         # TODO: iterate on each context, on each file
         # -> get raw_contents
         # -> translate each content
         config = {
             INDEXER_CFG_KEY: self.idx_storage,
             'objstorage': self.objstorage
         }
         for context in detected_files.keys():
             tool['configuration']['context'] = context
             c_metadata_indexer = ContentMetadataIndexer(tool, config)
             # sha1s that are in content_metadata table
             sha1s_in_storage = []
             metadata_generator = self.idx_storage.content_metadata_get(
                 detected_files[context])
             for c in metadata_generator:
                 # extracting translated_metadata
                 sha1 = c['id']
                 sha1s_in_storage.append(sha1)
                 local_metadata = c['translated_metadata']
                 # local metadata is aggregated
                 if local_metadata:
                     translated_metadata.append(local_metadata)
 
             sha1s_filtered = [item for item in detected_files[context]
                               if item not in sha1s_in_storage]
 
             if sha1s_filtered:
                 # schedule indexation of content
                 try:
                     c_metadata_indexer.run(sha1s_filtered,
                                            policy_update='ignore-dups')
                     # on the fly possibility:
                     results = c_metadata_indexer.get_results()
 
                     for result in results:
                         local_metadata = result['translated_metadata']
                         translated_metadata.append(local_metadata)
 
                 except Exception as e:
                     self.log.warn("""Exception while indexing content""", e)
 
         # transform translated_metadata into min set with swh-metadata-detector
         min_metadata = extract_minimal_metadata_dict(translated_metadata)
         return min_metadata
 
 
 @click.command()
 @click.option('--revs', '-i',
               default=['8dbb6aeb036e7fd80664eb8bfd1507881af1ba9f',
                        '026040ea79dec1b49b4e3e7beda9132b6b26b51b',
                        '9699072e21eded4be8d45e3b8d543952533fa190'],
               help='Default sha1_git to lookup', multiple=True)
 def main(revs):
     _git_sha1s = list(map(hashutil.hash_to_bytes, revs))
     rev_metadata_indexer = RevisionMetadataIndexer()
     rev_metadata_indexer.run(_git_sha1s, 'update-dups')
 
 
 if __name__ == '__main__':
     logging.basicConfig(level=logging.INFO)
     main()
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
index 56a0e54..3ed9fc5 100644
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -1,210 +1,210 @@
 # Copyright (C) 2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 import json
 
 
 def convert(raw_content):
     """
     convert raw_content recursively:
 
     - from bytes to string
     - from string to dict
 
     Args:
         raw_content (bytes / string / dict)
 
     Returns:
         dict: content (if string was json, otherwise returns string)
 
     """
     if isinstance(raw_content, bytes):
         return convert(raw_content.decode())
     if isinstance(raw_content, str):
         try:
             content = json.loads(raw_content)
             if content:
                 return content
             else:
                 return raw_content
         except json.decoder.JSONDecodeError:
             return raw_content
     if isinstance(raw_content, dict):
         return raw_content
 
 
 class BaseMapping():
     """Base class for mappings to inherit from
 
     To implement a new mapping:
 
     - inherit this class
     - add a local property self.mapping
     - override translate function
     """
 
     def translate(self, content_dict):
         """
         Tranlsates content  by parsing content to a json object
         and translating with the npm mapping (for now hard_coded mapping)
 
         Args:
             context_text (text): should be json
 
         Returns:
             dict: translated metadata in jsonb form needed for the indexer
 
         """
         translated_metadata = {}
         default = 'other'
         translated_metadata['other'] = {}
         try:
             for k, v in content_dict.items():
                 try:
                     term = self.mapping.get(k, default)
                     if term not in translated_metadata:
                         translated_metadata[term] = v
                         continue
                     if isinstance(translated_metadata[term], str):
                         in_value = translated_metadata[term]
                         translated_metadata[term] = [in_value, v]
                         continue
                     if isinstance(translated_metadata[term], list):
                         translated_metadata[term].append(v)
                         continue
                     if isinstance(translated_metadata[term], dict):
                         translated_metadata[term][k] = v
                         continue
                 except KeyError:
                     self.log.exception(
                         "Problem during item mapping")
                     continue
-        except:
+        except Exception:
             return None
         return translated_metadata
 
 
 class NpmMapping(BaseMapping):
     """
     dedicated class for NPM (package.json) mapping and translation
     """
     mapping = {
         'repository': 'codeRepository',
         'os': 'operatingSystem',
         'cpu': 'processorRequirements',
         'engines': 'processorRequirements',
         'dependencies': 'softwareRequirements',
         'bundleDependencies': 'softwareRequirements',
         'peerDependencies': 'softwareRequirements',
         'author': 'author',
         'contributor': 'contributor',
         'keywords': 'keywords',
         'license': 'license',
         'version': 'version',
         'description': 'description',
         'name': 'name',
         'devDependencies': 'softwareSuggestions',
         'optionalDependencies': 'softwareSuggestions',
         'bugs': 'issueTracker',
         'homepage': 'url'
     }
 
     def translate(self, raw_content):
         content_dict = convert(raw_content)
         return super().translate(content_dict)
 
 
 class MavenMapping(BaseMapping):
     """
     dedicated class for Maven (pom.xml) mapping and translation
     """
     mapping = {
         'license': 'license',
         'version': 'version',
         'description': 'description',
         'name': 'name',
         'prerequisites': 'softwareRequirements',
         'repositories': 'codeRepository',
         'groupId': 'identifier',
         'ciManagement': 'contIntegration',
         'issuesManagement': 'issueTracker',
     }
 
     def translate(self, raw_content):
         content = convert(raw_content)
         # parse content from xml to dict
         return super().translate(content)
 
 
 class DoapMapping(BaseMapping):
     mapping = {
 
     }
 
     def translate(self, raw_content):
         content = convert(raw_content)
         # parse content from xml to dict
         return super().translate(content)
 
 
 def parse_xml(content):
     """
     Parses content from xml to a python dict
     Args:
         - content (text): the string form of the raw_content ( in xml)
 
     Returns:
         - parsed_xml (dict): a python dict of the content after parsing
     """
     # check if xml
     # use xml parser to dict
     return content
 
 
 mapping_tool_fn = {
     "npm": NpmMapping(),
     "maven": MavenMapping(),
     "doap_xml": DoapMapping()
 }
 
 
 def compute_metadata(context, raw_content):
     """
     first landing method: a dispatcher that sends content
     to the right function to carry out the real parsing of syntax
     and translation of terms
 
     Args:
         context (text): defines to which function/tool the content is sent
         content (text): the string form of the raw_content
 
     Returns:
         dict: translated metadata jsonb dictionary needed for the indexer to
           store in storage
 
     """
     if raw_content is None or raw_content is b"":
         return None
 
     # TODO: keep mapping not in code (maybe fetch crosswalk from storage?)
     # if fetched from storage should be done once for batch of sha1s
     dictionary = mapping_tool_fn[context]
     translated_metadata = dictionary.translate(raw_content)
     return translated_metadata
 
 
 def main():
     raw_content = """{"name": "test_name", "unknown_term": "ut"}"""
     raw_content1 = b"""{"name": "test_name",
                         "unknown_term": "ut",
                         "prerequisites" :"packageXYZ"}"""
     result = compute_metadata("npm", raw_content)
     result1 = compute_metadata("maven", raw_content1)
 
     print(result)
     print(result1)
 
 
 if __name__ == "__main__":
     main()
diff --git a/swh/indexer/storage/__init__.py b/swh/indexer/storage/__init__.py
index e71523d..79c4017 100644
--- a/swh/indexer/storage/__init__.py
+++ b/swh/indexer/storage/__init__.py
@@ -1,550 +1,533 @@
 # Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 
 import json
 import dateutil.parser
 import psycopg2
 
 from swh.storage.common import db_transaction_generator, db_transaction
 from swh.storage.exc import StorageDBError
 from .db import Db
 
 from . import converters
 
 
 INDEXER_CFG_KEY = 'indexer_storage'
 
 
 def get_indexer_storage(cls, args):
     """Get an indexer storage object of class `storage_class` with
     arguments `storage_args`.
 
     Args:
         args (dict): dictionary with keys:
         - cls (str): storage's class, either 'local' or 'remote'
         - args (dict): dictionary with keys
 
     Returns:
         an instance of swh.indexer's storage (either local or remote)
 
     Raises:
         ValueError if passed an unknown storage class.
 
     """
     if cls == 'remote':
         from .api.client import RemoteStorage as IndexerStorage
     elif cls == 'local':
         from . import IndexerStorage
     else:
         raise ValueError('Unknown indexer storage class `%s`' % cls)
 
     return IndexerStorage(**args)
 
 
 class IndexerStorage():
     """SWH Indexer Storage
 
     """
     def __init__(self, db):
         """
         Args:
             db_conn: either a libpq connection string, or a psycopg2 connection
             obj_root: path to the root of the object storage
 
         """
         try:
             if isinstance(db, psycopg2.extensions.connection):
-                self.db = Db(db)
+                self._db = Db(db)
             else:
-                self.db = Db.connect(db)
+                self._db = Db.connect(db)
         except psycopg2.OperationalError as e:
             raise StorageDBError(e)
 
+    def get_db(self):
+        return self._db
+
     def check_config(self, *, check_write):
         """Check that the storage is configured and ready to go."""
         # Check permissions on one of the tables
-        with self.db.transaction() as cur:
+        with self.get_db().transaction() as cur:
             if check_write:
                 check = 'INSERT'
             else:
                 check = 'SELECT'
 
             cur.execute(
                 "select has_table_privilege(current_user, 'content_mimetype', %s)",  # noqa
                 (check,)
             )
             return cur.fetchone()[0]
 
         return True
 
     @db_transaction_generator
-    def content_mimetype_missing(self, mimetypes, cur=None):
+    def content_mimetype_missing(self, mimetypes, db=None, cur=None):
         """List mimetypes missing from storage.
 
         Args:
             mimetypes (iterable): iterable of dict with keys:
 
                 - id (bytes): sha1 identifier
                 - tool_name (str): tool used to compute the results
                 - tool_version (str): associated tool's version
 
         Returns:
             iterable: an iterable of missing id for the triplets id, tool_name,
             tool_version
 
         """
-        db = self.db
         db.mktemp_content_mimetype_missing(cur)
         db.copy_to(mimetypes, 'tmp_content_mimetype_missing',
                    ['id', 'indexer_configuration_id'],
                    cur)
         for obj in db.content_mimetype_missing_from_temp(cur):
             yield obj[0]
 
     @db_transaction
-    def content_mimetype_add(self, mimetypes, conflict_update=False, cur=None):
+    def content_mimetype_add(self, mimetypes, conflict_update=False, db=None,
+                             cur=None):
         """Add mimetypes not present in storage.
 
         Args:
             mimetypes (iterable): dictionaries with keys:
 
                 - id (bytes): sha1 identifier
                 - mimetype (bytes): raw content's mimetype
                 - encoding (bytes): raw content's encoding
                 - indexer_configuration_id (int): tool's id used to
                   compute the results
                 - conflict_update: Flag to determine if we want to
                   overwrite (true) or skip duplicates (false, the default)
 
         """
-        db = self.db
         db.mktemp_content_mimetype(cur)
         db.copy_to(mimetypes, 'tmp_content_mimetype',
                    ['id', 'mimetype', 'encoding', 'indexer_configuration_id'],
                    cur)
         db.content_mimetype_add_from_temp(conflict_update, cur)
 
     @db_transaction_generator
-    def content_mimetype_get(self, ids, cur=None):
-        db = self.db
+    def content_mimetype_get(self, ids, db=None, cur=None):
         db.store_tmp_bytea(ids, cur)
         for c in db.content_mimetype_get_from_temp():
             yield converters.db_to_mimetype(
                 dict(zip(db.content_mimetype_cols, c)))
 
     @db_transaction_generator
-    def content_language_missing(self, languages, cur=None):
+    def content_language_missing(self, languages, db=None, cur=None):
         """List languages missing from storage.
 
         Args:
             languages (iterable): dictionaries with keys:
 
                 - id (bytes): sha1 identifier
                 - tool_name (str): tool used to compute the results
                 - tool_version (str): associated tool's version
 
         Returns:
             iterable: identifiers of missing languages
 
         """
-        db = self.db
         db.mktemp_content_language_missing(cur)
         db.copy_to(languages, 'tmp_content_language_missing',
                    ['id', 'indexer_configuration_id'], cur)
         for obj in db.content_language_missing_from_temp(cur):
             yield obj[0]
 
     @db_transaction_generator
-    def content_language_get(self, ids, cur=None):
-        db = self.db
+    def content_language_get(self, ids, db=None, cur=None):
         db.store_tmp_bytea(ids, cur)
         for c in db.content_language_get_from_temp():
             yield converters.db_to_language(
                 dict(zip(db.content_language_cols, c)))
 
     @db_transaction
-    def content_language_add(self, languages, conflict_update=False, cur=None):
+    def content_language_add(self, languages, conflict_update=False, db=None,
+                             cur=None):
         """Add languages not present in storage.
 
         Args:
             languages (iterable): dictionaries with keys:
 
                 - id: sha1
                 - lang: bytes
 
             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)
 
         """
-        db = self.db
         db.mktemp_content_language(cur)
         # empty language is mapped to 'unknown'
         db.copy_to(
             ({
                 'id': l['id'],
                 'lang': 'unknown' if not l['lang'] else l['lang'],
                 'indexer_configuration_id': l['indexer_configuration_id'],
             } for l in languages),
             'tmp_content_language',
             ['id', 'lang', 'indexer_configuration_id'], cur)
 
         db.content_language_add_from_temp(conflict_update, cur)
 
     @db_transaction_generator
-    def content_ctags_missing(self, ctags, cur=None):
+    def content_ctags_missing(self, ctags, db=None, cur=None):
         """List ctags missing from storage.
 
         Args:
             ctags (iterable): dicts with keys:
 
             - id (bytes): sha1 identifier
             - tool_name (str): tool name used
             - tool_version (str): associated version
 
         Returns:
             an iterable of missing id
 
         """
-        db = self.db
-
         db.mktemp_content_ctags_missing(cur)
         db.copy_to(ctags,
                    tblname='tmp_content_ctags_missing',
                    columns=['id', 'indexer_configuration_id'],
                    cur=cur)
         for obj in db.content_ctags_missing_from_temp(cur):
             yield obj[0]
 
     @db_transaction_generator
-    def content_ctags_get(self, ids, cur=None):
+    def content_ctags_get(self, ids, db=None, cur=None):
         """Retrieve ctags per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         """
-        db = self.db
         db.store_tmp_bytea(ids, cur)
         for c in db.content_ctags_get_from_temp():
             yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, c)))
 
     @db_transaction
-    def content_ctags_add(self, ctags, conflict_update=False, cur=None):
+    def content_ctags_add(self, ctags, conflict_update=False, db=None,
+                          cur=None):
         """Add ctags not present in storage
 
         Args:
             ctags (iterable): dictionaries with keys:
 
                 - id (bytes): sha1
                 - ctags ([list): List of dictionary with keys: name, kind,
                   line, language
 
         """
-        db = self.db
-
         def _convert_ctags(__ctags):
             """Convert ctags dict to list of ctags.
 
             """
             for ctags in __ctags:
                 yield from converters.ctags_to_db(ctags)
 
         db.mktemp_content_ctags(cur)
         db.copy_to(list(_convert_ctags(ctags)),
                    tblname='tmp_content_ctags',
                    columns=['id', 'name', 'kind', 'line',
                             'lang', 'indexer_configuration_id'],
                    cur=cur)
 
         db.content_ctags_add_from_temp(conflict_update, cur)
 
     @db_transaction_generator
     def content_ctags_search(self, expression,
-                             limit=10, last_sha1=None, cur=None):
+                             limit=10, last_sha1=None, db=None, cur=None):
         """Search through content's raw ctags symbols.
 
         Args:
             expression (str): Expression to search for
             limit (int): Number of rows to return (default to 10).
             last_sha1 (str): Offset from which retrieving data (default to '').
 
         Yields:
             rows of ctags including id, name, lang, kind, line, etc...
 
         """
-        db = self.db
-
         for obj in db.content_ctags_search(expression, last_sha1, limit,
                                            cur=cur):
             yield converters.db_to_ctags(dict(zip(db.content_ctags_cols, obj)))
 
     @db_transaction_generator
-    def content_fossology_license_get(self, ids, cur=None):
+    def content_fossology_license_get(self, ids, db=None, cur=None):
         """Retrieve licenses per id.
 
         Args:
             ids (iterable): sha1 checksums
 
         Yields:
             list: dictionaries with the following keys:
 
             - id (bytes)
             - licenses ([str]): associated licenses for that content
 
         """
-        db = self.db
         db.store_tmp_bytea(ids, cur)
 
         for c in db.content_fossology_license_get_from_temp():
             license = dict(zip(db.content_fossology_license_cols, c))
             yield converters.db_to_fossology_license(license)
 
     @db_transaction
-    def content_fossology_license_add(self, licenses,
-                                      conflict_update=False, cur=None):
+    def content_fossology_license_add(self, licenses, conflict_update=False,
+                                      db=None, cur=None):
         """Add licenses not present in storage.
 
         Args:
             licenses (iterable): dictionaries with keys:
 
                 - id: sha1
                 - license ([bytes]): List of licenses associated to sha1
                 - tool (str): nomossa
 
             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)
 
         Returns:
             list: content_license entries which failed due to unknown licenses
 
         """
-        db = self.db
-
         # Then, we add the correct ones
         db.mktemp_content_fossology_license(cur)
         db.copy_to(
             ({
                 'id': sha1['id'],
                 'indexer_configuration_id': sha1['indexer_configuration_id'],
                 'license': license,
               } for sha1 in licenses
                 for license in sha1['licenses']),
             tblname='tmp_content_fossology_license',
             columns=['id', 'license', 'indexer_configuration_id'],
             cur=cur)
         db.content_fossology_license_add_from_temp(conflict_update, cur)
 
     @db_transaction_generator
-    def content_metadata_missing(self, metadatas, cur=None):
+    def content_metadata_missing(self, metadatas, db=None, cur=None):
         """List metadatas missing from storage.
 
         Args:
             metadatas (iterable): dictionaries with keys:
 
                 - id (bytes): sha1 identifier
                 - tool_name (str): tool used to compute the results
                 - tool_version (str): associated tool's version
 
         Returns:
             iterable: missing ids
 
         """
-        db = self.db
         db.mktemp_content_metadata_missing(cur)
         db.copy_to(metadatas, 'tmp_content_metadata_missing',
                    ['id', 'indexer_configuration_id'], cur)
         for obj in db.content_metadata_missing_from_temp(cur):
             yield obj[0]
 
     @db_transaction_generator
-    def content_metadata_get(self, ids, cur=None):
-        db = self.db
+    def content_metadata_get(self, ids, db=None, cur=None):
         db.store_tmp_bytea(ids, cur)
         for c in db.content_metadata_get_from_temp():
             yield converters.db_to_metadata(
                 dict(zip(db.content_metadata_cols, c)))
 
     @db_transaction
-    def content_metadata_add(self, metadatas, conflict_update=False, cur=None):
+    def content_metadata_add(self, metadatas, conflict_update=False, db=None,
+                             cur=None):
         """Add metadatas not present in storage.
 
         Args:
             metadatas (iterable): dictionaries with keys:
 
                 - id: sha1
                 - translated_metadata: bytes / jsonb ?
 
             conflict_update: Flag to determine if we want to overwrite (true)
                 or skip duplicates (false, the default)
 
         """
-        db = self.db
         db.mktemp_content_metadata(cur)
         # empty metadata is mapped to 'unknown'
 
         db.copy_to(metadatas, 'tmp_content_metadata',
                    ['id', 'translated_metadata', 'indexer_configuration_id'],
                    cur)
         db.content_metadata_add_from_temp(conflict_update, cur)
 
     @db_transaction_generator
-    def revision_metadata_missing(self, metadatas, cur=None):
+    def revision_metadata_missing(self, metadatas, db=None, cur=None):
         """List metadatas missing from storage.
 
         Args:
             metadatas (iterable): dictionaries with keys:
 
                - id (bytes): sha1_git revision identifier
                - tool_name (str): tool used to compute the results
                - tool_version (str): associated tool's version
 
         Returns:
             iterable: missing ids
 
         """
-        db = self.db
         db.mktemp_revision_metadata_missing(cur)
         db.copy_to(metadatas, 'tmp_revision_metadata_missing',
                    ['id', 'indexer_configuration_id'], cur)
         for obj in db.revision_metadata_missing_from_temp(cur):
             yield obj[0]
 
     @db_transaction_generator
-    def revision_metadata_get(self, ids, cur=None):
-        db = self.db
+    def revision_metadata_get(self, ids, db=None, cur=None):
         db.store_tmp_bytea(ids, cur)
         for c in db.revision_metadata_get_from_temp():
             yield converters.db_to_metadata(
                 dict(zip(db.revision_metadata_cols, c)))
 
     @db_transaction
-    def revision_metadata_add(self, metadatas,
-                              conflict_update=False, cur=None):
+    def revision_metadata_add(self, metadatas, conflict_update=False, db=None,
+                              cur=None):
         """Add metadatas not present in storage.
 
         Args:
             metadatas (iterable): dictionaries with keys:
 
                 - id: sha1_git of revision
                 - translated_metadata: bytes / jsonb ?
 
             conflict_update: Flag to determine if we want to overwrite (true)
               or skip duplicates (false, the default)
 
         """
-        db = self.db
         db.mktemp_revision_metadata(cur)
         # empty metadata is mapped to 'unknown'
 
         db.copy_to(metadatas, 'tmp_revision_metadata',
                    ['id', 'translated_metadata', 'indexer_configuration_id'],
                    cur)
         db.revision_metadata_add_from_temp(conflict_update, cur)
 
     @db_transaction
     def origin_metadata_add(self, origin_id, ts, provider, tool, metadata,
-                            cur=None):
+                            db=None, cur=None):
         """ Add an origin_metadata for the origin at ts with provenance and
         metadata.
 
         Args:
             origin_id (int): the origin's id for which the metadata is added
             ts (datetime): timestamp of the found metadata
             provider (int): the provider of metadata (ex:'hal')
             tool (int): tool used to extract metadata
             metadata (jsonb): the metadata retrieved at the time and location
 
         Returns:
             id (int): the origin_metadata unique id
         """
         if isinstance(ts, str):
             ts = dateutil.parser.parse(ts)
 
-        return self.db.origin_metadata_add(origin_id, ts, provider, tool,
-                                           metadata, cur)
+        return db.origin_metadata_add(origin_id, ts, provider, tool,
+                                      metadata, cur)
 
     @db_transaction_generator
-    def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None):
+    def origin_metadata_get_by(self, origin_id, provider_type=None, db=None,
+                               cur=None):
         """Retrieve list of all origin_metadata entries for the origin_id
 
         Args:
             origin_id (int): the unique origin identifier
             provider_type (str): (optional) type of provider
 
         Returns:
             list of dicts: the origin_metadata dictionary with the keys:
 
             - id (int): origin_metadata's id
             - origin_id (int): origin's id
             - discovery_date (datetime): timestamp of discovery
             - tool_id (int): metadata's extracting tool
             - metadata (jsonb)
             - provider_id (int): metadata's provider
             - provider_name (str)
             - provider_type (str)
             - provider_url (str)
 
         """
-        db = self.db
         for line in db.origin_metadata_get_by(origin_id, provider_type, cur):
             yield dict(zip(db.origin_metadata_get_cols, line))
 
     @db_transaction_generator
-    def indexer_configuration_add(self, tools, cur=None):
+    def indexer_configuration_add(self, tools, db=None, cur=None):
         """Add new tools to the storage.
 
         Args:
             tools ([dict]): List of dictionary representing tool to
             insert in the db. Dictionary with the following keys::
 
                 tool_name (str): tool's name
                 tool_version (str): tool's version
                 tool_configuration (dict): tool's configuration (free form
                                            dict)
 
         Returns:
             List of dict inserted in the db (holding the id key as
             well).  The order of the list is not guaranteed to match
             the order of the initial list.
 
         """
-        db = self.db
         db.mktemp_indexer_configuration(cur)
         db.copy_to(tools, 'tmp_indexer_configuration',
                    ['tool_name', 'tool_version', 'tool_configuration'],
                    cur)
 
         tools = db.indexer_configuration_add_from_temp(cur)
         for line in tools:
             yield dict(zip(db.indexer_configuration_cols, line))
 
     @db_transaction
-    def indexer_configuration_get(self, tool, cur=None):
+    def indexer_configuration_get(self, tool, db=None, cur=None):
         """Retrieve tool information.
 
         Args:
             tool (dict): Dictionary representing a tool with the
             following keys::
 
                 tool_name (str): tool's name
                 tool_version (str): tool's version
                 tool_configuration (dict): tool's configuration (free form
                                            dict)
 
         Returns:
             The identifier of the tool if it exists, None otherwise.
 
         """
-        db = self.db
         tool_conf = tool['tool_configuration']
         if isinstance(tool_conf, dict):
             tool_conf = json.dumps(tool_conf)
         idx = db.indexer_configuration_get(tool['tool_name'],
                                            tool['tool_version'],
                                            tool_conf)
         if not idx:
             return None
-        return dict(zip(self.db.indexer_configuration_cols, idx))
+        return dict(zip(db.indexer_configuration_cols, idx))
diff --git a/swh/indexer/tests/storage/test_api_client.py b/swh/indexer/tests/storage/test_api_client.py
index 210fcd2..90caa35 100644
--- a/swh/indexer/tests/storage/test_api_client.py
+++ b/swh/indexer/tests/storage/test_api_client.py
@@ -1,38 +1,38 @@
-# Copyright (C) 2015-2017  The Software Heritage developers
+# Copyright (C) 2015-2018  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
+from swh.core.tests.server_testing import ServerTestFixture
 from swh.indexer.storage import INDEXER_CFG_KEY
 from swh.indexer.storage.api.client import RemoteStorage
 from swh.indexer.storage.api.server import app
 
 from .test_storage import CommonTestStorage
-from swh.storage.tests.server_testing import ServerTestFixture
 
 
 class TestRemoteStorage(CommonTestStorage, ServerTestFixture,
                         unittest.TestCase):
     """Test the indexer's remote storage API.
 
     This class doesn't define any tests as we want identical
     functionality between local and remote storage. All the tests are
     therefore defined in
     `class`:swh.indexer.storage.test_storage.CommonTestStorage.
 
     """
 
     def setUp(self):
         self.config = {
             INDEXER_CFG_KEY: {
                 'cls': 'local',
                 'args': {
                     'db': 'dbname=%s' % self.TEST_STORAGE_DB_NAME,
                 }
             }
         }
         self.app = app
         super().setUp()
         self.storage = RemoteStorage(self.url())
diff --git a/version.txt b/version.txt
index 5f332d1..f70527a 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-v0.0.47-0-g81e4829
\ No newline at end of file
+v0.0.48-0-ga3ce1af
\ No newline at end of file