diff --git a/swh/storage/cassandra/converters.py b/swh/storage/cassandra/converters.py
index a1943011..253ba3cd 100644
--- a/swh/storage/cassandra/converters.py
+++ b/swh/storage/cassandra/converters.py
@@ -1,94 +1,96 @@
 # Copyright (C) 2019-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import json
 import attr
 
 from copy import deepcopy
 from typing import Any, Dict, Tuple
 
 from cassandra.cluster import ResultSet
 
 from swh.model.model import (
     ObjectType,
     OriginVisitStatus,
     Revision,
     RevisionType,
     Release,
     Sha1Git,
 )
 from swh.model.hashutil import DEFAULT_ALGORITHMS
 
 from .common import Row
 
 
 def revision_to_db(revision: Revision) -> Dict[str, Any]:
     # we use a deepcopy of the dict because we do not want to recurse the
     # Model->dict conversion (to keep Timestamp & al. entities), BUT we do not
     # want to modify original metadata (embedded in the Model entity), so we
     # non-recursively convert it as a dict but make a deep copy.
     db_revision = deepcopy(attr.asdict(revision, recurse=False))
     metadata = revision.metadata
     extra_headers = revision.extra_headers
     if not extra_headers and metadata and "extra_headers" in metadata:
         extra_headers = db_revision["metadata"].pop("extra_headers")
-    db_revision["metadata"] = json.dumps(db_revision["metadata"])
+    db_revision["metadata"] = json.dumps(
+        dict(db_revision["metadata"]) if db_revision["metadata"] is not None else None
+    )
     db_revision["extra_headers"] = extra_headers
     db_revision["type"] = db_revision["type"].value
     return db_revision
 
 
 def revision_from_db(db_revision: Row, parents: Tuple[Sha1Git]) -> Revision:
     revision = db_revision._asdict()  # type: ignore
     metadata = json.loads(revision.pop("metadata", None))
     extra_headers = revision.pop("extra_headers", ())
     if not extra_headers and metadata and "extra_headers" in metadata:
         extra_headers = metadata.pop("extra_headers")
     if extra_headers is None:
         extra_headers = ()
     return Revision(
         parents=parents,
         type=RevisionType(revision.pop("type")),
         metadata=metadata,
         extra_headers=extra_headers,
         **revision,
     )
 
 
 def release_to_db(release: Release) -> Dict[str, Any]:
     db_release = attr.asdict(release, recurse=False)
     db_release["target_type"] = db_release["target_type"].value
     return db_release
 
 
 def release_from_db(db_release: Row) -> Release:
     release = db_release._asdict()  # type: ignore
     return Release(target_type=ObjectType(release.pop("target_type")), **release,)
 
 
 def row_to_content_hashes(row: Row) -> Dict[str, bytes]:
     """Convert cassandra row to a content hashes
 
     """
     hashes = {}
     for algo in DEFAULT_ALGORITHMS:
         hashes[algo] = getattr(row, algo)
     return hashes
 
 
 def row_to_visit_status(row: ResultSet) -> OriginVisitStatus:
     """Format a row representing a visit_status to an actual dict representing an
     OriginVisitStatus.
 
     """
     return OriginVisitStatus.from_dict(
         {
             **row._asdict(),
             "origin": row.origin,
             "date": row.date.replace(tzinfo=datetime.timezone.utc),
             "metadata": (json.loads(row.metadata) if row.metadata else None),
         }
     )
diff --git a/swh/storage/cassandra/cql.py b/swh/storage/cassandra/cql.py
index 6a2997f8..66a5ce8c 100644
--- a/swh/storage/cassandra/cql.py
+++ b/swh/storage/cassandra/cql.py
@@ -1,1000 +1,1002 @@
 # Copyright (C) 2019-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import functools
 import json
 import logging
 import random
 from typing import (
     Any,
     Callable,
     Dict,
     Iterable,
     Iterator,
     List,
     Optional,
     Tuple,
     TypeVar,
     Union,
 )
 
 from cassandra import CoordinationFailure
 from cassandra.cluster import Cluster, EXEC_PROFILE_DEFAULT, ExecutionProfile, ResultSet
 from cassandra.policies import DCAwareRoundRobinPolicy, TokenAwarePolicy
 from cassandra.query import PreparedStatement, BoundStatement
 from tenacity import (
     retry,
     stop_after_attempt,
     wait_random_exponential,
     retry_if_exception_type,
 )
 
 from swh.model.model import (
     Sha1Git,
     TimestampWithTimezone,
     Timestamp,
     Person,
     Content,
     SkippedContent,
     OriginVisit,
     OriginVisitStatus,
     Origin,
 )
 
 from .common import Row, TOKEN_BEGIN, TOKEN_END, hash_url
 from .schema import CREATE_TABLES_QUERIES, HASH_ALGORITHMS
 from .. import extrinsic_metadata
 
 
 logger = logging.getLogger(__name__)
 
 
 _execution_profiles = {
     EXEC_PROFILE_DEFAULT: ExecutionProfile(
         load_balancing_policy=TokenAwarePolicy(DCAwareRoundRobinPolicy())
     ),
 }
 # Configuration for cassandra-driver's access to servers:
 # * hit the right server directly when sending a query (TokenAwarePolicy),
 # * if there's more than one, then pick one at random that's in the same
 #   datacenter as the client (DCAwareRoundRobinPolicy)
 
 
 def create_keyspace(
     hosts: List[str], keyspace: str, port: int = 9042, *, durable_writes=True
 ):
     cluster = Cluster(hosts, port=port, execution_profiles=_execution_profiles)
     session = cluster.connect()
     extra_params = ""
     if not durable_writes:
         extra_params = "AND durable_writes = false"
     session.execute(
         """CREATE KEYSPACE IF NOT EXISTS "%s"
                        WITH REPLICATION = {
                            'class' : 'SimpleStrategy',
                            'replication_factor' : 1
                        } %s;
                     """
         % (keyspace, extra_params)
     )
     session.execute('USE "%s"' % keyspace)
     for query in CREATE_TABLES_QUERIES:
         session.execute(query)
 
 
 T = TypeVar("T")
 
 
 def _prepared_statement(query: str) -> Callable[[Callable[..., T]], Callable[..., T]]:
     """Returns a decorator usable on methods of CqlRunner, to
     inject them with a 'statement' argument, that is a prepared
     statement corresponding to the query.
 
     This only works on methods of CqlRunner, as preparing a
     statement requires a connection to a Cassandra server."""
 
     def decorator(f):
         @functools.wraps(f)
         def newf(self, *args, **kwargs) -> T:
             if f.__name__ not in self._prepared_statements:
                 statement: PreparedStatement = self._session.prepare(query)
                 self._prepared_statements[f.__name__] = statement
             return f(
                 self, *args, **kwargs, statement=self._prepared_statements[f.__name__]
             )
 
         return newf
 
     return decorator
 
 
 def _prepared_insert_statement(table_name: str, columns: List[str]):
     """Shorthand for using `_prepared_statement` for `INSERT INTO`
     statements."""
     return _prepared_statement(
         "INSERT INTO %s (%s) VALUES (%s)"
         % (table_name, ", ".join(columns), ", ".join("?" for _ in columns),)
     )
 
 
 def _prepared_exists_statement(table_name: str):
     """Shorthand for using `_prepared_statement` for queries that only
     check which ids in a list exist in the table."""
     return _prepared_statement(f"SELECT id FROM {table_name} WHERE id IN ?")
 
 
 class CqlRunner:
     """Class managing prepared statements and building queries to be sent
     to Cassandra."""
 
     def __init__(self, hosts: List[str], keyspace: str, port: int):
         self._cluster = Cluster(
             hosts, port=port, execution_profiles=_execution_profiles
         )
         self._session = self._cluster.connect(keyspace)
         self._cluster.register_user_type(
             keyspace, "microtimestamp_with_timezone", TimestampWithTimezone
         )
         self._cluster.register_user_type(keyspace, "microtimestamp", Timestamp)
         self._cluster.register_user_type(keyspace, "person", Person)
 
         self._prepared_statements: Dict[str, PreparedStatement] = {}
 
     ##########################
     # Common utility functions
     ##########################
 
     MAX_RETRIES = 3
 
     @retry(
         wait=wait_random_exponential(multiplier=1, max=10),
         stop=stop_after_attempt(MAX_RETRIES),
         retry=retry_if_exception_type(CoordinationFailure),
     )
     def _execute_with_retries(self, statement, args) -> ResultSet:
         return self._session.execute(statement, args, timeout=1000.0)
 
     @_prepared_statement(
         "UPDATE object_count SET count = count + ? "
         "WHERE partition_key = 0 AND object_type = ?"
     )
     def _increment_counter(
         self, object_type: str, nb: int, *, statement: PreparedStatement
     ) -> None:
         self._execute_with_retries(statement, [nb, object_type])
 
     def _add_one(self, statement, object_type: str, obj, keys: List[str]) -> None:
         self._increment_counter(object_type, 1)
         self._execute_with_retries(statement, [getattr(obj, key) for key in keys])
 
     def _get_random_row(self, statement) -> Optional[Row]:
         """Takes a prepared statement of the form
         "SELECT * FROM <table> WHERE token(<keys>) > ? LIMIT 1"
         and uses it to return a random row"""
         token = random.randint(TOKEN_BEGIN, TOKEN_END)
         rows = self._execute_with_retries(statement, [token])
         if not rows:
             # There are no row with a greater token; wrap around to get
             # the row with the smallest token
             rows = self._execute_with_retries(statement, [TOKEN_BEGIN])
         if rows:
             return rows.one()
         else:
             return None
 
     def _missing(self, statement, ids):
         res = self._execute_with_retries(statement, [ids])
         found_ids = {id_ for (id_,) in res}
         return [id_ for id_ in ids if id_ not in found_ids]
 
     ##########################
     # 'content' table
     ##########################
 
     _content_pk = ["sha1", "sha1_git", "sha256", "blake2s256"]
     _content_keys = [
         "sha1",
         "sha1_git",
         "sha256",
         "blake2s256",
         "length",
         "ctime",
         "status",
     ]
 
     def _content_add_finalize(self, statement: BoundStatement) -> None:
         """Returned currified by content_add_prepare, to be called when the
         content row should be added to the primary table."""
         self._execute_with_retries(statement, None)
         self._increment_counter("content", 1)
 
     @_prepared_insert_statement("content", _content_keys)
     def content_add_prepare(
         self, content, *, statement
     ) -> Tuple[int, Callable[[], None]]:
         """Prepares insertion of a Content to the main 'content' table.
         Returns a token (to be used in secondary tables), and a function to be
         called to perform the insertion in the main table."""
         statement = statement.bind(
             [getattr(content, key) for key in self._content_keys]
         )
 
         # Type used for hashing keys (usually, it will be
         # cassandra.metadata.Murmur3Token)
         token_class = self._cluster.metadata.token_map.token_class
 
         # Token of the row when it will be inserted. This is equivalent to
         # "SELECT token({', '.join(self._content_pk)}) FROM content WHERE ..."
         # after the row is inserted; but we need the token to insert in the
         # index tables *before* inserting to the main 'content' table
         token = token_class.from_key(statement.routing_key).value
         assert TOKEN_BEGIN <= token <= TOKEN_END
 
         # Function to be called after the indexes contain their respective
         # row
         finalizer = functools.partial(self._content_add_finalize, statement)
 
         return (token, finalizer)
 
     @_prepared_statement(
         "SELECT * FROM content WHERE "
         + " AND ".join(map("%s = ?".__mod__, HASH_ALGORITHMS))
     )
     def content_get_from_pk(
         self, content_hashes: Dict[str, bytes], *, statement
     ) -> Optional[Row]:
         rows = list(
             self._execute_with_retries(
                 statement, [content_hashes[algo] for algo in HASH_ALGORITHMS]
             )
         )
         assert len(rows) <= 1
         if rows:
             return rows[0]
         else:
             return None
 
     @_prepared_statement(
         "SELECT * FROM content WHERE token(" + ", ".join(_content_pk) + ") = ?"
     )
     def content_get_from_token(self, token, *, statement) -> Iterable[Row]:
         return self._execute_with_retries(statement, [token])
 
     @_prepared_statement(
         "SELECT * FROM content WHERE token(%s) > ? LIMIT 1" % ", ".join(_content_pk)
     )
     def content_get_random(self, *, statement) -> Optional[Row]:
         return self._get_random_row(statement)
 
     @_prepared_statement(
         (
             "SELECT token({0}) AS tok, {1} FROM content "
             "WHERE token({0}) >= ? AND token({0}) <= ? LIMIT ?"
         ).format(", ".join(_content_pk), ", ".join(_content_keys))
     )
     def content_get_token_range(
         self, start: int, end: int, limit: int, *, statement
     ) -> Iterable[Row]:
         return self._execute_with_retries(statement, [start, end, limit])
 
     ##########################
     # 'content_by_*' tables
     ##########################
 
     @_prepared_statement("SELECT sha1_git FROM content_by_sha1_git WHERE sha1_git IN ?")
     def content_missing_by_sha1_git(
         self, ids: List[bytes], *, statement
     ) -> List[bytes]:
         return self._missing(statement, ids)
 
     def content_index_add_one(self, algo: str, content: Content, token: int) -> None:
         """Adds a row mapping content[algo] to the token of the Content in
         the main 'content' table."""
         query = (
             f"INSERT INTO content_by_{algo} ({algo}, target_token) " f"VALUES (%s, %s)"
         )
         self._execute_with_retries(query, [content.get_hash(algo), token])
 
     def content_get_tokens_from_single_hash(
         self, algo: str, hash_: bytes
     ) -> Iterable[int]:
         assert algo in HASH_ALGORITHMS
         query = f"SELECT target_token FROM content_by_{algo} WHERE {algo} = %s"
         return (tok for (tok,) in self._execute_with_retries(query, [hash_]))
 
     ##########################
     # 'skipped_content' table
     ##########################
 
     _skipped_content_pk = ["sha1", "sha1_git", "sha256", "blake2s256"]
     _skipped_content_keys = [
         "sha1",
         "sha1_git",
         "sha256",
         "blake2s256",
         "length",
         "ctime",
         "status",
         "reason",
         "origin",
     ]
     _magic_null_pk = b"<null>"
     """
     NULLs (or all-empty blobs) are not allowed in primary keys; instead use a
     special value that can't possibly be a valid hash.
     """
 
     def _skipped_content_add_finalize(self, statement: BoundStatement) -> None:
         """Returned currified by skipped_content_add_prepare, to be called
         when the content row should be added to the primary table."""
         self._execute_with_retries(statement, None)
         self._increment_counter("skipped_content", 1)
 
     @_prepared_insert_statement("skipped_content", _skipped_content_keys)
     def skipped_content_add_prepare(
         self, content, *, statement
     ) -> Tuple[int, Callable[[], None]]:
         """Prepares insertion of a Content to the main 'skipped_content' table.
         Returns a token (to be used in secondary tables), and a function to be
         called to perform the insertion in the main table."""
 
         # Replace NULLs (which are not allowed in the partition key) with
         # an empty byte string
         content = content.to_dict()
         for key in self._skipped_content_pk:
             if content[key] is None:
                 content[key] = self._magic_null_pk
 
         statement = statement.bind(
             [content.get(key) for key in self._skipped_content_keys]
         )
 
         # Type used for hashing keys (usually, it will be
         # cassandra.metadata.Murmur3Token)
         token_class = self._cluster.metadata.token_map.token_class
 
         # Token of the row when it will be inserted. This is equivalent to
         # "SELECT token({', '.join(self._content_pk)})
         #  FROM skipped_content WHERE ..."
         # after the row is inserted; but we need the token to insert in the
         # index tables *before* inserting to the main 'skipped_content' table
         token = token_class.from_key(statement.routing_key).value
         assert TOKEN_BEGIN <= token <= TOKEN_END
 
         # Function to be called after the indexes contain their respective
         # row
         finalizer = functools.partial(self._skipped_content_add_finalize, statement)
 
         return (token, finalizer)
 
     @_prepared_statement(
         "SELECT * FROM skipped_content WHERE "
         + " AND ".join(map("%s = ?".__mod__, HASH_ALGORITHMS))
     )
     def skipped_content_get_from_pk(
         self, content_hashes: Dict[str, bytes], *, statement
     ) -> Optional[Row]:
         rows = list(
             self._execute_with_retries(
                 statement,
                 [
                     content_hashes[algo] or self._magic_null_pk
                     for algo in HASH_ALGORITHMS
                 ],
             )
         )
         assert len(rows) <= 1
         if rows:
             # TODO: convert _magic_null_pk back to None?
             return rows[0]
         else:
             return None
 
     ##########################
     # 'skipped_content_by_*' tables
     ##########################
 
     def skipped_content_index_add_one(
         self, algo: str, content: SkippedContent, token: int
     ) -> None:
         """Adds a row mapping content[algo] to the token of the SkippedContent
         in the main 'skipped_content' table."""
         query = (
             f"INSERT INTO skipped_content_by_{algo} ({algo}, target_token) "
             f"VALUES (%s, %s)"
         )
         self._execute_with_retries(
             query, [content.get_hash(algo) or self._magic_null_pk, token]
         )
 
     ##########################
     # 'revision' table
     ##########################
 
     _revision_keys = [
         "id",
         "date",
         "committer_date",
         "type",
         "directory",
         "message",
         "author",
         "committer",
         "synthetic",
         "metadata",
         "extra_headers",
     ]
 
     @_prepared_exists_statement("revision")
     def revision_missing(self, ids: List[bytes], *, statement) -> List[bytes]:
         return self._missing(statement, ids)
 
     @_prepared_insert_statement("revision", _revision_keys)
     def revision_add_one(self, revision: Dict[str, Any], *, statement) -> None:
         self._execute_with_retries(
             statement, [revision[key] for key in self._revision_keys]
         )
         self._increment_counter("revision", 1)
 
     @_prepared_statement("SELECT id FROM revision WHERE id IN ?")
     def revision_get_ids(self, revision_ids, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [revision_ids])
 
     @_prepared_statement("SELECT * FROM revision WHERE id IN ?")
     def revision_get(self, revision_ids, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [revision_ids])
 
     @_prepared_statement("SELECT * FROM revision WHERE token(id) > ? LIMIT 1")
     def revision_get_random(self, *, statement) -> Optional[Row]:
         return self._get_random_row(statement)
 
     ##########################
     # 'revision_parent' table
     ##########################
 
     _revision_parent_keys = ["id", "parent_rank", "parent_id"]
 
     @_prepared_insert_statement("revision_parent", _revision_parent_keys)
     def revision_parent_add_one(
         self, id_: Sha1Git, parent_rank: int, parent_id: Sha1Git, *, statement
     ) -> None:
         self._execute_with_retries(statement, [id_, parent_rank, parent_id])
 
     @_prepared_statement("SELECT parent_id FROM revision_parent WHERE id = ?")
     def revision_parent_get(self, revision_id: Sha1Git, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [revision_id])
 
     ##########################
     # 'release' table
     ##########################
 
     _release_keys = [
         "id",
         "target",
         "target_type",
         "date",
         "name",
         "message",
         "author",
         "synthetic",
     ]
 
     @_prepared_exists_statement("release")
     def release_missing(self, ids: List[bytes], *, statement) -> List[bytes]:
         return self._missing(statement, ids)
 
     @_prepared_insert_statement("release", _release_keys)
     def release_add_one(self, release: Dict[str, Any], *, statement) -> None:
         self._execute_with_retries(
             statement, [release[key] for key in self._release_keys]
         )
         self._increment_counter("release", 1)
 
     @_prepared_statement("SELECT * FROM release WHERE id in ?")
     def release_get(self, release_ids: List[str], *, statement) -> None:
         return self._execute_with_retries(statement, [release_ids])
 
     @_prepared_statement("SELECT * FROM release WHERE token(id) > ? LIMIT 1")
     def release_get_random(self, *, statement) -> Optional[Row]:
         return self._get_random_row(statement)
 
     ##########################
     # 'directory' table
     ##########################
 
     _directory_keys = ["id"]
 
     @_prepared_exists_statement("directory")
     def directory_missing(self, ids: List[bytes], *, statement) -> List[bytes]:
         return self._missing(statement, ids)
 
     @_prepared_insert_statement("directory", _directory_keys)
     def directory_add_one(self, directory_id: Sha1Git, *, statement) -> None:
         """Called after all calls to directory_entry_add_one, to
         commit/finalize the directory."""
         self._execute_with_retries(statement, [directory_id])
         self._increment_counter("directory", 1)
 
     @_prepared_statement("SELECT * FROM directory WHERE token(id) > ? LIMIT 1")
     def directory_get_random(self, *, statement) -> Optional[Row]:
         return self._get_random_row(statement)
 
     ##########################
     # 'directory_entry' table
     ##########################
 
     _directory_entry_keys = ["directory_id", "name", "type", "target", "perms"]
 
     @_prepared_insert_statement("directory_entry", _directory_entry_keys)
     def directory_entry_add_one(self, entry: Dict[str, Any], *, statement) -> None:
         self._execute_with_retries(
             statement, [entry[key] for key in self._directory_entry_keys]
         )
 
     @_prepared_statement("SELECT * FROM directory_entry WHERE directory_id IN ?")
     def directory_entry_get(self, directory_ids, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [directory_ids])
 
     ##########################
     # 'snapshot' table
     ##########################
 
     _snapshot_keys = ["id"]
 
     @_prepared_exists_statement("snapshot")
     def snapshot_missing(self, ids: List[bytes], *, statement) -> List[bytes]:
         return self._missing(statement, ids)
 
     @_prepared_insert_statement("snapshot", _snapshot_keys)
     def snapshot_add_one(self, snapshot_id: Sha1Git, *, statement) -> None:
         self._execute_with_retries(statement, [snapshot_id])
         self._increment_counter("snapshot", 1)
 
     @_prepared_statement("SELECT * FROM snapshot WHERE id = ?")
     def snapshot_get(self, snapshot_id: Sha1Git, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [snapshot_id])
 
     @_prepared_statement("SELECT * FROM snapshot WHERE token(id) > ? LIMIT 1")
     def snapshot_get_random(self, *, statement) -> Optional[Row]:
         return self._get_random_row(statement)
 
     ##########################
     # 'snapshot_branch' table
     ##########################
 
     _snapshot_branch_keys = ["snapshot_id", "name", "target_type", "target"]
 
     @_prepared_insert_statement("snapshot_branch", _snapshot_branch_keys)
     def snapshot_branch_add_one(self, branch: Dict[str, Any], *, statement) -> None:
         self._execute_with_retries(
             statement, [branch[key] for key in self._snapshot_branch_keys]
         )
 
     @_prepared_statement(
         "SELECT ascii_bins_count(target_type) AS counts "
         "FROM snapshot_branch "
         "WHERE snapshot_id = ? "
     )
     def snapshot_count_branches(self, snapshot_id: Sha1Git, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [snapshot_id])
 
     @_prepared_statement(
         "SELECT * FROM snapshot_branch WHERE snapshot_id = ? AND name >= ? LIMIT ?"
     )
     def snapshot_branch_get(
         self, snapshot_id: Sha1Git, from_: bytes, limit: int, *, statement
     ) -> None:
         return self._execute_with_retries(statement, [snapshot_id, from_, limit])
 
     ##########################
     # 'origin' table
     ##########################
 
     origin_keys = ["sha1", "url", "type", "next_visit_id"]
 
     @_prepared_statement(
         "INSERT INTO origin (sha1, url, next_visit_id) "
         "VALUES (?, ?, 1) IF NOT EXISTS"
     )
     def origin_add_one(self, origin: Origin, *, statement) -> None:
         self._execute_with_retries(statement, [hash_url(origin.url), origin.url])
         self._increment_counter("origin", 1)
 
     @_prepared_statement("SELECT * FROM origin WHERE sha1 = ?")
     def origin_get_by_sha1(self, sha1: bytes, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [sha1])
 
     def origin_get_by_url(self, url: str) -> ResultSet:
         return self.origin_get_by_sha1(hash_url(url))
 
     @_prepared_statement(
         f'SELECT token(sha1) AS tok, {", ".join(origin_keys)} '
         f"FROM origin WHERE token(sha1) >= ? LIMIT ?"
     )
     def origin_list(self, start_token: int, limit: int, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [start_token, limit])
 
     @_prepared_statement("SELECT * FROM origin")
     def origin_iter_all(self, *, statement) -> ResultSet:
         return self._execute_with_retries(statement, [])
 
     @_prepared_statement("SELECT next_visit_id FROM origin WHERE sha1 = ?")
     def _origin_get_next_visit_id(self, origin_sha1: bytes, *, statement) -> int:
         rows = list(self._execute_with_retries(statement, [origin_sha1]))
         assert len(rows) == 1  # TODO: error handling
         return rows[0].next_visit_id
 
     @_prepared_statement(
         "UPDATE origin SET next_visit_id=? WHERE sha1 = ? IF next_visit_id=?"
     )
     def origin_generate_unique_visit_id(self, origin_url: str, *, statement) -> int:
         origin_sha1 = hash_url(origin_url)
         next_id = self._origin_get_next_visit_id(origin_sha1)
         while True:
             res = list(
                 self._execute_with_retries(
                     statement, [next_id + 1, origin_sha1, next_id]
                 )
             )
             assert len(res) == 1
             if res[0].applied:
                 # No data race
                 return next_id
             else:
                 # Someone else updated it before we did, let's try again
                 next_id = res[0].next_visit_id
                 # TODO: abort after too many attempts
 
         return next_id
 
     ##########################
     # 'origin_visit' table
     ##########################
 
     _origin_visit_keys = [
         "origin",
         "visit",
         "type",
         "date",
     ]
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? AND visit > ? "
         "ORDER BY visit ASC"
     )
     def _origin_visit_get_pagination_asc_no_limit(
         self, origin_url: str, last_visit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, last_visit])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? AND visit > ? "
         "ORDER BY visit ASC "
         "LIMIT ?"
     )
     def _origin_visit_get_pagination_asc_limit(
         self, origin_url: str, last_visit: int, limit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, last_visit, limit])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? AND visit < ? "
         "ORDER BY visit DESC"
     )
     def _origin_visit_get_pagination_desc_no_limit(
         self, origin_url: str, last_visit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, last_visit])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? AND visit < ? "
         "ORDER BY visit DESC "
         "LIMIT ?"
     )
     def _origin_visit_get_pagination_desc_limit(
         self, origin_url: str, last_visit: int, limit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, last_visit, limit])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? ORDER BY visit ASC LIMIT ?"
     )
     def _origin_visit_get_no_pagination_asc_limit(
         self, origin_url: str, limit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, limit])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? ORDER BY visit ASC "
     )
     def _origin_visit_get_no_pagination_asc_no_limit(
         self, origin_url: str, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? ORDER BY visit DESC"
     )
     def _origin_visit_get_no_pagination_desc_no_limit(
         self, origin_url: str, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url])
 
     @_prepared_statement(
         "SELECT * FROM origin_visit WHERE origin = ? ORDER BY visit DESC LIMIT ?"
     )
     def _origin_visit_get_no_pagination_desc_limit(
         self, origin_url: str, limit: int, *, statement
     ) -> ResultSet:
         return self._execute_with_retries(statement, [origin_url, limit])
 
     def origin_visit_get(
         self,
         origin_url: str,
         last_visit: Optional[int],
         limit: Optional[int],
         order: str = "asc",
     ) -> ResultSet:
         order = order.lower()
         assert order in ["asc", "desc"]
 
         args: List[Any] = [origin_url]
 
         if last_visit is not None:
             page_name = "pagination"
             args.append(last_visit)
         else:
             page_name = "no_pagination"
 
         if limit is not None:
             limit_name = "limit"
             args.append(limit)
         else:
             limit_name = "no_limit"
 
         method_name = f"_origin_visit_get_{page_name}_{order}_{limit_name}"
         origin_visit_get_method = getattr(self, method_name)
         return origin_visit_get_method(*args)
 
     @_prepared_insert_statement("origin_visit", _origin_visit_keys)
     def origin_visit_add_one(self, visit: OriginVisit, *, statement) -> None:
         self._add_one(statement, "origin_visit", visit, self._origin_visit_keys)
 
     # Columns written by origin_visit_status_add_one, in insertion order.
     # "metadata" must stay last: origin_visit_status_add_one asserts it and
     # handles that column separately from the others.
     _origin_visit_status_keys = [
         "origin",
         "visit",
         "date",
         "status",
         "snapshot",
         "metadata",
     ]
 
     @_prepared_insert_statement("origin_visit_status", _origin_visit_status_keys)
     def origin_visit_status_add_one(
         self, visit_update: OriginVisitStatus, *, statement
     ) -> None:
         """Insert one origin visit status row, with its metadata JSON-encoded."""
         # The slicing below relies on "metadata" being the last column.
         assert self._origin_visit_status_keys[-1] == "metadata"
         keys = self._origin_visit_status_keys
 
-        metadata = json.dumps(visit_update.metadata)
+        # Non-None metadata is copied into a plain dict before serialization.
+        metadata = json.dumps(
+            dict(visit_update.metadata) if visit_update.metadata is not None else None
+        )
         # Every column but the last is read off the object; the JSON string
         # is appended as the final bound value.
         self._execute_with_retries(
             statement, [getattr(visit_update, key) for key in keys[:-1]] + [metadata]
         )
 
     def origin_visit_status_get_latest(self, origin: str, visit: int,) -> Optional[Row]:
         """Return the first row of ``origin_visit_status_get(origin, visit)``.
 
         Returns None when that call yields no row.
         """
         statuses = self.origin_visit_status_get(origin, visit)
         if not statuses:
             return None
         return statuses[0]
 
     @_prepared_statement(
         "SELECT * FROM origin_visit_status "
         "WHERE origin = ? AND visit = ? "
         "ORDER BY date DESC"
     )
     def origin_visit_status_get(
         self,
         origin: str,
         visit: int,
         allowed_statuses: Optional[List[str]] = None,
         require_snapshot: bool = False,
         *,
         statement,
     ) -> List[Row]:
         """List the statuses of one visit of ``origin``, most recent date first.
 
         NOTE(review): ``allowed_statuses`` and ``require_snapshot`` are accepted
         but never used here, so no filtering happens in this method.
         """
         result_set = self._execute_with_retries(statement, [origin, visit])
         return list(result_set)
 
     @_prepared_statement("SELECT * FROM origin_visit WHERE origin = ? AND visit = ?")
     def origin_visit_get_one(
         self, origin_url: str, visit_id: int, *, statement
     ) -> Optional[Row]:
         """Return the first ``origin_visit`` row matching both ids, else None."""
         # TODO: error handling
         matches = list(self._execute_with_retries(statement, [origin_url, visit_id]))
         return matches[0] if matches else None
 
     @_prepared_statement("SELECT * FROM origin_visit WHERE origin = ?")
     def origin_visit_get_all(self, origin_url: str, *, statement) -> ResultSet:
         """Run the query selecting all ``origin_visit`` rows of ``origin_url``."""
         bound_values = [origin_url]
         return self._execute_with_retries(statement, bound_values)
 
     @_prepared_statement("SELECT * FROM origin_visit WHERE token(origin) >= ?")
     def _origin_visit_iter_from(self, min_token: int, *, statement) -> Iterator[Row]:
         """Yield the ``origin_visit`` rows whose origin token is >= ``min_token``."""
         rows = self._execute_with_retries(statement, [min_token])
         yield from rows
 
     @_prepared_statement("SELECT * FROM origin_visit WHERE token(origin) < ?")
     def _origin_visit_iter_to(self, max_token: int, *, statement) -> Iterator[Row]:
         """Yield the ``origin_visit`` rows whose origin token is < ``max_token``."""
         rows = self._execute_with_retries(statement, [max_token])
         yield from rows
 
     def origin_visit_iter(self, start_token: int) -> Iterator[Row]:
         """Yield all origin visits starting from ``start_token``, wrapping
         around the token space: first the rows at or after the token, then
         the rows before it."""
         for fetch_rows in (self._origin_visit_iter_from, self._origin_visit_iter_to):
             yield from fetch_rows(start_token)
 
     ##########################
     # 'metadata_authority' table
     ##########################
 
     # Columns of the insert statement prepared for metadata_authority_add.
     _metadata_authority_keys = ["url", "type", "metadata"]
 
     @_prepared_insert_statement("metadata_authority", _metadata_authority_keys)
     def metadata_authority_add(self, url, type, metadata, *, statement):
         """Execute the ``metadata_authority`` insert with (url, type, metadata)."""
         bound_values = [url, type, metadata]
         return self._execute_with_retries(statement, bound_values)
 
     @_prepared_statement("SELECT * from metadata_authority WHERE type = ? AND url = ?")
     def metadata_authority_get(self, type, url, *, statement) -> Optional[Row]:
         """Return the first ``metadata_authority`` row for (type, url), or None."""
         rows = iter(self._execute_with_retries(statement, [type, url]))
         return next(rows, None)
 
     ##########################
     # 'metadata_fetcher' table
     ##########################
 
     # Columns of the insert statement prepared for metadata_fetcher_add.
     _metadata_fetcher_keys = ["name", "version", "metadata"]
 
     @_prepared_insert_statement("metadata_fetcher", _metadata_fetcher_keys)
     def metadata_fetcher_add(self, name, version, metadata, *, statement):
         """Execute the ``metadata_fetcher`` insert with (name, version, metadata)."""
         bound_values = [name, version, metadata]
         return self._execute_with_retries(statement, bound_values)
 
     @_prepared_statement(
         "SELECT * from metadata_fetcher WHERE name = ? AND version = ?"
     )
     def metadata_fetcher_get(self, name, version, *, statement) -> Optional[Row]:
         """Return the first ``metadata_fetcher`` row for (name, version), or None."""
         rows = iter(self._execute_with_retries(statement, [name, version]))
         return next(rows, None)
 
     #########################
     # 'object_metadata' table
     #########################
 
     # Columns of the INSERT prepared for object_metadata_add, in placeholder
     # order. The first nine match that method's explicit parameters; the
     # remaining ones are filled from its ``context`` argument.
     _object_metadata_keys = [
         "type",
         "id",
         "authority_type",
         "authority_url",
         "discovery_date",
         "fetcher_name",
         "fetcher_version",
         "format",
         "metadata",
         "origin",
         "visit",
         "snapshot",
         "release",
         "revision",
         "path",
         "directory",
     ]
 
     @_prepared_statement(
         f"INSERT INTO object_metadata ({', '.join(_object_metadata_keys)}) "
         f"VALUES ({', '.join('?' for _ in _object_metadata_keys)})"
     )
     def object_metadata_add(
         self,
         object_type: str,
         id: str,
         authority_type,
         authority_url,
         discovery_date,
         fetcher_name,
         fetcher_version,
         format,
         metadata,
         context: Dict[str, Union[str, bytes, int]],
         *,
         statement,
     ):
         """Execute the ``object_metadata`` insert.
 
         The bound values are the nine explicit arguments, in signature order,
         followed by one value per key of
         ``extrinsic_metadata.CONTEXT_KEYS[object_type]``, looked up in
         ``context`` (None when the key is absent).
         """
         context_keys = extrinsic_metadata.CONTEXT_KEYS[object_type]
         params = [
             object_type,
             id,
             authority_type,
             authority_url,
             discovery_date,
             fetcher_name,
             fetcher_version,
             format,
             metadata,
             *(context.get(key) for key in context_keys),
         ]
         return self._execute_with_retries(statement, params)
 
     @_prepared_statement(
         "SELECT * from object_metadata "
         "WHERE id=? AND authority_url=? AND discovery_date>? AND authority_type=?"
     )
     def object_metadata_get_after_date(
         self,
         id: str,
         authority_type: str,
         authority_url: str,
         after: datetime.datetime,
         *,
         statement,
     ):
         """Select the ``object_metadata`` rows of ``id`` for the given authority
         whose ``discovery_date`` is strictly greater than ``after``."""
         # Bound in placeholder order, which differs from the signature order.
         bound_values = [id, authority_url, after, authority_type]
         return self._execute_with_retries(statement, bound_values)
 
     @_prepared_statement(
         "SELECT * from object_metadata "
         "WHERE id=? AND authority_type=? AND authority_url=? "
         "AND (discovery_date, fetcher_name, fetcher_version) > (?, ?, ?)"
     )
     def object_metadata_get_after_date_and_fetcher(
         self,
         id: str,
         authority_type: str,
         authority_url: str,
         after_date: datetime.datetime,
         after_fetcher_name: str,
         after_fetcher_version: str,
         *,
         statement,
     ):
         """Select the ``object_metadata`` rows of ``id`` for the given authority
         whose (discovery_date, fetcher_name, fetcher_version) tuple is greater
         than (after_date, after_fetcher_name, after_fetcher_version)."""
         lower_bound = [after_date, after_fetcher_name, after_fetcher_version]
         bound_values = [id, authority_type, authority_url] + lower_bound
         return self._execute_with_retries(statement, bound_values)
 
     @_prepared_statement(
         "SELECT * from object_metadata "
         "WHERE id=? AND authority_url=? AND authority_type=?"
     )
     def object_metadata_get(
         self, id: str, authority_type: str, authority_url: str, *, statement
     ) -> Iterable[Row]:
         """Select the ``object_metadata`` rows of ``id`` for the given authority."""
         # Bound in placeholder order (url before type), unlike the signature.
         bound_values = [id, authority_url, authority_type]
         return self._execute_with_retries(statement, bound_values)
 
     ##########################
     # Miscellaneous
     ##########################
 
     @_prepared_statement("SELECT uuid() FROM revision LIMIT 1;")
     def check_read(self, *, statement):
         """Execute a one-row read on ``revision``; the result is discarded."""
         no_values = []
         self._execute_with_retries(statement, no_values)
 
     @_prepared_statement(
         "SELECT object_type, count FROM object_count WHERE partition_key=0"
     )
     def stat_counters(self, *, statement) -> ResultSet:
         """Return the (object_type, count) rows of ``object_count`` partition 0."""
         no_values = []
         return self._execute_with_retries(statement, no_values)
diff --git a/swh/storage/db.py b/swh/storage/db.py
index 41e83863..3728ed92 100644
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -1,1288 +1,1292 @@
 # Copyright (C) 2015-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import datetime
 import random
 import select
 from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
 
 from swh.core.db import BaseDb
-from swh.core.db.db_utils import stored_procedure, jsonize
+from swh.core.db.db_utils import stored_procedure, jsonize as _jsonize
 from swh.core.db.db_utils import execute_values_generator
 from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE
 
 
+def jsonize(d):
+    """Wrap ``swh.core.db.db_utils.jsonize`` (imported here as ``_jsonize``).
+
+    ``None`` is passed through unchanged; any other value is first copied
+    into a plain ``dict`` before being handed to ``_jsonize``.
+    """
+    return _jsonize(dict(d) if d is not None else None)
+
+
 class Db(BaseDb):
     """Proxy to the SWH DB, with wrappers around stored procedures
 
     """
 
     def mktemp_dir_entry(self, entry_type, cur=None):
         """Call ``swh_mktemp_dir_entry`` with ``directory_entry_<entry_type>``."""
         table_name = "directory_entry_%s" % entry_type
         cursor = self._cursor(cur)
         cursor.execute("SELECT swh_mktemp_dir_entry(%s)", (table_name,))
 
     @stored_procedure("swh_mktemp_revision")
     def mktemp_revision(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_mktemp_revision) -- confirm
         # in swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_mktemp_release")
     def mktemp_release(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_mktemp_release) -- confirm
         # in swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_mktemp_snapshot_branch")
     def mktemp_snapshot_branch(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_mktemp_snapshot_branch) --
         # confirm in swh.core.db.db_utils.
         pass
 
     def register_listener(self, notify_queue, cur=None):
         """Issue ``LISTEN <notify_queue>`` on the cursor.
 
         NOTE(review): the queue name is interpolated into the SQL text as is,
         so it must come from a trusted source.
         """
         listen_query = "LISTEN %s" % notify_queue
         self._cursor(cur).execute(listen_query)
 
     def listen_notifies(self, timeout):
         """Wait up to ``timeout`` seconds for the connection to become
         readable, then poll it and yield its pending notifications.
 
         Nothing is yielded when ``select`` reports no ready descriptor.
         """
         ready = select.select([self.conn], [], [], timeout)
         if ready != ([], [], []):
             self.conn.poll()
             # Drain the queue front to back
             while self.conn.notifies:
                 yield self.conn.notifies.pop(0)
 
     @stored_procedure("swh_content_add")
     def content_add_from_temp(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_content_add) -- confirm in
         # swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_directory_add")
     def directory_add_from_temp(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_directory_add) -- confirm in
         # swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_skipped_content_add")
     def skipped_content_add_from_temp(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_skipped_content_add) --
         # confirm in swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_revision_add")
     def revision_add_from_temp(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_revision_add) -- confirm in
         # swh.core.db.db_utils.
         pass
 
     @stored_procedure("swh_release_add")
     def release_add_from_temp(self, cur=None):
         # Intentionally empty: the behaviour presumably comes from the
         # stored_procedure decorator (calling swh_release_add) -- confirm in
         # swh.core.db.db_utils.
         pass
 
     def content_update_from_temp(self, keys_to_update, cur=None):
         """Call ``swh_content_update`` with ``keys_to_update`` as array items.
 
         NOTE(review): ``keys_to_update`` is interpolated into the SQL text as
         is (it must already be a valid, quoted array-element list), so it must
         come from a trusted source.
         """
         template = """select swh_content_update(ARRAY[%s] :: text[])"""
         self._cursor(cur).execute(template % keys_to_update)
 
     # Columns selected by content_get_metadata_from_sha1s and
     # content_get_range, in result order.
     content_get_metadata_keys = [
         "sha1",
         "sha1_git",
         "sha256",
         "blake2s256",
         "length",
         "status",
     ]
 
     # Same columns with "ctime" appended.
     content_add_keys = content_get_metadata_keys + ["ctime"]
 
     # Column names for skipped contents (not referenced in this section).
     skipped_content_keys = [
         "sha1",
         "sha1_git",
         "sha256",
         "blake2s256",
         "length",
         "reason",
         "status",
         "origin",
     ]
 
     def content_get_metadata_from_sha1s(self, sha1s, cur=None):
         """Yield, for each given sha1 present in ``content``, the sha1
         followed by the other ``content_get_metadata_keys`` columns."""
         cur = self._cursor(cur)
         # sha1 itself comes from the VALUES list, hence the [1:] slice
         other_columns = ", ".join(self.content_get_metadata_keys[1:])
         query = (
             """
             select t.sha1, %s from (values %%s) as t (sha1)
             inner join content using (sha1)
             """
             % other_columns
         )
         sha1_rows = ((sha1,) for sha1 in sha1s)
         yield from execute_values_generator(cur, query, sha1_rows)
 
     def content_get_range(self, start, end, limit=None, cur=None):
         """Yield the contents whose sha1 lies within [start, end], ordered by
         sha1, at most ``limit`` of them.
 
         """
         columns = ", ".join(self.content_get_metadata_keys)
         query_template = """select %s from content
                    where %%s <= sha1 and sha1 <= %%s
                    order by sha1
                    limit %%s"""
         cur = self._cursor(cur)
         cur.execute(query_template % columns, (start, end, limit))
         yield from cur
 
     # Checksum columns used by content_missing_from_list and
     # skipped_content_missing to build their VALUES column lists.
     content_hash_keys = ["sha1", "sha1_git", "sha256", "blake2s256"]
 
     def content_missing_from_list(self, contents, cur=None):
         """Yield the hash tuples of the given contents for which no ``content``
         row matches on every ``content_hash_keys`` column.
 
         Each item of ``contents`` must be subscriptable by those keys.
         """
         cur = self._cursor(cur)
         hash_keys = self.content_hash_keys
         keys = ", ".join(hash_keys)
         equality = " AND ".join("t.%s = c.%s" % (key, key) for key in hash_keys)
         query = (
             """
             SELECT %s
             FROM (VALUES %%s) as t(%s)
             WHERE NOT EXISTS (
                 SELECT 1 FROM content c
                 WHERE %s
             )
             """
             % (keys, keys, equality)
         )
         hash_rows = (tuple(c[key] for key in hash_keys) for c in contents)
         yield from execute_values_generator(cur, query, hash_rows)
 
     def content_missing_per_sha1(self, sha1s, cur=None):
         """Yield the given sha1s that match no row of ``content``."""
         cur = self._cursor(cur)
         query = """
         SELECT t.sha1 FROM (VALUES %s) AS t(sha1)
         WHERE NOT EXISTS (
             SELECT 1 FROM content c WHERE c.sha1 = t.sha1
         )"""
         sha1_rows = ((sha1,) for sha1 in sha1s)
         yield from execute_values_generator(cur, query, sha1_rows)
 
     def content_missing_per_sha1_git(self, contents, cur=None):
         """Yield the given sha1_git values that match no row of ``content``."""
         cur = self._cursor(cur)
         query = """
         SELECT t.sha1_git FROM (VALUES %s) AS t(sha1_git)
         WHERE NOT EXISTS (
             SELECT 1 FROM content c WHERE c.sha1_git = t.sha1_git
         )"""
         sha1_git_rows = ((sha1,) for sha1 in contents)
         yield from execute_values_generator(cur, query, sha1_git_rows)
 
     def skipped_content_missing(self, contents, cur=None):
         """Yield the hash tuples of the given contents that have no matching
         ``skipped_content`` row (compared on sha1, sha1_git and sha256).
 
         Yields nothing when ``contents`` is empty.
         """
         if not contents:
             return []
         cur = self._cursor(cur)
 
         # One "%s" per content: each is bound to a whole tuple of hashes
         row_placeholders = ", ".join("%s" for _ in contents)
         column_names = ", ".join(self.content_hash_keys)
         query = """SELECT * FROM (VALUES %s) AS t (%s)
                    WHERE not exists
                    (SELECT 1 FROM skipped_content s WHERE
                        s.sha1 is not distinct from t.sha1::sha1 and
                        s.sha1_git is not distinct from t.sha1_git::sha1 and
                        s.sha256 is not distinct from t.sha256::bytea);""" % (
             row_placeholders,
             column_names,
         )
         hash_rows = [
             tuple(cont[key] for key in self.content_hash_keys) for cont in contents
         ]
         cur.execute(query, hash_rows)
 
         yield from cur
 
     def snapshot_exists(self, snapshot_id, cur=None):
         """Tell whether a ``snapshot`` row with the given id exists."""
         cursor = self._cursor(cur)
         cursor.execute("""SELECT 1 FROM snapshot where id=%s""", (snapshot_id,))
         found = cursor.fetchone()
         return bool(found)
 
     def snapshot_missing_from_list(self, snapshots, cur=None):
         """Yield the given ids that match no row of ``snapshot``."""
         cur = self._cursor(cur)
         query = """
             SELECT id FROM (VALUES %s) as t(id)
             WHERE NOT EXISTS (
                 SELECT 1 FROM snapshot d WHERE d.id = t.id
             )
                 """
         id_rows = ((id,) for id in snapshots)
         yield from execute_values_generator(cur, query, id_rows)
 
     def snapshot_add(self, snapshot_id, cur=None):
         """Call ``swh_snapshot_add`` for ``snapshot_id`` (adds a snapshot from
         the temporary table)."""
         query = """SELECT swh_snapshot_add(%s)"""
         self._cursor(cur).execute(query, (snapshot_id,))
 
     # Columns selected by snapshot_count_branches, in result order.
     snapshot_count_cols = ["target_type", "count"]
 
     def snapshot_count_branches(self, snapshot_id, cur=None):
         """Yield the ``snapshot_count_cols`` rows returned by
         ``swh_snapshot_count_branches`` for ``snapshot_id``."""
         columns = ", ".join(self.snapshot_count_cols)
         query = """\
            SELECT %s FROM swh_snapshot_count_branches(%%s)
         """ % columns
         cur = self._cursor(cur)
         cur.execute(query, (snapshot_id,))
         yield from cur
 
     # Columns selected by snapshot_get_by_id, in result order.
     snapshot_get_cols = ["snapshot_id", "name", "target", "target_type"]
 
     def snapshot_get_by_id(
         self,
         snapshot_id,
         branches_from=b"",
         branches_count=None,
         target_types=None,
         cur=None,
     ):
         """Yield the ``snapshot_get_cols`` rows returned by
         ``swh_snapshot_get_by_id``.
 
         The four arguments are passed to the SQL function in signature order;
         ``target_types`` is cast to ``snapshot_target[]``.
         """
         columns = ", ".join(self.snapshot_get_cols)
         query = """\
            SELECT %s
            FROM swh_snapshot_get_by_id(%%s, %%s, %%s, %%s :: snapshot_target[])
         """ % columns
         params = (snapshot_id, branches_from, branches_count, target_types)
         cur = self._cursor(cur)
         cur.execute(query, params)
         yield from cur
 
     def snapshot_get_by_origin_visit(self, origin_url, visit_id, cur=None):
         """Return the ``snapshot`` column of the most recent status of the
         given visit of ``origin_url``, or None when the query returns no row."""
         query = """\
            SELECT ovs.snapshot
            FROM origin_visit ov
            INNER JOIN origin o ON o.id = ov.origin
            INNER JOIN origin_visit_status ovs
              ON ov.origin = ovs.origin AND ov.visit = ovs.visit
            WHERE o.url=%s AND ov.visit=%s
            ORDER BY ovs.date DESC LIMIT 1
         """
         cursor = self._cursor(cur)
         cursor.execute(query, (origin_url, visit_id))
         row = cursor.fetchone()
         return row[0] if row else None
 
     def snapshot_get_random(self, cur=None):
         """Delegate to ``_get_random_row_from_table`` for the ``snapshot``
         table, asking for its ``id`` column."""
         table, columns, id_column = "snapshot", ["id"], "id"
         return self._get_random_row_from_table(table, columns, id_column, cur)
 
     # Columns selected by content_find, in result order.
     content_find_cols = [
         "sha1",
         "sha1_git",
         "sha256",
         "blake2s256",
         "length",
         "ctime",
         "status",
     ]
 
     def content_find(
         self, sha1=None, sha1_git=None, sha256=None, blake2s256=None, cur=None
     ):
         """Find the contents matching every checksum that is provided, among
         sha1, sha1_git, sha256 and blake2s256.
 
         Args:
             sha1: sha1 of the content
             sha1_git: the sha1 computed `a la git` of the content
             sha256: sha256 of the content
             blake2s256: blake2s256 of the content
 
         Returns:
             The list of matching rows (as returned by ``fetchall``), each
             holding the ``content_find_cols`` columns; empty if none matches.
 
         Raises:
             ValueError: if no checksum is provided at all. The WHERE clause
                 would otherwise be empty, producing an invalid query.
 
         """
         checksum_dict = {
             "sha1": sha1,
             "sha1_git": sha1_git,
             "sha256": sha256,
             "blake2s256": blake2s256,
         }
         # Keep only those keys which have a value other than None
         provided = {
             algorithm: checksum
             for algorithm, checksum in checksum_dict.items()
             if checksum is not None
         }
         if not provided:
             raise ValueError(
                 "content_find requires at least one of: %s" % ", ".join(checksum_dict)
             )
 
         where_parts = [algorithm + "= %s" for algorithm in provided]
         args = list(provided.values())
         query = " AND ".join(where_parts)
 
         cur = self._cursor(cur)
         cur.execute(
             """SELECT %s
                        FROM content WHERE %s
                        """
             % (",".join(self.content_find_cols), query),
             args,
         )
         return cur.fetchall()
 
     def content_get_random(self, cur=None):
         """Delegate to ``_get_random_row_from_table`` for the ``content``
         table, asking for its ``sha1_git`` column."""
         table, columns, id_column = "content", ["sha1_git"], "sha1_git"
         return self._get_random_row_from_table(table, columns, id_column, cur)
 
     def directory_missing_from_list(self, directories, cur=None):
         """Yield the given ids that match no row of ``directory``."""
         cur = self._cursor(cur)
         query = """
             SELECT id FROM (VALUES %s) as t(id)
             WHERE NOT EXISTS (
                 SELECT 1 FROM directory d WHERE d.id = t.id
             )
             """
         id_rows = ((id,) for id in directories)
         yield from execute_values_generator(cur, query, id_rows)
 
     # Columns selected by directory_walk_one, directory_walk and
     # directory_entry_get_by_path, in result order.
     directory_ls_cols = [
         "dir_id",
         "type",
         "target",
         "name",
         "perms",
         "status",
         "sha1",
         "sha1_git",
         "sha256",
         "length",
     ]
 
     def directory_walk_one(self, directory, cur=None):
         """Yield the ``directory_ls_cols`` rows returned by
         ``swh_directory_walk_one`` for ``directory``."""
         select_list = ", ".join(self.directory_ls_cols)
         query = "SELECT %s FROM swh_directory_walk_one(%%s)" % select_list
         cursor = self._cursor(cur)
         cursor.execute(query, (directory,))
         yield from cursor
 
     def directory_walk(self, directory, cur=None):
         """Yield the ``directory_ls_cols`` rows returned by
         ``swh_directory_walk`` for ``directory``."""
         select_list = ", ".join(self.directory_ls_cols)
         query = "SELECT %s FROM swh_directory_walk(%%s)" % select_list
         cursor = self._cursor(cur)
         cursor.execute(query, (directory,))
         yield from cursor
 
     def directory_entry_get_by_path(self, directory, paths, cur=None):
         """Retrieve a directory entry by path.
 
         Args:
             directory: first argument of ``swh_find_directory_entry_by_path``
             paths: second argument of ``swh_find_directory_entry_by_path``
 
         Returns:
             The row of ``directory_ls_cols`` columns, or None when the query
             returns no row or a row made only of NULLs.
 
         """
         cur = self._cursor(cur)
 
         cols = ", ".join(self.directory_ls_cols)
         query = "SELECT %s FROM swh_find_directory_entry_by_path(%%s, %%s)" % cols
         cur.execute(query, (directory, paths))
 
         data = cur.fetchone()
         # Guard against an empty result set: set(None) would raise TypeError.
         if data is None or set(data) == {None}:
             return None
         return data
 
     def directory_get_random(self, cur=None):
         """Delegate to ``_get_random_row_from_table`` for the ``directory``
         table, asking for its ``id`` column."""
         table, columns, id_column = "directory", ["id"], "id"
         return self._get_random_row_from_table(table, columns, id_column, cur)
 
     def revision_missing_from_list(self, revisions, cur=None):
         """Yield the given ids that match no row of ``revision``."""
         cur = self._cursor(cur)
         query = """
             SELECT id FROM (VALUES %s) as t(id)
             WHERE NOT EXISTS (
                 SELECT 1 FROM revision r WHERE r.id = t.id
             )
             """
         id_rows = ((id,) for id in revisions)
         yield from execute_values_generator(cur, query, id_rows)
 
     # Revision column names; extended below into revision_get_cols.
     revision_add_cols = [
         "id",
         "date",
         "date_offset",
         "date_neg_utc_offset",
         "committer_date",
         "committer_date_offset",
         "committer_date_neg_utc_offset",
         "type",
         "directory",
         "message",
         "author_fullname",
         "author_name",
         "author_email",
         "committer_fullname",
         "committer_name",
         "committer_email",
         "metadata",
         "synthetic",
         "extra_headers",
     ]
 
     # Columns selected by revision_get_from_list and revision_log: the
     # columns above plus "parents".
     revision_get_cols = revision_add_cols + ["parents"]
 
     def origin_visit_add(self, origin, ts, type, cur=None):
         """Add a new origin_visit for origin origin at timestamp ts.
 
         Args:
             origin: origin concerned by the visit
             ts: the date of the visit
             type: type of loader for the visit
 
         Returns:
             The new visit index step for that origin (first column of the row
             returned by ``swh_origin_visit_add``)
 
         """
         # Resolve the cursor once, so the query and the fetch share it.
         cur = self._cursor(cur)
         cur.execute("SELECT swh_origin_visit_add(%s, %s, %s)", (origin, ts, type))
         return cur.fetchone()[0]
 
     # Columns of origin_visit_status, used as insert columns by
     # origin_visit_status_add and as dict keys by _make_origin_visit_status.
     # origin_visit_status_add asserts that "origin" is first and "metadata"
     # is last.
     origin_visit_status_cols = [
         "origin",
         "visit",
         "date",
         "status",
         "snapshot",
         "metadata",
     ]
 
     def origin_visit_status_add(
         self, visit_status: OriginVisitStatus, cur=None
     ) -> None:
         """Insert a new origin visit status; an existing row with the same
         (origin, visit, date) is left untouched.
 
         The origin id is resolved from ``visit_status.origin`` (an URL) by the
         query itself, and the metadata goes through ``jsonize``.
 
         """
         assert self.origin_visit_status_cols[0] == "origin"
         assert self.origin_visit_status_cols[-1] == "metadata"
         # Middle columns are read off the object as is
         cols = self.origin_visit_status_cols[1:-1]
         column_list = ", ".join(cols)
         placeholders = ", ".join(["%s"] * len(cols))
         query = (
             f"WITH origin_id as (select id from origin where url=%s) "
             f"INSERT INTO origin_visit_status "
             f"(origin, {column_list}, metadata) "
             f"VALUES ((select id from origin_id), "
             f"{placeholders}, %s) "
             f"ON CONFLICT (origin, visit, date) do nothing"
         )
         params = [visit_status.origin]
         params.extend(getattr(visit_status, key) for key in cols)
         params.append(jsonize(visit_status.metadata))
         self._cursor(cur).execute(query, params)
 
     def origin_visit_add_with_id(self, origin_visit: OriginVisit, cur=None) -> None:
         """Insert an origin visit whose visit id is already set; an existing
         row with the same (origin, visit) is left untouched.
 
         """
         assert origin_visit.visit is not None
         origin_visit_cols = ["origin", "visit", "date", "type"]
         column_list = ", ".join(origin_visit_cols)
         # "origin" is resolved by a sub-select; every other column gets a %s
         value_placeholders = ", ".join("%s" for _ in origin_visit_cols[1:])
         query = """INSERT INTO origin_visit ({cols})
                    VALUES ((select id from origin where url=%s), {values})
                    ON CONFLICT (origin, visit) DO NOTHING""".format(
             cols=column_list, values=value_placeholders,
         )
         params = (
             origin_visit.origin,
             origin_visit.visit,
             origin_visit.date,
             origin_visit.type,
         )
         self._cursor(cur).execute(query, params)
 
     # Keys zipped with the row returned by swh_visit_find_by_date in
     # origin_visit_find_by_date.
     origin_visit_get_cols = [
         "origin",
         "visit",
         "date",
         "type",
         "status",
         "metadata",
         "snapshot",
     ]
     # SELECT expressions for queries joining origin_visit (ov), origin (o)
     # and origin_visit_status (ovs); same order as origin_visit_get_cols.
     origin_visit_select_cols = [
         "o.url AS origin",
         "ov.visit",
         "ov.date",
         "ov.type AS type",
         "ovs.status",
         "ovs.metadata",
         "ovs.snapshot",
     ]
 
     # SELECT expressions used by origin_visit_status_get_latest, joining
     # origin (o) and origin_visit_status (ovs); same order as
     # origin_visit_status_cols.
     origin_visit_status_select_cols = [
         "o.url AS origin",
         "ovs.visit",
         "ovs.date",
         "ovs.status",
         "ovs.snapshot",
         "ovs.metadata",
     ]
 
     def _make_origin_visit_status(
         self, row: Optional[Tuple[Any]]
     ) -> Optional[Dict[str, Any]]:
         """Make an origin_visit_status dict out of a row
 
         """
         if not row:
             return None
         return dict(zip(self.origin_visit_status_cols, row))
 
     def origin_visit_status_get_latest(
         self,
         origin_url: str,
         visit: int,
         allowed_statuses: Optional[List[str]] = None,
         require_snapshot: bool = False,
         cur=None,
     ) -> Optional[Dict[str, Any]]:
         """Return the most recent status of the given visit of ``origin_url``.
 
         Args:
             origin_url: URL of the origin
             visit: visit id
             allowed_statuses: if non-empty, only statuses in this list match
             require_snapshot: if True, only statuses with a non-null snapshot
                 match
 
         Returns:
             The result of ``_make_origin_visit_status`` on the fetched row.
 
         """
         cur = self._cursor(cur)
 
         select_clause = "SELECT %s" % ", ".join(self.origin_visit_status_select_cols)
         query_parts = [
             select_clause,
             "FROM origin_visit_status ovs ",
             "INNER JOIN origin o ON o.id = ovs.origin",
             "WHERE o.url = %s",
             "AND ovs.visit = %s",
         ]
         query_params: List[Any] = [origin_url, visit]
 
         # Optional filters
         if require_snapshot:
             query_parts.append("AND ovs.snapshot is not null")
         if allowed_statuses:
             query_parts.append("AND ovs.status IN %s")
             query_params.append(tuple(allowed_statuses))
 
         query_parts.append("ORDER BY ovs.date DESC LIMIT 1")
 
         cur.execute("\n".join(query_parts), tuple(query_params))
         return self._make_origin_visit_status(cur.fetchone())
 
     def origin_visit_get_all(
         self, origin_id, last_visit=None, order="asc", limit=None, cur=None
     ):
         """Retrieve the visits of an origin, each with its most recent status.
 
         Args:
             origin_id: the origin's URL (it is matched against ``origin.url``,
                 despite the parameter name)
             last_visit: if not None, only yield visits whose id is strictly
                 greater (ascending order) or lower (descending order)
             order: "asc" or "desc", case-insensitive
             limit: maximum number of rows, or None for no limit
 
         Yields:
             The visits for that origin, as ``origin_visit_select_cols`` rows
 
         """
         # Normalize once so that validation, the pagination operator and the
         # ORDER BY clause all agree (e.g. on "ASC").
         order = order.lower()
         assert order in ["asc", "desc"]
         cur = self._cursor(cur)
 
         query_parts = [
             "SELECT DISTINCT ON (ov.visit) %s "
             % ", ".join(self.origin_visit_select_cols),
             "FROM origin_visit ov",
             "INNER JOIN origin o ON o.id = ov.origin",
             "INNER JOIN origin_visit_status ovs",
             "ON ov.origin = ovs.origin AND ov.visit = ovs.visit",
         ]
         query_parts.append("WHERE o.url = %s")
         query_params: List[Any] = [origin_id]
 
         if last_visit is not None:
             op_comparison = ">" if order == "asc" else "<"
             query_parts.append(f"and ov.visit {op_comparison} %s")
             query_params.append(last_visit)
 
         # With DISTINCT ON (ov.visit), sorting statuses by descending date
         # keeps the most recent status of each visit.
         query_parts.append(f"ORDER BY ov.visit {order.upper()}, ovs.date DESC")
 
         if limit is not None:
             query_parts.append("LIMIT %s")
             query_params.append(limit)
 
         query = "\n".join(query_parts)
         cur.execute(query, tuple(query_params))
         yield from cur
 
     def origin_visit_get(self, origin_id, visit_id, cur=None):
         """Retrieve visit ``visit_id`` of an origin, with its most recent
         status.
 
         Args:
             origin_id: the origin concerned (matched against ``origin.url``)
             visit_id: The visit step for that origin
 
         Returns:
             The ``origin_visit_select_cols`` row, or None if there is none
 
         """
         columns = ", ".join(self.origin_visit_select_cols)
         query = """\
             SELECT %s
             FROM origin_visit ov
             INNER JOIN origin o ON o.id = ov.origin
             INNER JOIN origin_visit_status ovs
             ON ov.origin = ovs.origin AND ov.visit = ovs.visit
             WHERE o.url = %%s AND ov.visit = %%s
             ORDER BY ovs.date DESC
             LIMIT 1
             """ % columns
 
         cursor = self._cursor(cur)
         cursor.execute(query, (origin_id, visit_id))
         rows = cursor.fetchall()
         return rows[0] if rows else None
 
     def origin_visit_find_by_date(self, origin, visit_date, cur=None):
         """Return the first row of ``swh_visit_find_by_date(origin,
         visit_date)`` as a dict keyed by ``origin_visit_get_cols``, with its
         "origin" entry overwritten by the ``origin`` argument.
 
         Returns None when the function returns no row.
         """
         query = "SELECT * FROM swh_visit_find_by_date(%s, %s)"
         cursor = self._cursor(cur)
         cursor.execute(query, (origin, visit_date))
         rows = cursor.fetchall()
         if not rows:
             return None
         visit = dict(zip(self.origin_visit_get_cols, rows[0]))
         visit["origin"] = origin
         return visit
 
     def origin_visit_exists(self, origin_id, visit_id, cur=None):
         """Tell whether an ``origin_visit`` row exists for the given ids."""
         query = "SELECT 1 FROM origin_visit where origin = %s AND visit = %s"
         cursor = self._cursor(cur)
         cursor.execute(query, (origin_id, visit_id))
         found = cursor.fetchone()
         return bool(found)
 
     def origin_visit_get_latest(
         self,
         origin_id: str,
         type: Optional[str],
         allowed_statuses: Optional[Iterable[str]],
         require_snapshot: bool,
         cur=None,
     ):
         """Retrieve the most recent origin_visit of the given origin,
         with optional filters.
 
         Args:
             origin_id: the origin concerned (matched against ``origin.url``)
             type: Optional visit type to filter on
             allowed_statuses: the visit statuses allowed for the returned visit
             require_snapshot (bool): If True, only a visit with a known
                 snapshot will be returned.
 
         Returns:
             The origin_visit information, or None if no visit matches.
         """
         cur = self._cursor(cur)
 
         select_clause = "SELECT %s" % ", ".join(self.origin_visit_select_cols)
         query_parts = [
             select_clause,
             "FROM origin_visit ov ",
             "INNER JOIN origin o ON o.id = ov.origin",
             "INNER JOIN origin_visit_status ovs ",
             "ON o.id = ovs.origin AND ov.visit = ovs.visit ",
             "WHERE o.url = %s",
         ]
         query_params: List[Any] = [origin_id]
 
         # Optional filters, each contributing one clause
         if type is not None:
             query_parts.append("AND ov.type = %s")
             query_params.append(type)
         if require_snapshot:
             query_parts.append("AND ovs.snapshot is not null")
         if allowed_statuses:
             query_parts.append("AND ovs.status IN %s")
             query_params.append(tuple(allowed_statuses))
 
         query_parts.append(
             "ORDER BY ov.date DESC, ov.visit DESC, ovs.date DESC LIMIT 1"
         )
 
         cur.execute("\n".join(query_parts), tuple(query_params))
         row = cur.fetchone()
         return row if row else None
 
     def origin_visit_get_random(self, type, cur=None):
         """Randomly select one origin visit of the given type that was full
            and in the last 3 months.
 
         May return None: each candidate row is kept with probability 0.1
         and only the first kept row is fetched.
 
         """
         columns = ",".join(self.origin_visit_select_cols)
         query = f"""select {columns}
                     from origin_visit ov
                     inner join origin o on ov.origin=o.id
                     inner join origin_visit_status ovs
                       on ov.origin = ovs.origin and ov.visit = ovs.visit
                     where ovs.status='full'
                       and ov.type=%s
                       and ov.date > now() - '3 months'::interval
                       and random() < 0.1
                     limit 1
                  """
         cursor = self._cursor(cur)
         cursor.execute(query, (type,))
         return cursor.fetchone()
 
     @staticmethod
     def mangle_query_key(key, main_table):
         if key == "id":
             return "t.id"
         if key == "parents":
             return """
             ARRAY(
             SELECT rh.parent_id::bytea
             FROM revision_history rh
             WHERE rh.id = t.id
             ORDER BY rh.parent_rank
             )"""
         if "_" not in key:
             return "%s.%s" % (main_table, key)
 
         head, tail = key.split("_", 1)
         if head in ("author", "committer") and tail in (
             "name",
             "email",
             "id",
             "fullname",
         ):
             return "%s.%s" % (head, tail)
 
         return "%s.%s" % (main_table, key)
 
     def revision_get_from_list(self, revisions, cur=None):
         """Yield the ``revision_get_cols`` columns for each requested revision.
 
         Every id of ``revisions`` is paired with its position and sent as a
         ``(sortkey, id)`` VALUES row; rows come back ordered by that position.
         The joins are LEFT JOINs, so an id without a matching revision still
         produces a row.
 
         Args:
             revisions: iterable of revision ids
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
 
         columns = [
             self.mangle_query_key(name, "revision") for name in self.revision_get_cols
         ]
         sql = """
             SELECT %s FROM (VALUES %%s) as t(sortkey, id)
             LEFT JOIN revision ON t.id = revision.id
             LEFT JOIN person author ON revision.author = author.id
             LEFT JOIN person committer ON revision.committer = committer.id
             ORDER BY sortkey
             """ % ", ".join(columns)
 
         # enumerate() already produces the (sortkey, id) pairs.
         yield from execute_values_generator(cur, sql, enumerate(revisions))
 
     def revision_log(self, root_revisions, limit=None, cur=None):
         """Yield the rows of the ``swh_revision_log`` SQL function.
 
         Args:
             root_revisions: first argument bound to ``swh_revision_log``
             limit: second argument bound to ``swh_revision_log``
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         Yields:
             rows holding the ``revision_get_cols`` columns
 
         """
         cursor = self._cursor(cur)
         selected = ", ".join(self.revision_get_cols)
         cursor.execute(
             """SELECT %s
                    FROM swh_revision_log(%%s, %%s)
                 """
             % selected,
             (root_revisions, limit),
         )
         yield from cursor
 
     # Columns selected from swh_revision_list() by revision_shortlog().
     revision_shortlog_cols = ["id", "parents"]
 
     def revision_shortlog(self, root_revisions, limit=None, cur=None):
         """Yield the rows of the ``swh_revision_list`` SQL function.
 
         Args:
             root_revisions: first argument bound to ``swh_revision_list``
             limit: second argument bound to ``swh_revision_list``
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         Yields:
             rows holding the ``revision_shortlog_cols`` columns
 
         """
         cursor = self._cursor(cur)
         selected = ", ".join(self.revision_shortlog_cols)
         cursor.execute(
             """SELECT %s
                    FROM swh_revision_list(%%s, %%s)
                 """
             % selected,
             (root_revisions, limit),
         )
         yield from cursor
 
     def revision_get_random(self, cur=None):
         """Return what ``_get_random_row_from_table`` picks from the
         ``revision`` table when asked for its ``id`` column."""
         table, id_column = "revision", "id"
         return self._get_random_row_from_table(table, [id_column], id_column, cur)
 
     def release_missing_from_list(self, releases, cur=None):
         """Yield the ids of ``releases`` that have no row in the ``release``
         table.
 
         Args:
             releases: iterable of release ids, each sent as a one-column
               VALUES row
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
         sql = """
             SELECT id FROM (VALUES %s) as t(id)
             WHERE NOT EXISTS (
                 SELECT 1 FROM release r WHERE r.id = t.id
             )
             """
         candidate_rows = ((release_id,) for release_id in releases)
         yield from execute_values_generator(cur, sql, candidate_rows)
 
     # Names of the two columns produced by the final SELECT of
     # object_find_by_sha1_git(); presumably used by callers to label rows.
     object_find_by_sha1_git_cols = ["sha1_git", "type"]
 
     def object_find_by_sha1_git(self, ids, cur=None):
         """Yield ``(sha1_git, type)`` rows telling which kind of object each id
         names.
 
         Each id is looked up in the ``release``, ``revision``, ``directory``
         and ``content`` tables. The final LEFT JOIN keeps ids found in none of
         them (their ``type`` is then NULL), and an id present in several
         tables yields one row per match.
 
         Args:
             ids: iterable of sha1_git values, each sent as a one-column
               VALUES row
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
 
         sql = """
             WITH t (sha1_git) AS (VALUES %s),
             known_objects as ((
                 select
                   id as sha1_git,
                   'release'::object_type as type,
                   object_id
                 from release r
                 where exists (select 1 from t where t.sha1_git = r.id)
             ) union all (
                 select
                   id as sha1_git,
                   'revision'::object_type as type,
                   object_id
                 from revision r
                 where exists (select 1 from t where t.sha1_git = r.id)
             ) union all (
                 select
                   id as sha1_git,
                   'directory'::object_type as type,
                   object_id
                 from directory d
                 where exists (select 1 from t where t.sha1_git = d.id)
             ) union all (
                 select
                   sha1_git as sha1_git,
                   'content'::object_type as type,
                   object_id
                 from content c
                 where exists (select 1 from t where t.sha1_git = c.sha1_git)
             ))
             select t.sha1_git as sha1_git, k.type
             from t
             left join known_objects k on t.sha1_git = k.sha1_git
             """
         lookup_rows = ((sha1_git,) for sha1_git in ids)
         yield from execute_values_generator(cur, sql, lookup_rows)
 
     def stat_counters(self, cur=None):
         """Yield every row returned by the ``swh_stat_counters()`` SQL
         function."""
         cursor = self._cursor(cur)
         cursor.execute("SELECT * FROM swh_stat_counters()")
         for counter_row in cursor:
             yield counter_row
 
     def origin_add(self, url, cur=None):
         """Insert a new origin and return its url.
 
         Args:
             url: url of the origin to insert
             cur: cursor to run the statement with; when ``None`` the cursor is
               obtained from ``self._cursor``, like the other methods do
 
         Returns:
             the ``url`` column of the inserted row, as given back by the
             ``RETURNING`` clause
 
         """
         # Without this the default ``cur=None`` would crash on ``cur.execute``.
         cur = self._cursor(cur)
 
         insert = """INSERT INTO origin (url) values (%s)
                     RETURNING url"""
 
         cur.execute(insert, (url,))
         return cur.fetchone()[0]
 
     # Columns of the origin table selected by origin_get_by_url(),
     # origin_get_by_sha1() and (when not counting) _origin_query().
     origin_cols = ["url"]
 
     def origin_get_by_url(self, origins, cur=None):
         """Yield the ``origin_cols`` columns of the origins matching the given
         urls.
 
         The lookup is a LEFT JOIN from the requested urls, so a url without a
         matching origin still produces a row.
 
         Args:
             origins: iterable of urls, each sent as a one-column VALUES row
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
 
         selected = ",".join("origin." + name for name in self.origin_cols)
         sql = """SELECT %s FROM (VALUES %%s) as t(url)
                    LEFT JOIN origin ON t.url = origin.url
                 """ % selected
 
         url_rows = ((url,) for url in origins)
         yield from execute_values_generator(cur, sql, url_rows)
 
     def origin_get_by_sha1(self, sha1s, cur=None):
         """Yield the ``origin_cols`` columns of the origins whose url hashes to
         one of the given sha1s.
 
         The match is made against ``digest(origin.url, 'sha1')`` through a
         LEFT JOIN, so a sha1 without a matching origin still produces a row.
 
         Args:
             sha1s: iterable of sha1 values, each sent as a one-column VALUES
               row
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
 
         selected = ",".join("origin." + name for name in self.origin_cols)
         sql = """SELECT %s FROM (VALUES %%s) as t(sha1)
                    LEFT JOIN origin ON t.sha1 = digest(origin.url, 'sha1')
                 """ % selected
 
         sha1_rows = ((sha1,) for sha1 in sha1s)
         yield from execute_values_generator(cur, sql, sha1_rows)
 
     def origin_id_get_by_url(self, origins, cur=None):
         """Yield the ``id`` column of the origin matching each given url.
 
         The lookup is a LEFT JOIN from the requested urls, so a url without a
         matching origin still produces a value (a NULL id).
 
         Args:
             origins: iterable of urls, each sent as a one-column VALUES row
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
 
         sql = """SELECT id FROM (VALUES %s) as t(url)
                    LEFT JOIN origin ON t.url = origin.url
                 """
 
         url_rows = ((url,) for url in origins)
         # Rows have a single column; unwrap it.
         yield from (row[0] for row in execute_values_generator(cur, sql, url_rows))
 
     # Columns of the origin table selected by origin_get_range().
     origin_get_range_cols = ["id", "url"]
 
     def origin_get_range(self, origin_from=1, origin_count=100, cur=None):
         """Yield up to ``origin_count`` origins whose id is at least
         ``origin_from``, in increasing id order.
 
         Args:
             origin_from (int): the minimum id of origins to retrieve
             origin_count (int): the maximum number of origins to retrieve
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         Yields:
             rows holding the ``origin_get_range_cols`` columns
 
         """
         cursor = self._cursor(cur)
 
         selected = ",".join(self.origin_get_range_cols)
         cursor.execute(
             """SELECT %s
                    FROM origin WHERE id >= %%s
                    ORDER BY id LIMIT %%s
                 """
             % selected,
             (origin_from, origin_count),
         )
         yield from cursor
 
     def _origin_query(
         self,
         url_pattern,
         count=False,
         offset=0,
         limit=50,
         regexp=False,
         with_visit=False,
         cur=None,
     ):
         """
         Method factorizing query creation for searching and counting origins.
         """
         cur = self._cursor(cur)
 
         if count:
             origin_cols = "COUNT(*)"
         else:
             origin_cols = ",".join(self.origin_cols)
 
         query = """SELECT %s
                    FROM origin o
                    WHERE """
         if with_visit:
             query += """
                    EXISTS (
                      SELECT 1
                      FROM origin_visit ov
                      INNER JOIN origin_visit_status ovs
                        ON ov.origin = ovs.origin AND ov.visit = ovs.visit
                      INNER JOIN snapshot ON ovs.snapshot=snapshot.id
                      WHERE ov.origin=o.id
                      )
                    AND """
         query += "url %s %%s "
         if not count:
             query += "ORDER BY id OFFSET %%s LIMIT %%s"
 
         if not regexp:
             query = query % (origin_cols, "ILIKE")
             query_params = ("%" + url_pattern + "%", offset, limit)
         else:
             query = query % (origin_cols, "~*")
             query_params = (url_pattern, offset, limit)
 
         if count:
             query_params = (query_params[0],)
 
         cur.execute(query, query_params)
 
     def origin_search(
         self, url_pattern, offset=0, limit=50, regexp=False, with_visit=False, cur=None
     ):
         """Search for origins whose urls contain a provided string pattern
         or match a provided regular expression.
         The search is performed in a case insensitive way.
 
         Args:
             url_pattern (str): the string pattern to search for in origin urls
             offset (int): number of found origins to skip before returning
                 results
             limit (int): the maximum number of found origins to return
             regexp (bool): if True, consider the provided pattern as a regular
                 expression and returns origins whose urls match it
             with_visit (bool): if True, filter out origins with no visit
             cur: cursor to run the query with; when ``None`` the cursor is
                 obtained from ``self._cursor``
 
         Yields:
             the rows left on the cursor by ``_origin_query``
 
         """
         # Resolve the cursor here: ``_origin_query`` only rebinds its own local
         # name, so iterating the raw argument would fail when it is None.
         cur = self._cursor(cur)
         self._origin_query(
             url_pattern,
             offset=offset,
             limit=limit,
             regexp=regexp,
             with_visit=with_visit,
             cur=cur,
         )
         yield from cur
 
     def origin_count(self, url_pattern, regexp=False, with_visit=False, cur=None):
         """Count origins whose urls contain a provided string pattern
         or match a provided regular expression.
         The pattern search in origin urls is performed in a case insensitive
         way.
 
         Args:
             url_pattern (str): the string pattern to search for in origin urls
             regexp (bool): if True, consider the provided pattern as a regular
                 expression and returns origins whose urls match it
             with_visit (bool): if True, filter out origins with no visit
             cur: cursor to run the query with; when ``None`` the cursor is
                 obtained from ``self._cursor``
 
         Returns:
             the first column of the single row produced by the counting query
 
         """
         # Resolve the cursor here: ``_origin_query`` only rebinds its own local
         # name, so ``cur.fetchone()`` would fail when the argument is None.
         cur = self._cursor(cur)
         self._origin_query(
             url_pattern, count=True, regexp=regexp, with_visit=with_visit, cur=cur
         )
         return cur.fetchone()[0]
 
     # Logical column names of a release. The author_* entries are not columns
     # of the release table itself: release_get_from_list() maps them to the
     # joined "author" person alias through mangle_query_key().
     release_add_cols = [
         "id",
         "target",
         "target_type",
         "date",
         "date_offset",
         "date_neg_utc_offset",
         "name",
         "comment",
         "synthetic",
         "author_fullname",
         "author_name",
         "author_email",
     ]
     # Same list object as release_add_cols (an alias, not a copy).
     release_get_cols = release_add_cols
 
     def release_get_from_list(self, releases, cur=None):
         """Yield the ``release_get_cols`` columns for each requested release.
 
         Every id of ``releases`` is paired with its position and sent as a
         ``(sortkey, id)`` VALUES row; rows come back ordered by that position.
         The joins are LEFT JOINs, so an id without a matching release still
         produces a row.
 
         Args:
             releases: iterable of release ids
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cur = self._cursor(cur)
         columns = [
             self.mangle_query_key(name, "release") for name in self.release_get_cols
         ]
         sql = """
             SELECT %s FROM (VALUES %%s) as t(sortkey, id)
             LEFT JOIN release ON t.id = release.id
             LEFT JOIN person author ON release.author = author.id
             ORDER BY sortkey
             """ % ", ".join(columns)
 
         # enumerate() already produces the (sortkey, id) pairs.
         yield from execute_values_generator(cur, sql, enumerate(releases))
 
     def release_get_random(self, cur=None):
         """Return what ``_get_random_row_from_table`` picks from the
         ``release`` table when asked for its ``id`` column."""
         table, id_column = "release", "id"
         return self._get_random_row_from_table(table, [id_column], id_column, cur)
 
     # Optional context columns; object_metadata_add() fills each of them from
     # its ``context`` dict with ``context.get(col)``.
     _object_metadata_context_cols = [
         "origin",
         "visit",
         "snapshot",
         "release",
         "revision",
         "path",
         "directory",
     ]
     """The list of context columns for all artifact types."""
 
     # Order matters: object_metadata_add() builds its parameter list by walking
     # this list, and the INSERT statement below names the columns in the same
     # order.
     _object_metadata_insert_cols = [
         "type",
         "id",
         "authority_id",
         "fetcher_id",
         "discovery_date",
         "format",
         "metadata",
         *_object_metadata_context_cols,
     ]
     """List of columns of the object_metadata table, used when writing
     metadata."""
 
     # INSERT with one %s placeholder per insert column. A row colliding on
     # (id, authority_id, discovery_date, fetcher_id) is silently skipped.
     _object_metadata_insert_query = f"""
         INSERT INTO object_metadata
             ({', '.join(_object_metadata_insert_cols)})
         VALUES ({', '.join('%s' for _ in _object_metadata_insert_cols)})
         ON CONFLICT (id, authority_id, discovery_date, fetcher_id)
         DO NOTHING
     """
 
     # The first ("id") and last ("metadata") entries are ambiguous across the
     # joined tables; the SELECT template below spells them out with their table
     # name and only interpolates the entries in between.
     object_metadata_get_cols = [
         "id",
         "discovery_date",
         "metadata_authority.type",
         "metadata_authority.url",
         "metadata_fetcher.id",
         "metadata_fetcher.name",
         "metadata_fetcher.version",
         *_object_metadata_context_cols,
         "format",
         "metadata",
     ]
     """List of columns of the object_metadata, metadata_authority,
     and metadata_fetcher tables, used when reading object metadata."""
 
     # Base SELECT taking two parameters (object id, authority id);
     # object_metadata_get() appends its pagination, ORDER BY and LIMIT clauses
     # to it.
     _object_metadata_select_query = f"""
         SELECT
             object_metadata.id AS id,
             {', '.join(object_metadata_get_cols[1:-1])},
             object_metadata.metadata AS metadata
         FROM object_metadata
         INNER JOIN metadata_authority
             ON (metadata_authority.id=authority_id)
         INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id)
         WHERE object_metadata.id=%s AND authority_id=%s
     """
 
     def object_metadata_add(
         self,
         object_type: str,
         id: str,
         context: Dict[str, Union[str, bytes, int]],
         discovery_date: datetime.datetime,
         authority_id: int,
         fetcher_id: int,
         format: str,
         metadata: bytes,
         cur,
     ):
         """Insert one ``object_metadata`` row.
 
         Args:
             object_type: value of the ``type`` column
             id: value of the ``id`` column
             context: values of the context columns; a column absent from this
               dict is bound as ``None``
             discovery_date: value of the ``discovery_date`` column
             authority_id: value of the ``authority_id`` column
             fetcher_id: value of the ``fetcher_id`` column
             format: value of the ``format`` column
             metadata: value of the ``metadata`` column
             cur: cursor the statement is executed on
 
         """
         insert_sql = self._object_metadata_insert_query
 
         values: Dict[str, Any] = {
             "type": object_type,
             "id": id,
             "authority_id": authority_id,
             "fetcher_id": fetcher_id,
             "discovery_date": discovery_date,
             "format": format,
             "metadata": metadata,
         }
         values.update(
             (col, context.get(col)) for col in self._object_metadata_context_cols
         )
 
         # Parameters follow the column order of the INSERT statement.
         cur.execute(
             insert_sql, [values[col] for col in self._object_metadata_insert_cols]
         )
 
     def object_metadata_get(
         self,
         object_type: str,
         id: str,
         authority_id: int,
         after_time: Optional[datetime.datetime],
         after_fetcher: Optional[int],
         limit: int,
         cur,
     ):
         """Yield the metadata rows of one object for one authority, ordered by
         ``(discovery_date, fetcher_id)``.
 
         Args:
             object_type: not used by the query built here
             id: object id bound to ``object_metadata.id``
             authority_id: authority bound to ``authority_id``
             after_time: when set, only rows discovered strictly after it are
               selected
             after_fetcher: when set (``after_time`` is then required), rows are
               selected when ``(discovery_date, fetcher_id)`` is strictly
               greater than ``(after_time, after_fetcher)``
             limit: maximum number of rows; a falsy value means no ``LIMIT``
             cur: cursor the query is executed on
 
         """
         clauses = [self._object_metadata_select_query]
         params = [id, authority_id]
 
         if after_fetcher is None:
             if after_time is not None:
                 clauses.append("AND discovery_date > %s")
                 params.append(after_time)
         else:
             assert after_time
             clauses.append("AND (discovery_date, fetcher_id) > (%s, %s)")
             params += [after_time, after_fetcher]
 
         clauses.append("ORDER BY discovery_date, fetcher_id")
 
         if limit:
             clauses.append("LIMIT %s")
             params.append(limit)
 
         cur.execute(" ".join(clauses), params)
         yield from cur
 
     # Columns of the metadata_fetcher table selected by metadata_fetcher_get().
     metadata_fetcher_cols = ["name", "version", "metadata"]
 
     def metadata_fetcher_add(
         self, name: str, version: str, metadata: bytes, cur=None
     ) -> None:
         """Insert a metadata fetcher; a conflicting row is left untouched
         (``ON CONFLICT DO NOTHING``).
 
         Args:
             name: value of the ``name`` column
             version: value of the ``version`` column
             metadata: passed through ``jsonize`` before being bound to the
               ``metadata`` column
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cursor = self._cursor(cur)
         insert_sql = (
             "INSERT INTO metadata_fetcher (name, version, metadata) "
             "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
         )
         cursor.execute(insert_sql, (name, version, jsonize(metadata)))
 
     def metadata_fetcher_get(self, name: str, version: str, cur=None):
         """Return the ``metadata_fetcher_cols`` columns of the fetcher with the
         given name and version, as given by ``fetchone()``."""
         cursor = self._cursor(cur)
         selected = ", ".join(self.metadata_fetcher_cols)
         select_sql = (
             "SELECT " + selected + " "
             "FROM metadata_fetcher "
             "WHERE name=%s AND version=%s"
         )
         cursor.execute(select_sql, (name, version))
         return cursor.fetchone()
 
     def metadata_fetcher_get_id(
         self, name: str, version: str, cur=None
     ) -> Optional[int]:
         """Return the ``id`` of the fetcher with the given name and version,
         or ``None`` when no row matches."""
         cursor = self._cursor(cur)
         cursor.execute(
             "SELECT id FROM metadata_fetcher WHERE name=%s AND version=%s",
             (name, version),
         )
         found = cursor.fetchone()
         return found[0] if found else None
 
     # Columns of the metadata_authority table selected by
     # metadata_authority_get().
     metadata_authority_cols = ["type", "url", "metadata"]
 
     def metadata_authority_add(
         self, type: str, url: str, metadata: bytes, cur=None
     ) -> None:
         """Insert a metadata authority; a conflicting row is left untouched
         (``ON CONFLICT DO NOTHING``).
 
         Args:
             type: value of the ``type`` column
             url: value of the ``url`` column
             metadata: passed through ``jsonize`` before being bound to the
               ``metadata`` column
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         """
         cursor = self._cursor(cur)
         insert_sql = (
             "INSERT INTO metadata_authority (type, url, metadata) "
             "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING"
         )
         cursor.execute(insert_sql, (type, url, jsonize(metadata)))
 
     def metadata_authority_get(self, type: str, url: str, cur=None):
         """Return the ``metadata_authority_cols`` columns of the authority with
         the given type and url, as given by ``fetchone()``."""
         cursor = self._cursor(cur)
         selected = ", ".join(self.metadata_authority_cols)
         select_sql = (
             "SELECT " + selected + " "
             "FROM metadata_authority "
             "WHERE type=%s AND url=%s"
         )
         cursor.execute(select_sql, (type, url))
         return cursor.fetchone()
 
     def metadata_authority_get_id(self, type: str, url: str, cur=None) -> Optional[int]:
         """Return the ``id`` of the authority with the given type and url, or
         ``None`` when no row matches."""
         cursor = self._cursor(cur)
         cursor.execute(
             "SELECT id FROM metadata_authority WHERE type=%s AND url=%s", (type, url)
         )
         found = cursor.fetchone()
         return found[0] if found else None
 
     def _get_random_row_from_table(self, table_name, cols, id_col, cur=None):
         """Pick a row of ``table_name`` close to a randomly drawn id value.
 
         Args:
             table_name: table to read; interpolated as-is into the SQL text
             cols: list of columns to select; interpolated as-is into the SQL
               text
             id_col: column compared with the random value; interpolated as-is
               into the SQL text
             cur: cursor handed to ``self._cursor`` to get the one actually used
 
         Returns:
             when a row is fetched, its first column only (``row[0]``), even if
             ``cols`` names several columns
 
         """
         # SHA1_SIZE random bytes, used as the pivot value for id_col.
         random_sha1 = bytes(random.randint(0, 255) for _ in range(SHA1_SIZE))
         cur = self._cursor(cur)
         # First branch: smallest id_col at or above the pivot. Second branch:
         # largest id_col strictly below it, which covers a pivot greater than
         # every stored value. One row of the union is kept.
         # NOTE(review): identifiers are formatted into the query, not bound as
         # parameters; the callers visible here only pass literals.
         query = """
             (SELECT {cols} FROM {table} WHERE {id_col} >= %s
              ORDER BY {id_col} LIMIT 1)
             UNION
             (SELECT {cols} FROM {table} WHERE {id_col} < %s
              ORDER BY {id_col} DESC LIMIT 1)
             LIMIT 1
             """.format(
             cols=", ".join(cols), table=table_name, id_col=id_col
         )
         cur.execute(query, (random_sha1, random_sha1))
         row = cur.fetchone()
         if row:
             return row[0]