diff --git a/swh/storage/backfill.py b/swh/storage/backfill.py index 3df10b2c..79b14321 100644 --- a/swh/storage/backfill.py +++ b/swh/storage/backfill.py @@ -1,646 +1,649 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Storage backfiller. The backfiller goal is to produce back part or all of the objects from a storage to the journal topics Current implementation consists in the JournalBackfiller class. It simply reads the objects from the storage and sends every object identifier back to the journal. """ import logging from typing import Any, Callable, Dict, Optional from swh.core.db import BaseDb from swh.model.identifiers import ExtendedObjectType from swh.model.model import ( BaseModel, Directory, DirectoryEntry, ExtID, RawExtrinsicMetadata, Release, Revision, Snapshot, SnapshotBranch, TargetType, ) from swh.storage.postgresql.converters import ( db_to_extid, db_to_raw_extrinsic_metadata, db_to_release, db_to_revision, ) from swh.storage.replay import object_converter_fn from swh.storage.writer import JournalWriter logger = logging.getLogger(__name__) PARTITION_KEY = { "content": "sha1", "skipped_content": "sha1", "directory": "id", "extid": "target", "metadata_authority": "type, url", "metadata_fetcher": "name, version", "raw_extrinsic_metadata": "target", "revision": "revision.id", "release": "release.id", "snapshot": "id", "origin": "id", "origin_visit": "origin_visit.origin", "origin_visit_status": "origin_visit_status.origin", } COLUMNS = { "content": [ "sha1", "sha1_git", "sha256", "blake2s256", "length", "status", "ctime", ], "skipped_content": [ "sha1", "sha1_git", "sha256", "blake2s256", "length", "ctime", "status", "reason", ], "directory": ["id", "dir_entries", "file_entries", "rev_entries"], "extid": ["extid_type", "extid", "target_type", "target"], "metadata_authority": ["type", "url"], "metadata_fetcher": ["name", "version"], "origin": ["url"], "origin_visit": ["visit", "type", ("origin.url", "origin"), "date",], "origin_visit_status": [ ("origin_visit_status.visit", "visit"), ("origin.url", "origin"), ("origin_visit_status.date", "date"), "type", "snapshot", "status", "metadata", ], "raw_extrinsic_metadata": [ "raw_extrinsic_metadata.type", "raw_extrinsic_metadata.target", "metadata_authority.type", "metadata_authority.url", "metadata_fetcher.name", "metadata_fetcher.version", "discovery_date", "format", "raw_extrinsic_metadata.metadata", "origin", "visit", "snapshot", "release", "revision", "path", "directory", ], "revision": [ ("revision.id", "id"), "date", "date_offset", "date_neg_utc_offset", "committer_date", "committer_date_offset", "committer_date_neg_utc_offset", "type", "directory", "message", "synthetic", "metadata", "extra_headers", ( "array(select parent_id::bytea from revision_history rh " "where rh.id = revision.id order by rh.parent_rank asc)", "parents", ), ("a.id", "author_id"), ("a.name", "author_name"), ("a.email", "author_email"), ("a.fullname", "author_fullname"), ("c.id", "committer_id"), ("c.name", "committer_name"), ("c.email", "committer_email"), ("c.fullname", "committer_fullname"), ], "release": [ ("release.id", "id"), "date", "date_offset", "date_neg_utc_offset", "comment", ("release.name", "name"), "synthetic", "target", "target_type", ("a.id", "author_id"), ("a.name", "author_name"), ("a.email", "author_email"), ("a.fullname", 
"author_fullname"), ], "snapshot": ["id", "object_id"], } JOINS = { "release": ["person a on release.author=a.id"], "revision": [ "person a on revision.author=a.id", "person c on revision.committer=c.id", ], "origin_visit": ["origin on origin_visit.origin=origin.id"], "origin_visit_status": ["origin on origin_visit_status.origin=origin.id",], "raw_extrinsic_metadata": [ "metadata_authority on " "raw_extrinsic_metadata.authority_id=metadata_authority.id", "metadata_fetcher on raw_extrinsic_metadata.fetcher_id=metadata_fetcher.id", ], } def directory_converter(db: BaseDb, directory_d: Dict[str, Any]) -> Directory: """Convert directory from the flat representation to swh model compatible objects. """ columns = ["target", "name", "perms"] query_template = """ select %(columns)s from directory_entry_%(type)s where id in %%s """ types = ["file", "dir", "rev"] entries = [] with db.cursor() as cur: for type in types: ids = directory_d.pop("%s_entries" % type) if not ids: continue query = query_template % { "columns": ",".join(columns), "type": type, } cur.execute(query, (tuple(ids),)) for row in cur: entry_d = dict(zip(columns, row)) entry = DirectoryEntry( name=entry_d["name"], type=type, target=entry_d["target"], perms=entry_d["perms"], ) entries.append(entry) return Directory(id=directory_d["id"], entries=tuple(entries),) def raw_extrinsic_metadata_converter( db: BaseDb, metadata: Dict[str, Any] ) -> RawExtrinsicMetadata: """Convert a raw extrinsic metadata from the flat representation to swh model compatible objects. """ return db_to_raw_extrinsic_metadata(metadata) def extid_converter(db: BaseDb, extid: Dict[str, Any]) -> ExtID: """Convert an extid from the flat representation to swh model compatible objects. """ return db_to_extid(extid) def revision_converter(db: BaseDb, revision_d: Dict[str, Any]) -> Revision: """Convert revision from the flat representation to swh model compatible objects. """ revision = db_to_revision(revision_d) assert revision is not None, revision_d["id"] return revision def release_converter(db: BaseDb, release_d: Dict[str, Any]) -> Release: """Convert release from the flat representation to swh model compatible objects. """ release = db_to_release(release_d) assert release is not None, release_d["id"] return release def snapshot_converter(db: BaseDb, snapshot_d: Dict[str, Any]) -> Snapshot: """Convert snapshot from the flat representation to swh model compatible objects. """ columns = ["name", "target", "target_type"] query = """ select %s from snapshot_branches sbs inner join snapshot_branch sb on sb.object_id=sbs.branch_id where sbs.snapshot_id=%%s """ % ", ".join( columns ) with db.cursor() as cur: cur.execute(query, (snapshot_d["object_id"],)) branches = {} for name, *row in cur: branch_d = dict(zip(columns[1:], row)) if branch_d["target"] is not None and branch_d["target_type"] is not None: branch: Optional[SnapshotBranch] = SnapshotBranch( target=branch_d["target"], target_type=TargetType(branch_d["target_type"]), ) else: branch = None branches[name] = branch return Snapshot(id=snapshot_d["id"], branches=branches,) CONVERTERS: Dict[str, Callable[[BaseDb, Dict[str, Any]], BaseModel]] = { "directory": directory_converter, "extid": extid_converter, "raw_extrinsic_metadata": raw_extrinsic_metadata_converter, "revision": revision_converter, "release": release_converter, "snapshot": snapshot_converter, } def object_to_offset(object_id, numbits): """Compute the index of the range containing object id, when dividing space into 2^numbits. 
Args: object_id (str): The hex representation of object_id numbits (int): Number of bits in which we divide input space Returns: The index of the range containing object id """ q, r = divmod(numbits, 8) length = q + (r != 0) shift_bits = 8 - r if r else 0 truncated_id = object_id[: length * 2] if len(truncated_id) < length * 2: truncated_id += "0" * (length * 2 - len(truncated_id)) truncated_id_bytes = bytes.fromhex(truncated_id) return int.from_bytes(truncated_id_bytes, byteorder="big") >> shift_bits def byte_ranges(numbits, start_object=None, end_object=None): """Generate start/end pairs of bytes spanning numbits bits and constrained by optional start_object and end_object. Args: numbits (int): Number of bits in which we divide input space start_object (str): Hex object id contained in the first range returned end_object (str): Hex object id contained in the last range returned Yields: 2^numbits pairs of bytes """ q, r = divmod(numbits, 8) length = q + (r != 0) shift_bits = 8 - r if r else 0 def to_bytes(i): return int.to_bytes(i << shift_bits, length=length, byteorder="big") start_offset = 0 end_offset = 1 << numbits if start_object is not None: start_offset = object_to_offset(start_object, numbits) if end_object is not None: end_offset = object_to_offset(end_object, numbits) + 1 for start in range(start_offset, end_offset): end = start + 1 if start == 0: yield None, to_bytes(end) elif end == 1 << numbits: yield to_bytes(start), None else: yield to_bytes(start), to_bytes(end) def raw_extrinsic_metadata_target_ranges(start_object=None, end_object=None): """Generate ranges of values for the `target` attribute of `raw_extrinsic_metadata` objects. This generates one range for all values before the first SWHID (which would correspond to raw origin URLs), then a number of hex-based ranges for each known type of SWHID (2**12 ranges for directories, 2**8 ranges for all other types). Finally, it generates one extra range for values above all possible SWHIDs. """ if start_object is None: start_object = "" swhid_target_types = sorted(type.value for type in ExtendedObjectType) first_swhid = f"swh:1:{swhid_target_types[0]}:" # Generate a range for url targets, if the starting object is before SWHIDs if start_object < first_swhid: yield start_object, ( first_swhid if end_object is None or end_object >= first_swhid else end_object ) if end_object is not None and end_object <= first_swhid: return # Prime the following loop, which uses the upper bound of the previous range # as lower bound, to account for potential targets between two valid types # of SWHIDs (even though they would eventually be rejected by the # RawExtrinsicMetadata parser, they /might/ exist...) 
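As a standalone illustration (not part of the patch) of the partitioning implemented by object_to_offset and byte_ranges above, the following sketch assumes swh.storage is importable and uses a made-up object id:

# Illustration only: split the id space into 2**8 one-byte ranges.
from swh.storage.backfill import byte_ranges, object_to_offset

ranges = list(byte_ranges(8))
assert len(ranges) == 256
assert ranges[0] == (None, b"\x01")   # first range is open-ended on the left
assert ranges[-1] == (b"\xff", None)  # last range is open-ended on the right

object_id = "83b1" + "00" * 18        # made-up 40-character hex id
index = object_to_offset(object_id, 8)
assert index == 0x83                  # the id falls into range number 0x83
start, end = ranges[index]
assert start <= bytes.fromhex(object_id) < end   # i.e. within [start, end)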
end_swhid = first_swhid # Generate ranges for swhid targets for target_type in swhid_target_types: finished = False base_swhid = f"swh:1:{target_type}:" last_swhid = base_swhid + ("f" * 40) if start_object > last_swhid: continue # Generate 2**8 or 2**12 ranges for _, end in byte_ranges(12 if target_type == "dir" else 8): # Reuse previous upper bound start_swhid = end_swhid # Use last_swhid for this object type if on the last byte range end_swhid = (base_swhid + end.hex()) if end is not None else last_swhid # Ignore out of bounds ranges if start_object >= end_swhid: continue # Potentially clamp start of range to the first object requested start_swhid = max(start_swhid, start_object) # Handle ending the loop early if the last requested object id is in # the current range if end_object is not None and end_swhid >= end_object: end_swhid = end_object finished = True yield start_swhid, end_swhid if finished: return # Generate one final range for potential raw origin URLs after the last # valid SWHID start_swhid = max(start_object, end_swhid) yield start_swhid, end_object def integer_ranges(start, end, block_size=1000): for start in range(start, end, block_size): if start == 0: yield None, block_size elif start + block_size > end: yield start, end else: yield start, start + block_size RANGE_GENERATORS = { "content": lambda start, end: byte_ranges(24, start, end), "skipped_content": lambda start, end: [(None, None)], "directory": lambda start, end: byte_ranges(24, start, end), "extid": lambda start, end: byte_ranges(24, start, end), "revision": lambda start, end: byte_ranges(24, start, end), "release": lambda start, end: byte_ranges(16, start, end), "raw_extrinsic_metadata": raw_extrinsic_metadata_target_ranges, "snapshot": lambda start, end: byte_ranges(16, start, end), "origin": integer_ranges, "origin_visit": integer_ranges, "origin_visit_status": integer_ranges, } def compute_query(obj_type, start, end): columns = COLUMNS.get(obj_type) join_specs = JOINS.get(obj_type, []) join_clause = "\n".join("left join %s" % clause for clause in join_specs) where = [] where_args = [] if start: where.append("%(keys)s >= %%s") where_args.append(start) if end: where.append("%(keys)s < %%s") where_args.append(end) where_clause = "" if where: where_clause = ("where " + " and ".join(where)) % { "keys": "(%s)" % PARTITION_KEY[obj_type] } column_specs = [] column_aliases = [] for column in columns: if isinstance(column, str): column_specs.append(column) column_aliases.append(column) else: column_specs.append("%s as %s" % column) column_aliases.append(column[1]) query = """ select %(columns)s from %(table)s %(join)s %(where)s """ % { "columns": ",".join(column_specs), "table": obj_type, "join": join_clause, "where": where_clause, } return query, where_args, column_aliases def fetch(db, obj_type, start, end): """Fetch all obj_type's identifiers from db. This opens one connection, streams objects and, when done, closes the connection.
Args: db (BaseDb): Db connection object obj_type (str): Object type start (Union[bytes|Tuple]): Range start identifier end (Union[bytes|Tuple]): Range end identifier Raises: ValueError if obj_type is not supported Yields: Objects in the given range """ query, where_args, column_aliases = compute_query(obj_type, start, end) converter = CONVERTERS.get(obj_type) with db.cursor() as cursor: logger.debug("Fetching data for table %s", obj_type) logger.debug("query: %s %s", query, where_args) cursor.execute(query, where_args) for row in cursor: record = dict(zip(column_aliases, row)) if converter: record = converter(db, record) else: record = object_converter_fn[obj_type](record) logger.debug("record: %s", record) yield record def _format_range_bound(bound): if isinstance(bound, bytes): return bound.hex() else: return str(bound) MANDATORY_KEYS = ["storage", "journal_writer"] class JournalBackfiller: """Class in charge of reading the storage's objects and sends those back to the journal's topics. This is designed to be run periodically. """ def __init__(self, config=None): self.config = config self.check_config(config) def check_config(self, config): missing_keys = [] for key in MANDATORY_KEYS: if not config.get(key): missing_keys.append(key) if missing_keys: raise ValueError( "Configuration error: The following keys must be" " provided: %s" % (",".join(missing_keys),) ) - if "cls" not in config["storage"] or config["storage"]["cls"] != "local": + if "cls" not in config["storage"] or config["storage"]["cls"] not in ( + "local", + "postgresql", + ): raise ValueError( "swh storage backfiller must be configured to use a local" " (PostgreSQL) storage" ) def parse_arguments(self, object_type, start_object, end_object): """Parse arguments Raises: ValueError for unsupported object type ValueError if object ids are not parseable Returns: Parsed start and end object ids """ if object_type not in COLUMNS: raise ValueError( "Object type %s is not supported. " "The only possible values are %s" % (object_type, ", ".join(sorted(COLUMNS.keys()))) ) if object_type in ["origin", "origin_visit", "origin_visit_status"]: if start_object: start_object = int(start_object) else: start_object = 0 if end_object: end_object = int(end_object) else: end_object = 100 * 1000 * 1000 # hard-coded limit return start_object, end_object def run(self, object_type, start_object, end_object, dry_run=False): """Reads storage's subscribed object types and send them to the journal's reading topic. 
""" start_object, end_object = self.parse_arguments( object_type, start_object, end_object ) db = BaseDb.connect(self.config["storage"]["db"]) writer = JournalWriter({"cls": "kafka", **self.config["journal_writer"]}) assert writer.journal is not None for range_start, range_end in RANGE_GENERATORS[object_type]( start_object, end_object ): logger.info( "Processing %s range %s to %s", object_type, _format_range_bound(range_start), _format_range_bound(range_end), ) objects = fetch(db, object_type, start=range_start, end=range_end) if not dry_run: writer.write_additions(object_type, objects) else: # only consume the objects iterator to check for any potential # decoding/encoding errors for obj in objects: pass if __name__ == "__main__": print('Please use the "swh-journal backfiller run" command') diff --git a/swh/storage/migrate_extrinsic_metadata.py b/swh/storage/migrate_extrinsic_metadata.py index 171e3b8c..d390c147 100644 --- a/swh/storage/migrate_extrinsic_metadata.py +++ b/swh/storage/migrate_extrinsic_metadata.py @@ -1,1208 +1,1208 @@ #!/usr/bin/env python3 -# Copyright (C) 2020 The Software Heritage developers +# Copyright (C) 2020-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """This is an executable script to migrate extrinsic revision metadata from the revision table to the new extrinsic metadata storage. This is designed to be as conservative as possible, following this principle: for each revision the script reads (in "handle_row"), it will read some of the fields, write them directly to the metadata storage, and remove them. Then it checks all the remaining fields are in a hardcoded list of fields that are known not to require migration. This means that every field that isn't migrated was explicitly reviewed while writing this script. Additionally, this script contains many assertions to prevent false positives in its heuristics. 
""" import datetime import hashlib import json import os import re import sys import time from typing import Any, Dict, Optional from urllib.error import HTTPError from urllib.parse import unquote, urlparse from urllib.request import urlopen import iso8601 import psycopg2 from swh.core.db import BaseDb from swh.model.hashutil import hash_to_hex from swh.model.identifiers import ( CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType, QualifiedSWHID, ) from swh.model.model import ( MetadataAuthority, MetadataAuthorityType, MetadataFetcher, RawExtrinsicMetadata, Sha1Git, ) from swh.storage import get_storage from swh.storage.algos.origin import iter_origin_visit_statuses, iter_origin_visits from swh.storage.algos.snapshot import snapshot_get_all_branches # XML namespaces and fields for metadata coming from the deposit: CODEMETA_NS = "https://doi.org/10.5063/SCHEMA/CODEMETA-2.0" ATOM_NS = "http://www.w3.org/2005/Atom" ATOM_KEYS = ["id", "author", "external_identifier", "title"] # columns of the revision table (of the storage DB) REVISION_COLS = [ "id", "directory", "date", "committer_date", "type", "message", "metadata", ] # columns of the tables of the deposit DB DEPOSIT_COLS = [ "deposit.id", "deposit.external_id", "deposit.swhid_context", "deposit.status", "deposit_request.metadata", "deposit_request.date", "deposit_client.provider_url", "deposit_collection.name", "auth_user.username", ] # Formats we write to the extrinsic metadata storage OLD_DEPOSIT_FORMAT = ( "sword-v2-atom-codemeta-v2-in-json-with-expanded-namespaces" # before february 2018 ) NEW_DEPOSIT_FORMAT = "sword-v2-atom-codemeta-v2-in-json" # after february 2018 GNU_FORMAT = "gnu-tree-json" NIXGUIX_FORMAT = "nixguix-sources-json" NPM_FORMAT = "replicate-npm-package-json" ORIGINAL_ARTIFACT_FORMAT = "original-artifacts-json" PYPI_FORMAT = "pypi-project-json" # Information about this script, for traceability FETCHER = MetadataFetcher( name="migrate-extrinsic-metadata-from-revisions", version="0.0.1", ) # Authorities that we got the metadata from AUTHORITIES = { "npmjs": MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://npmjs.com/", metadata={} ), "pypi": MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://pypi.org/", metadata={} ), "gnu": MetadataAuthority( type=MetadataAuthorityType.FORGE, url="https://ftp.gnu.org/", metadata={} ), "swh": MetadataAuthority( type=MetadataAuthorityType.REGISTRY, url="https://softwareheritage.org/", metadata={}, ), # for original_artifact (which are checksums computed by SWH) } # Regular expression for the format of revision messages written by the # deposit loader deposit_revision_message_re = re.compile( b"(?P[a-z-]*): " b"Deposit (?P[0-9]+) in collection (?P[a-z-]+).*" ) # not reliable, because PyPI allows arbitrary names def pypi_project_from_filename(filename): original_filename = filename if filename.endswith(".egg"): return None elif filename == "mongomotor-0.13.0.n.tar.gz": return "mongomotor" elif re.match(r"datahaven-rev[0-9]+\.tar\.gz", filename): return "datahaven" elif re.match(r"Dtls-[0-9]\.[0-9]\.[0-9]\.sdist_with_openssl\..*", filename): return "Dtls" elif re.match(r"(gae)?pytz-20[0-9][0-9][a-z]\.(tar\.gz|zip)", filename): return filename.split("-", 1)[0] elif filename.startswith(("powny-", "obedient.powny-",)): return filename.split("-")[0] elif filename.startswith("devpi-theme-16-"): return "devpi-theme-16" elif re.match("[^-]+-[0-9]+.tar.gz", filename): return filename.split("-")[0] elif filename == "ohai-1!0.tar.gz": return "ohai" elif filename == 
"collective.topicitemsevent-0.1dvl.tar.gz": return "collective.topicitemsevent" elif filename.startswith( ("SpiNNStorageHandlers-1!", "sPyNNakerExternalDevicesPlugin-1!") ): return filename.split("-")[0] elif filename.startswith("limnoria-201"): return "limnoria" elif filename.startswith("pytz-20"): return "pytz" elif filename.startswith("youtube_dl_server-alpha."): return "youtube_dl_server" elif filename == "json-extensions-b76bc7d.tar.gz": return "json-extensions" elif filename == "LitReview-0.6989ev.tar.gz": # typo of "dev" return "LitReview" elif filename.startswith("django_options-r"): return "django_options" elif filename == "Greater than, equal, or less Library-0.1.tar.gz": return "Greater-than-equal-or-less-Library" elif filename.startswith("upstart--main-"): return "upstart" elif filename == "duckduckpy0.1.tar.gz": return "duckduckpy" elif filename == "QUI for MPlayer snapshot_9-14-2011.zip": return "QUI-for-MPlayer" elif filename == "Eddy's Memory Game-1.0.zip": return "Eddy-s-Memory-Game" elif filename == "jekyll2nikola-0-0-1.tar.gz": return "jekyll2nikola" elif filename.startswith("ore.workflowed"): return "ore.workflowed" elif re.match("instancemanager-[0-9]*", filename): return "instancemanager" elif filename == "OrzMC_W&L-1.0.0.tar.gz": return "OrzMC-W-L" elif filename == "use0mk.tar.gz": return "use0mk" elif filename == "play-0-develop-1-gd67cd85.tar.gz": return "play" filename = filename.replace(" ", "-") match = re.match( r"^(?P[a-z_.-]+)" # project name r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, re.I, ) if match: return match.group("project_name") # First try with a rather strict format, but that allows accidentally # matching the version as part of the package name match = re.match( r"^(?P[a-z0-9_.]+?([-_][a-z][a-z0-9.]+?)*?)" # project name r"-v?" r"([0-9]+!)?" # epoch r"[0-9_.]+([a-z]+[0-9]+)?" # "main" version r"([.-]?(alpha|beta|dev|post|pre|rc)(\.?[0-9]+)?)*" # development status r"([.-]?20[012][0-9]{5,9})?" # date r"([.-]g?[0-9a-f]+)?" # git commit r"([-+]py(thon)?(3k|[23](\.?[0-9]{1,2})?))?" # python version r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, re.I, ) if match: return match.group("project_name") # If that doesn't work, give up on trying to parse version suffixes, # and just find the first version-like occurrence in the file name match = re.match( r"^(?P[a-z0-9_.-]+?)" # project name r"[-_.]v?" r"([0-9]+!)?" # epoch r"(" # "main" version r"[0-9_]+\.[0-9_.]+([a-z]+[0-9]+)?" # classic version number r"|20[012][0-9]{5,9}" # date as integer r"|20[012][0-9]-[01][0-9]-[0-3][0-9]" # date as ISO 8601 r")" # end of "main" version r"[a-z]?(dev|pre)?" # direct version suffix r"([._-].*)?" # extra suffixes r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, re.I, ) if match: return match.group("project_name") # If that still doesn't work, give one last chance if there's only one # dash or underscore in the name match = re.match( r"^(?P[^_-]+)" # project name r"[_-][^_-]+" # version r"\.(tar\.gz|tar\.bz2|tgz|zip)$", # extension filename, ) assert match, original_filename return match.group("project_name") def pypi_origin_from_project_name(project_name: str) -> str: return f"https://pypi.org/project/{project_name}/" def pypi_origin_from_filename(storage, rev_id: bytes, filename: str) -> Optional[str]: project_name = pypi_project_from_filename(filename) origin = pypi_origin_from_project_name(project_name) # But unfortunately, the filename is user-provided, and doesn't # necessarily match the package name on pypi. 
Therefore, we need # to check it. if _check_revision_in_origin(storage, origin, rev_id): return origin # if the origin we guessed does not exist, query the PyPI API with the # project name we guessed. If only the capitalisation and dash/underscores # are wrong (by far the most common case), PyPI kindly corrects them. try: resp = urlopen(f"https://pypi.org/pypi/{project_name}/json/") except HTTPError as e: assert e.code == 404 # nope; PyPI couldn't correct the wrong project name return None assert resp.code == 200, resp.code project_name = json.load(resp)["info"]["name"] origin = pypi_origin_from_project_name(project_name) if _check_revision_in_origin(storage, origin, rev_id): return origin else: # The origin exists, but the revision does not belong in it. # This happens sometimes, as the filename we guessed the origin # from is user-provided. return None def cran_package_from_url(filename): match = re.match( r"^https://cran\.r-project\.org/src/contrib/" r"(?P[a-zA-Z0-9.]+)_[0-9.-]+(\.tar\.gz)?$", filename, ) assert match, filename return match.group("package_name") def npm_package_from_source_url(package_source_url): match = re.match( "^https://registry.npmjs.org/(?P.*)/-/[^/]+.tgz$", package_source_url, ) assert match, package_source_url return unquote(match.group("package_name")) def remove_atom_codemeta_metadata_with_xmlns(metadata): """Removes all known Atom and Codemeta metadata fields from the dict, assuming this is a dict generated by xmltodict without expanding namespaces. """ keys_to_remove = ATOM_KEYS + ["@xmlns", "@xmlns:codemeta"] for key in list(metadata): if key.startswith("codemeta:") or key in keys_to_remove: del metadata[key] def remove_atom_codemeta_metadata_without_xmlns(metadata): """Removes all known Atom and Codemeta metadata fields from the dict, assuming this is a dict generated by xmltodict with expanded namespaces. """ for key in list(metadata): if key.startswith(("{%s}" % ATOM_NS, "{%s}" % CODEMETA_NS)): del metadata[key] def _check_revision_in_origin(storage, origin, revision_id): seen_snapshots = set() # no need to visit them again seen_revisions = set() for visit in iter_origin_visits(storage, origin): for status in iter_origin_visit_statuses(storage, origin, visit.visit): if status.snapshot is None: continue if status.snapshot in seen_snapshots: continue seen_snapshots.add(status.snapshot) snapshot = snapshot_get_all_branches(storage, status.snapshot) for (branch_name, branch) in snapshot.branches.items(): if branch is None: continue # If it's the revision passed as argument, then it is indeed in the # origin if branch.target == revision_id: return True # Else, let's make sure the branch doesn't have any other revision # Get the revision at the top of the branch. if branch.target in seen_revisions: continue seen_revisions.add(branch.target) revision = storage.revision_get([branch.target])[0] if revision is None: # https://forge.softwareheritage.org/T997 continue # Check it doesn't have parents (else we would have to # recurse) assert revision.parents == (), "revision with parents" return False def debian_origins_from_row(row, storage): """Guesses a Debian origin from a row. 
May return an empty list if it cannot reliably guess it, but all results are guaranteed to be correct.""" filenames = [entry["filename"] for entry in row["metadata"]["original_artifact"]] package_names = {filename.split("_")[0] for filename in filenames} assert len(package_names) == 1, package_names (package_name,) = package_names candidate_origins = [ f"deb://Debian/packages/{package_name}", f"deb://Debian-Security/packages/{package_name}", f"http://snapshot.debian.org/package/{package_name}/", ] return [ origin for origin in candidate_origins if _check_revision_in_origin(storage, origin, row["id"]) ] # Cache of origins that are known to exist _origins = set() def assert_origin_exists(storage, origin): assert check_origin_exists(storage, origin), origin def check_origin_exists(storage, origin): return ( ( hashlib.sha1(origin.encode()).digest() in _origins # very fast or storage.origin_get([origin])[0] is not None # slow, but up to date ), origin, ) def load_metadata( storage, revision_id, directory_id, discovery_date: datetime.datetime, metadata: Dict[str, Any], format: str, authority: MetadataAuthority, origin: Optional[str], dry_run: bool, ): """Does the actual loading to swh-storage.""" directory_swhid = ExtendedSWHID( object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id ) revision_swhid = CoreSWHID(object_type=ObjectType.REVISION, object_id=revision_id) obj = RawExtrinsicMetadata( target=directory_swhid, discovery_date=discovery_date, authority=authority, fetcher=FETCHER, format=format, metadata=json.dumps(metadata).encode(), origin=origin, revision=revision_swhid, ) if not dry_run: storage.raw_extrinsic_metadata_add([obj]) def handle_deposit_row( row, discovery_date: Optional[datetime.datetime], origin, storage, deposit_cur, dry_run: bool, ): """Loads metadata from the deposit database (which is more reliable as the metadata on the revision object, as some versions of the deposit loader were a bit lossy; and they used very different format for the field in the revision table). 
""" parsed_message = deposit_revision_message_re.match(row["message"]) assert parsed_message is not None, row["message"] deposit_id = int(parsed_message.group("deposit_id")) collection = parsed_message.group("collection").decode() client_name = parsed_message.group("client").decode() deposit_cur.execute( f"SELECT {', '.join(DEPOSIT_COLS)} FROM deposit " f"INNER JOIN deposit_collection " f" ON (deposit.collection_id=deposit_collection.id) " f"INNER JOIN deposit_client ON (deposit.client_id=deposit_client.user_ptr_id) " f"INNER JOIN auth_user ON (deposit.client_id=auth_user.id) " f"INNER JOIN deposit_request ON (deposit.id=deposit_request.deposit_id) " f"WHERE deposit.id = %s", (deposit_id,), ) provider_urls = set() swhids = set() metadata_entries = [] dates = set() external_identifiers = set() for deposit_request_row in deposit_cur: deposit_request = dict(zip(DEPOSIT_COLS, deposit_request_row)) # Sanity checks to make sure we selected the right deposit assert deposit_request["deposit.id"] == deposit_id assert deposit_request["deposit_collection.name"] == collection, deposit_request if client_name != "": # Sometimes it's missing from the commit message assert deposit_request["auth_user.username"] == client_name # Date of the deposit request (either the initial request, of subsequent ones) date = deposit_request["deposit_request.date"] dates.add(date) assert deposit_request["deposit.swhid_context"], deposit_request external_identifiers.add(deposit_request["deposit.external_id"]) swhids.add(deposit_request["deposit.swhid_context"]) # Client of the deposit provider_urls.add(deposit_request["deposit_client.provider_url"]) metadata = deposit_request["deposit_request.metadata"] if metadata is not None: json.dumps(metadata).encode() # check it's valid if "@xmlns" in metadata: assert metadata["@xmlns"] == ATOM_NS assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS]) format = NEW_DEPOSIT_FORMAT elif "{http://www.w3.org/2005/Atom}id" in metadata: assert ( "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in metadata or "{http://www.w3.org/2005/Atom}author" in metadata ) format = OLD_DEPOSIT_FORMAT else: # new format introduced in # https://forge.softwareheritage.org/D4065 # it's the same as the first case, but with the @xmlns # declarations stripped # Most of them should have the "id", but some revisions, # like 4d3890004fade1f4ec3bf7004a4af0c490605128, are missing # this field assert "id" in metadata or "title" in metadata assert "codemeta:author" in metadata format = NEW_DEPOSIT_FORMAT metadata_entries.append((date, format, metadata)) if discovery_date is None: discovery_date = max(dates) # Sanity checks to make sure deposit requests are consistent with each other assert len(metadata_entries) >= 1, deposit_id assert len(provider_urls) == 1, f"expected 1 provider url, got {provider_urls}" (provider_url,) = provider_urls assert len(swhids) == 1 (swhid,) = swhids assert ( len(external_identifiers) == 1 ), f"expected 1 external identifier, got {external_identifiers}" (external_identifier,) = external_identifiers # computed the origin from the external_identifier if we don't have one if origin is None: origin = f"{provider_url.strip('/')}/{external_identifier}" # explicit list of mistakes that happened in the past, but shouldn't # happen again: if origin == "https://hal.archives-ouvertes.fr/hal-01588781": # deposit id 75 origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588781" elif origin == "https://hal.archives-ouvertes.fr/hal-01588782": # deposit id 76 origin = 
"https://inria.halpreprod.archives-ouvertes.fr/hal-01588782" elif origin == "https://hal.archives-ouvertes.fr/hal-01592430": # deposit id 143 origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592430" elif origin == "https://hal.archives-ouvertes.fr/hal-01588927": origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588927" elif origin == "https://hal.archives-ouvertes.fr/hal-01593875": # deposit id 175 origin = "https://hal-preprod.archives-ouvertes.fr/hal-01593875" elif deposit_id == 160: assert origin == "https://www.softwareheritage.org/je-suis-gpl", origin origin = "https://forge.softwareheritage.org/source/jesuisgpl/" elif origin == "https://hal.archives-ouvertes.fr/hal-01588942": # deposit id 90 origin = "https://inria.halpreprod.archives-ouvertes.fr/hal-01588942" elif origin == "https://hal.archives-ouvertes.fr/hal-01592499": # deposit id 162 origin = "https://hal-preprod.archives-ouvertes.fr/hal-01592499" elif origin == "https://hal.archives-ouvertes.fr/hal-01588935": # deposit id 89 origin = "https://hal-preprod.archives-ouvertes.fr/hal-01588935" assert_origin_exists(storage, origin) # check the origin we computed matches the one in the deposit db swhid_origin = QualifiedSWHID.from_string(swhid).origin if origin is not None: # explicit list of mistakes that happened in the past, but shouldn't # happen again: exceptions = [ ( # deposit id 229 "https://hal.archives-ouvertes.fr/hal-01243573", "https://hal-test.archives-ouvertes.fr/hal-01243573", ), ( # deposit id 199 "https://hal.archives-ouvertes.fr/hal-01243065", "https://hal-test.archives-ouvertes.fr/hal-01243065", ), ( # deposit id 164 "https://hal.archives-ouvertes.fr/hal-01593855", "https://hal-preprod.archives-ouvertes.fr/hal-01593855", ), ] if (origin, swhid_origin) not in exceptions: assert origin == swhid_origin, ( f"the origin we guessed from the deposit db or revision ({origin}) " f"doesn't match the one in the deposit db's SWHID ({swhid})" ) authority = MetadataAuthority( type=MetadataAuthorityType.DEPOSIT_CLIENT, url=provider_url, metadata={}, ) for (date, format, metadata) in metadata_entries: load_metadata( storage, row["id"], row["directory"], date, metadata, format, authority=authority, origin=origin, dry_run=dry_run, ) return (origin, discovery_date) def handle_row(row: Dict[str, Any], storage, deposit_cur, dry_run: bool): type_ = row["type"] # default date in case we can't find a better one discovery_date = row["date"] or row["committer_date"] metadata = row["metadata"] if metadata is None: return if type_ == "dsc": origin = None # it will be defined later, using debian_origins_from_row # TODO: the debian loader writes the changelog date as the revision's # author date and committer date. 
Instead, we should use the visit's date if "extrinsic" in metadata: extrinsic_files = metadata["extrinsic"]["raw"]["files"] for artifact_entry in metadata["original_artifact"]: extrinsic_file = extrinsic_files[artifact_entry["filename"]] for key in ("sha256",): assert artifact_entry["checksums"][key] == extrinsic_file[key] artifact_entry["url"] = extrinsic_file["uri"] del metadata["extrinsic"] elif type_ == "tar": provider = metadata.get("extrinsic", {}).get("provider") if provider is not None: # This is the format all the package loaders currently write, and # it is the easiest, thanks to the 'provider' and 'when' fields, # which have all the information we need to tell them easily # and generate accurate metadata discovery_date = iso8601.parse_date(metadata["extrinsic"]["when"]) # New versions of the loaders write the provider; use it. if provider.startswith("https://replicate.npmjs.com/"): # npm loader format 1 parsed_url = urlparse(provider) assert re.match("^/[^/]+/?$", parsed_url.path), parsed_url package_name = unquote(parsed_url.path.strip("/")) origin = "https://www.npmjs.com/package/" + package_name assert_origin_exists(storage, origin) load_metadata( storage, row["id"], row["directory"], discovery_date, metadata["extrinsic"]["raw"], NPM_FORMAT, authority=AUTHORITIES["npmjs"], origin=origin, dry_run=dry_run, ) del metadata["extrinsic"] elif provider.startswith("https://pypi.org/"): # pypi loader format 1 match = re.match( "https://pypi.org/pypi/(?P.*)/json", provider ) assert match, f"unexpected provider URL format: {provider}" project_name = match.group("project_name") origin = f"https://pypi.org/project/{project_name}/" assert_origin_exists(storage, origin) load_metadata( storage, row["id"], row["directory"], discovery_date, metadata["extrinsic"]["raw"], PYPI_FORMAT, authority=AUTHORITIES["pypi"], origin=origin, dry_run=dry_run, ) del metadata["extrinsic"] elif provider.startswith("https://cran.r-project.org/"): # cran loader provider = metadata["extrinsic"]["provider"] if provider.startswith("https://cran.r-project.org/package="): origin = metadata["extrinsic"]["provider"] else: package_name = cran_package_from_url(provider) origin = f"https://cran.r-project.org/package={package_name}" assert origin is not None # Ideally we should assert the origin exists, but we can't: # https://forge.softwareheritage.org/T2536 if ( hashlib.sha1(origin.encode()).digest() not in _origins and storage.origin_get([origin])[0] is None ): return raw_extrinsic_metadata = metadata["extrinsic"]["raw"] # this is actually intrinsic, ignore it if "version" in raw_extrinsic_metadata: del raw_extrinsic_metadata["version"] # Copy the URL to the original_artifacts metadata assert len(metadata["original_artifact"]) == 1 if "url" in metadata["original_artifact"][0]: assert ( metadata["original_artifact"][0]["url"] == raw_extrinsic_metadata["url"] ), row else: metadata["original_artifact"][0]["url"] = raw_extrinsic_metadata[ "url" ] del raw_extrinsic_metadata["url"] assert ( raw_extrinsic_metadata == {} ), f"Unexpected metadata keys: {list(raw_extrinsic_metadata)}" del metadata["extrinsic"] elif ( provider.startswith("https://nix-community.github.io/nixpkgs-swh/") or provider == "https://guix.gnu.org/sources.json" ): # nixguix loader origin = provider assert_origin_exists(storage, origin) authority = MetadataAuthority( type=MetadataAuthorityType.FORGE, url=provider, metadata={}, ) assert row["date"] is None # the nixguix loader does not write dates load_metadata( storage, row["id"], row["directory"], 
discovery_date, metadata["extrinsic"]["raw"], NIXGUIX_FORMAT, authority=authority, origin=origin, dry_run=dry_run, ) del metadata["extrinsic"] elif provider.startswith("https://ftp.gnu.org/"): # archive loader format 1 origin = provider assert_origin_exists(storage, origin) assert len(metadata["original_artifact"]) == 1 metadata["original_artifact"][0]["url"] = metadata["extrinsic"]["raw"][ "url" ] # Remove duplicate keys of original_artifacts for key in ("url", "time", "length", "version", "filename"): del metadata["extrinsic"]["raw"][key] assert metadata["extrinsic"]["raw"] == {} del metadata["extrinsic"] elif provider.startswith("https://deposit.softwareheritage.org/"): origin = metadata["extrinsic"]["raw"]["origin"]["url"] assert_origin_exists(storage, origin) if "@xmlns" in metadata: assert metadata["@xmlns"] == ATOM_NS assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS]) assert "intrinsic" not in metadata assert "extra_headers" not in metadata # deposit loader format 1 # in this case, the metadata seems to be both directly in metadata # and in metadata["extrinsic"]["raw"]["metadata"] (origin, discovery_date) = handle_deposit_row( row, discovery_date, origin, storage, deposit_cur, dry_run ) remove_atom_codemeta_metadata_with_xmlns(metadata) if "client" in metadata: del metadata["client"] del metadata["extrinsic"] else: # deposit loader format 2 actual_metadata = metadata["extrinsic"]["raw"]["origin_metadata"][ "metadata" ] if isinstance(actual_metadata, str): # new format introduced in # https://forge.softwareheritage.org/D4105 actual_metadata = json.loads(actual_metadata) if "@xmlns" in actual_metadata: assert actual_metadata["@xmlns"] == ATOM_NS assert actual_metadata["@xmlns:codemeta"] in ( CODEMETA_NS, [CODEMETA_NS], ) elif "{http://www.w3.org/2005/Atom}id" in actual_metadata: assert ( "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in actual_metadata ) else: # new format introduced in # https://forge.softwareheritage.org/D4065 # it's the same as the first case, but with the @xmlns # declarations stripped # Most of them should have the "id", but some revisions, # like 4d3890004fade1f4ec3bf7004a4af0c490605128, are missing # this field assert ( "id" in actual_metadata or "title" in actual_metadata or "atom:title" in actual_metadata ) assert "codemeta:author" in actual_metadata (origin, discovery_date) = handle_deposit_row( row, discovery_date, origin, storage, deposit_cur, dry_run ) del metadata["extrinsic"] else: assert False, f"unknown provider {provider}" # Older versions don't write the provider; use heuristics instead. 
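As an aside, here is a small standalone sketch (not part of the patch, assuming the module is importable; the URL and filenames below are only examples) of the helpers defined earlier in this file that the heuristics which follow rely on:

# Illustration only: sample inputs, not taken from the migration itself.
from swh.storage.migrate_extrinsic_metadata import (
    npm_package_from_source_url,
    pypi_project_from_filename,
)

# npm: recover the package name from the tarball URL recorded by the loader.
url = "https://registry.npmjs.org/@angular/core/-/core-8.2.14.tgz"
assert npm_package_from_source_url(url) == "@angular/core"

# PyPI: best-effort parsing of the (user-provided) sdist filename.
assert pypi_project_from_filename("requests-2.24.0.tar.gz") == "requests"
assert pypi_project_from_filename("zope.interface-5.1.0.tar.gz") == "zope.interface"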
elif ( metadata.get("package_source", {}) .get("url", "") .startswith("https://registry.npmjs.org/") ): # npm loader format 2 package_source_url = metadata["package_source"]["url"] package_name = npm_package_from_source_url(package_source_url) origin = "https://www.npmjs.com/package/" + package_name assert_origin_exists(storage, origin) load_metadata( storage, row["id"], row["directory"], discovery_date, metadata["package"], NPM_FORMAT, authority=AUTHORITIES["npmjs"], origin=origin, dry_run=dry_run, ) del metadata["package"] assert "original_artifact" not in metadata # rebuild an "original_artifact"-like metadata dict from what we # can salvage of "package_source" package_source_metadata = metadata["package_source"] keep_keys = {"blake2s256", "filename", "sha1", "sha256", "url"} discard_keys = { "date", # is equal to the revision date "name", # was loaded above "version", # same } assert ( set(package_source_metadata) == keep_keys | discard_keys ), package_source_metadata # will be loaded below metadata["original_artifact"] = [ { "filename": package_source_metadata["filename"], "checksums": { "sha1": package_source_metadata["sha1"], "sha256": package_source_metadata["sha256"], "blake2s256": package_source_metadata["blake2s256"], }, "url": package_source_metadata["url"], } ] del metadata["package_source"] elif "@xmlns" in metadata: assert metadata["@xmlns:codemeta"] in (CODEMETA_NS, [CODEMETA_NS]) assert "intrinsic" not in metadata assert "extra_headers" not in metadata # deposit loader format 3 if row["message"] == b"swh: Deposit 159 in collection swh": # There is no deposit 159 in the deposit DB, for some reason assert ( hash_to_hex(row["id"]) == "8e9cee14a6ad39bca4347077b87fb5bbd8953bb1" ) return elif row["message"] == b"hal: Deposit 342 in collection hal": # They have status 'failed' and no swhid return origin = None # TODO discovery_date = None # TODO (origin, discovery_date) = handle_deposit_row( row, discovery_date, origin, storage, deposit_cur, dry_run ) remove_atom_codemeta_metadata_with_xmlns(metadata) if "client" in metadata: del metadata["client"] # found in the deposit db if "committer" in metadata: del metadata["committer"] # found on the revision object elif "{http://www.w3.org/2005/Atom}id" in metadata: assert ( "{https://doi.org/10.5063/SCHEMA/CODEMETA-2.0}author" in metadata or "{http://www.w3.org/2005/Atom}author" in metadata ) assert "intrinsic" not in metadata assert "extra_headers" not in metadata # deposit loader format 4 origin = None discovery_date = None # TODO (origin, discovery_date) = handle_deposit_row( row, discovery_date, origin, storage, deposit_cur, dry_run ) remove_atom_codemeta_metadata_without_xmlns(metadata) elif hash_to_hex(row["id"]) == "a86747d201ab8f8657d145df4376676d5e47cf9f": # deposit 91, is missing "{http://www.w3.org/2005/Atom}id" for some # reason, and has an invalid oririn return elif ( isinstance(metadata.get("original_artifact"), dict) and metadata["original_artifact"]["url"].startswith( "https://files.pythonhosted.org/" ) ) or ( isinstance(metadata.get("original_artifact"), list) and len(metadata.get("original_artifact")) == 1 and metadata["original_artifact"][0] .get("url", "") .startswith("https://files.pythonhosted.org/") ): if isinstance(metadata.get("original_artifact"), dict): metadata["original_artifact"] = [metadata["original_artifact"]] assert len(metadata["original_artifact"]) == 1 origin = pypi_origin_from_filename( storage, row["id"], metadata["original_artifact"][0]["filename"] ) if "project" in metadata: # pypi loader format 
2 load_metadata( storage, row["id"], row["directory"], discovery_date, metadata["project"], PYPI_FORMAT, authority=AUTHORITIES["pypi"], origin=origin, dry_run=dry_run, ) del metadata["project"] else: assert set(metadata) == {"original_artifact"}, set(metadata) # pypi loader format 3 pass # nothing to do, there's no metadata elif row["message"] == b"synthetic revision message": assert isinstance(metadata["original_artifact"], list), metadata assert not any("url" in d for d in metadata["original_artifact"]) # archive loader format 2 origin = None elif deposit_revision_message_re.match(row["message"]): # deposit without metadata in the revision assert set(metadata) == {"original_artifact"}, metadata origin = None # TODO discovery_date = None (origin, discovery_date) = handle_deposit_row( row, discovery_date, origin, storage, deposit_cur, dry_run ) else: assert False, f"Unable to detect type of metadata for row: {row}" # Ignore common intrinsic metadata keys for key in ("intrinsic", "extra_headers"): if key in metadata: del metadata[key] # Ignore loader-specific intrinsic metadata keys if type_ == "hg": del metadata["node"] elif type_ == "dsc": if "package_info" in metadata: del metadata["package_info"] if "original_artifact" in metadata: for original_artifact in metadata["original_artifact"]: # Rename keys to the expected format of original-artifacts-json. rename_keys = [ ("name", "filename"), # eg. from old Debian loader ("size", "length"), # eg. from old PyPI loader ] for (old_name, new_name) in rename_keys: if old_name in original_artifact: assert new_name not in original_artifact original_artifact[new_name] = original_artifact.pop(old_name) # Move the checksums to their own subdict, which is the expected format # of original-artifacts-json. if "sha1" in original_artifact: assert "checksums" not in original_artifact original_artifact["checksums"] = {} for key in ("sha1", "sha256", "sha1_git", "blake2s256"): if key in original_artifact: original_artifact["checksums"][key] = original_artifact.pop(key) if "date" in original_artifact: # The information comes from the package repository rather than SWH, # so it shouldn't be in the 'original-artifacts' metadata # (which has SWH as authority). # Moreover, it's not a very useful information, so let's just drop it. 
del original_artifact["date"] allowed_keys = { "checksums", "filename", "length", "url", "archive_type", } assert set(original_artifact) <= allowed_keys, set(original_artifact) if type_ == "dsc": assert origin is None origins = debian_origins_from_row(row, storage) if not origins: print(f"Missing Debian origin for revision: {hash_to_hex(row['id'])}") else: origins = [origin] for origin in origins: load_metadata( storage, row["id"], row["directory"], discovery_date, metadata["original_artifact"], ORIGINAL_ARTIFACT_FORMAT, authority=AUTHORITIES["swh"], origin=origin, dry_run=dry_run, ) del metadata["original_artifact"] assert metadata == {}, ( f"remaining metadata keys for {row['id'].hex()} (type: {row['type']}): " f"{metadata}" ) def create_fetchers(db): with db.cursor() as cur: cur.execute( """ INSERT INTO metadata_fetcher (name, version, metadata) VALUES (%s, %s, %s) ON CONFLICT DO NOTHING """, (FETCHER.name, FETCHER.version, FETCHER.metadata), ) def iter_revision_rows(storage_dbconn: str, first_id: Sha1Git): after_id = first_id failures = 0 while True: try: storage_db = BaseDb.connect(storage_dbconn) with storage_db.cursor() as cur: while True: cur.execute( f"SELECT {', '.join(REVISION_COLS)} FROM revision " f"WHERE id > %s AND metadata IS NOT NULL AND type != 'git'" f"ORDER BY id LIMIT 1000", (after_id,), ) new_rows = 0 for row in cur: new_rows += 1 row_d = dict(zip(REVISION_COLS, row)) yield row_d after_id = row_d["id"] if new_rows == 0: return except psycopg2.OperationalError as e: print(e) # most likely a temporary error, try again if failures >= 60: raise else: time.sleep(60) failures += 1 def main(storage_dbconn, storage_url, deposit_dbconn, first_id, dry_run): storage_db = BaseDb.connect(storage_dbconn) deposit_db = BaseDb.connect(deposit_dbconn) storage = get_storage( "pipeline", steps=[ {"cls": "retry"}, { - "cls": "local", + "cls": "postgresql", "db": storage_dbconn, "objstorage": {"cls": "memory", "args": {}}, }, ], ) if not dry_run: create_fetchers(storage_db) # Not creating authorities, as the loaders are presumably already running # and created them already. # This also helps make sure this script doesn't accidentally create # authorities that differ from what the loaders use.
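The storage instantiated in main() above illustrates the point of this patch: the PostgreSQL backend is now selected with the class name "postgresql" instead of the old "local" spelling. A minimal configuration sketch (the DSN is a placeholder and a reachable database is assumed):

# Illustration only: placeholder DSN, requires a reachable PostgreSQL instance.
from swh.storage import get_storage

storage = get_storage(
    "pipeline",
    steps=[
        {"cls": "retry"},
        {
            "cls": "postgresql",  # previously spelled "local"
            "db": "service=swh-dev",
            "objstorage": {"cls": "memory"},
        },
    ],
)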
total_rows = 0 with deposit_db.cursor() as deposit_cur: for row in iter_revision_rows(storage_dbconn, first_id): handle_row(row, storage, deposit_cur, dry_run) total_rows += 1 if total_rows % 1000 == 0: percents = ( int.from_bytes(row["id"][0:4], byteorder="big") * 100 / (1 << 32) ) print( f"Processed {total_rows/1000000.:.2f}M rows " f"(~{percents:.1f}%, last revision: {row['id'].hex()})" ) if __name__ == "__main__": if len(sys.argv) == 4: (_, storage_dbconn, storage_url, deposit_dbconn) = sys.argv first_id = "00" * 20 elif len(sys.argv) == 5: (_, storage_dbconn, storage_url, deposit_dbconn, first_id) = sys.argv else: print( f"Syntax: {sys.argv[0]} " f" []" ) exit(1) if os.path.isfile("./origins.txt"): # You can generate this file with: # psql service=swh-replica \ # -c "\copy (select digest(url, 'sha1') from origin) to stdout" \ # | pv -l > origins.txt print("Loading origins...") with open("./origins.txt") as fd: for line in fd: digest = line.strip()[3:] _origins.add(bytes.fromhex(digest)) print("Done loading origins.") main(storage_dbconn, storage_url, deposit_dbconn, bytes.fromhex(first_id), True) diff --git a/swh/storage/pytest_plugin.py b/swh/storage/pytest_plugin.py index 205271b0..604a3d90 100644 --- a/swh/storage/pytest_plugin.py +++ b/swh/storage/pytest_plugin.py @@ -1,54 +1,54 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from os import environ, path import pytest from swh.core.db.pytest_plugin import postgresql_fact import swh.storage from swh.storage import get_storage from swh.storage.tests.storage_data import StorageData SQL_DIR = path.join(path.dirname(swh.storage.__file__), "sql") environ["LC_ALL"] = "C.UTF-8" swh_storage_postgresql = postgresql_fact( "postgresql_proc", dbname="storage", dump_files=path.join(SQL_DIR, "*.sql") ) @pytest.fixture def swh_storage_backend_config(swh_storage_postgresql): """Basic pg storage configuration with no journal collaborator (to avoid pulling optional dependency on clients of this fixture) """ yield { - "cls": "local", + "cls": "postgresql", "db": swh_storage_postgresql.dsn, "objstorage": {"cls": "memory"}, "check_config": {"check_write": True}, } @pytest.fixture def swh_storage(swh_storage_backend_config): return get_storage(**swh_storage_backend_config) @pytest.fixture def sample_data() -> StorageData: """Pre-defined sample storage object data to manipulate Returns: StorageData whose attribute keys are data model objects. Either multiple objects: contents, directories, revisions, releases, ... or simple ones: content, directory, revision, release, ... 
""" return StorageData() diff --git a/swh/storage/tests/test_backfill.py b/swh/storage/tests/test_backfill.py index 6865aa6c..b1a90520 100644 --- a/swh/storage/tests/test_backfill.py +++ b/swh/storage/tests/test_backfill.py @@ -1,298 +1,298 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import functools import logging from unittest.mock import patch import pytest from swh.journal.client import JournalClient from swh.model.tests.swh_model_data import TEST_OBJECTS from swh.storage import get_storage from swh.storage.backfill import ( PARTITION_KEY, JournalBackfiller, byte_ranges, compute_query, raw_extrinsic_metadata_target_ranges, ) from swh.storage.in_memory import InMemoryStorage from swh.storage.replay import process_replay_objects from swh.storage.tests.test_replay import check_replayed TEST_CONFIG = { "journal_writer": { "brokers": ["localhost"], "prefix": "swh.tmp_journal.new", "client_id": "swh.journal.client.test", }, - "storage": {"cls": "local", "db": "service=swh-dev"}, + "storage": {"cls": "postgresql", "db": "service=swh-dev"}, } def test_config_ko_missing_mandatory_key(): """Missing configuration key will make the initialization fail """ for key in TEST_CONFIG.keys(): config = TEST_CONFIG.copy() config.pop(key) with pytest.raises(ValueError) as e: JournalBackfiller(config) error = "Configuration error: The following keys must be provided: %s" % ( ",".join([key]), ) assert e.value.args[0] == error def test_config_ko_unknown_object_type(): """Parse arguments will fail if the object type is unknown """ backfiller = JournalBackfiller(TEST_CONFIG) with pytest.raises(ValueError) as e: backfiller.parse_arguments("unknown-object-type", 1, 2) error = ( "Object type unknown-object-type is not supported. 
" "The only possible values are %s" % (", ".join(sorted(PARTITION_KEY))) ) assert e.value.args[0] == error def test_compute_query_content(): query, where_args, column_aliases = compute_query("content", "\x000000", "\x000001") assert where_args == ["\x000000", "\x000001"] assert column_aliases == [ "sha1", "sha1_git", "sha256", "blake2s256", "length", "status", "ctime", ] assert ( query == """ select sha1,sha1_git,sha256,blake2s256,length,status,ctime from content where (sha1) >= %s and (sha1) < %s """ ) def test_compute_query_skipped_content(): query, where_args, column_aliases = compute_query("skipped_content", None, None) assert where_args == [] assert column_aliases == [ "sha1", "sha1_git", "sha256", "blake2s256", "length", "ctime", "status", "reason", ] assert ( query == """ select sha1,sha1_git,sha256,blake2s256,length,ctime,status,reason from skipped_content """ ) def test_compute_query_origin_visit(): query, where_args, column_aliases = compute_query("origin_visit", 1, 10) assert where_args == [1, 10] assert column_aliases == [ "visit", "type", "origin", "date", ] assert ( query == """ select visit,type,origin.url as origin,date from origin_visit left join origin on origin_visit.origin=origin.id where (origin_visit.origin) >= %s and (origin_visit.origin) < %s """ ) def test_compute_query_release(): query, where_args, column_aliases = compute_query("release", "\x000002", "\x000003") assert where_args == ["\x000002", "\x000003"] assert column_aliases == [ "id", "date", "date_offset", "date_neg_utc_offset", "comment", "name", "synthetic", "target", "target_type", "author_id", "author_name", "author_email", "author_fullname", ] assert ( query == """ select release.id as id,date,date_offset,date_neg_utc_offset,comment,release.name as name,synthetic,target,target_type,a.id as author_id,a.name as author_name,a.email as author_email,a.fullname as author_fullname from release left join person a on release.author=a.id where (release.id) >= %s and (release.id) < %s """ # noqa ) @pytest.mark.parametrize("numbits", [2, 3, 8, 16]) def test_byte_ranges(numbits): ranges = list(byte_ranges(numbits)) assert len(ranges) == 2 ** numbits assert ranges[0][0] is None assert ranges[-1][1] is None bounds = [] for i, (left, right) in enumerate(zip(ranges[:-1], ranges[1:])): assert left[1] == right[0], f"Mismatched bounds in {i}th range" bounds.append(left[1]) assert bounds == sorted(bounds) def test_raw_extrinsic_metadata_target_ranges(): ranges = list(raw_extrinsic_metadata_target_ranges()) assert ranges[0][0] == "" assert ranges[-1][1] is None bounds = [] for i, (left, right) in enumerate(zip(ranges[:-1], ranges[1:])): assert left[1] == right[0], f"Mismatched bounds in {i}th range" bounds.append(left[1]) assert bounds == sorted(bounds) RANGE_GENERATORS = { "content": lambda start, end: [(None, None)], "skipped_content": lambda start, end: [(None, None)], "directory": lambda start, end: [(None, None)], "extid": lambda start, end: [(None, None)], "metadata_authority": lambda start, end: [(None, None)], "metadata_fetcher": lambda start, end: [(None, None)], "revision": lambda start, end: [(None, None)], "release": lambda start, end: [(None, None)], "snapshot": lambda start, end: [(None, None)], "origin": lambda start, end: [(None, 10000)], "origin_visit": lambda start, end: [(None, 10000)], "origin_visit_status": lambda start, end: [(None, 10000)], "raw_extrinsic_metadata": lambda start, end: [(None, None)], } @patch("swh.storage.backfill.RANGE_GENERATORS", RANGE_GENERATORS) def test_backfiller( 
@patch("swh.storage.backfill.RANGE_GENERATORS", RANGE_GENERATORS)
def test_backfiller(
    swh_storage_backend_config,
    kafka_prefix: str,
    kafka_consumer_group: str,
    kafka_server: str,
    caplog,
):
    prefix1 = f"{kafka_prefix}-1"
    prefix2 = f"{kafka_prefix}-2"

    journal1 = {
        "cls": "kafka",
        "brokers": [kafka_server],
        "client_id": "kafka_writer-1",
        "prefix": prefix1,
    }
    swh_storage_backend_config["journal_writer"] = journal1
    storage = get_storage(**swh_storage_backend_config)

    # fill the storage and the journal (under prefix1)
    for object_type, objects in TEST_OBJECTS.items():
        method = getattr(storage, object_type + "_add")
        method(objects)

    # now apply the backfiller on the storage to fill the journal under prefix2
    backfiller_config = {
        "journal_writer": {
            "brokers": [kafka_server],
            "client_id": "kafka_writer-2",
            "prefix": prefix2,
        },
        "storage": swh_storage_backend_config,
    }

    # Backfilling
    backfiller = JournalBackfiller(backfiller_config)
    for object_type in TEST_OBJECTS:
        backfiller.run(object_type, None, None)

    # Trace log messages for unhandled object types in the replayer
    caplog.set_level(logging.DEBUG, "swh.storage.replay")

    # now check journal contents are the same under both topics;
    # use the replayer scaffolding to fill storages to make it a bit easier

    # Replaying #1
    sto1 = get_storage(cls="memory")
    replayer1 = JournalClient(
        brokers=kafka_server,
        group_id=f"{kafka_consumer_group}-1",
        prefix=prefix1,
        stop_on_eof=True,
    )
    worker_fn1 = functools.partial(process_replay_objects, storage=sto1)
    replayer1.process(worker_fn1)

    # Replaying #2
    sto2 = get_storage(cls="memory")
    replayer2 = JournalClient(
        brokers=kafka_server,
        group_id=f"{kafka_consumer_group}-2",
        prefix=prefix2,
        stop_on_eof=True,
    )
    worker_fn2 = functools.partial(process_replay_objects, storage=sto2)
    replayer2.process(worker_fn2)

    # Compare storages
    assert isinstance(sto1, InMemoryStorage)  # needed to help mypy
    assert isinstance(sto2, InMemoryStorage)
    check_replayed(sto1, sto2)

    for record in caplog.records:
        assert (
            "this should not happen" not in record.message
        ), "Replayer ignored some message types, see captured logging"
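
For readers unfamiliar with the round trip this test exercises, here is a hedged sketch of the same flow outside pytest: backfill a storage into a journal topic, then replay that topic into a fresh in-memory storage. Broker addresses, topic prefixes and client ids are placeholders, and the side-effecting calls are left commented because they need a live database and Kafka broker.

import functools

from swh.journal.client import JournalClient
from swh.storage import get_storage
from swh.storage.backfill import JournalBackfiller
from swh.storage.replay import process_replay_objects

backfiller_config = {
    "journal_writer": {
        "brokers": ["broker.example:9092"],   # placeholder broker
        "prefix": "swh.journal.objects",      # placeholder topic prefix
        "client_id": "backfiller-example",
    },
    "storage": {"cls": "postgresql", "db": "service=swh-dev"},
}

# Producing objects to Kafka requires live services, so these calls stay commented:
# backfiller = JournalBackfiller(backfiller_config)
# backfiller.run("release", None, None)   # one object type, unbounded range
#
# replayed = get_storage(cls="memory")
# replayer = JournalClient(
#     brokers="broker.example:9092",
#     group_id="example-replayer",
#     prefix="swh.journal.objects",
#     stop_on_eof=True,
# )
# replayer.process(functools.partial(process_replay_objects, storage=replayed))
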
@patch("swh.storage.postgresql.storage.psycopg2.pool") def test_get_storage(mock_pool, cls, real_class, args): """Instantiating an existing storage should be ok """ mock_pool.ThreadedConnectionPool.return_value = None actual_storage = get_storage(cls, **args) assert actual_storage is not None assert isinstance(actual_storage, real_class) @pytest.mark.parametrize("cls,real_class,args", STORAGES) @patch("swh.storage.postgresql.storage.psycopg2.pool") def test_get_storage_legacy_args(mock_pool, cls, real_class, args): """Instantiating an existing storage should be ok even with the legacy explicit 'args' keys """ mock_pool.ThreadedConnectionPool.return_value = None with pytest.warns(DeprecationWarning): actual_storage = get_storage(cls, args=args) assert actual_storage is not None assert isinstance(actual_storage, real_class) def test_get_storage_failure(): """Instantiating an unknown storage should raise """ with pytest.raises(ValueError, match="Unknown storage class `unknown`"): get_storage("unknown") def test_get_storage_pipeline(): config = { "cls": "pipeline", "steps": [ {"cls": "filter",}, {"cls": "buffer", "min_batch_size": {"content": 10,},}, {"cls": "memory",}, ], } storage = get_storage(**config) assert isinstance(storage, FilteringProxyStorage) assert isinstance(storage.storage, BufferingProxyStorage) assert isinstance(storage.storage.storage, InMemoryStorage) def test_get_storage_pipeline_legacy_args(): config = { "cls": "pipeline", "steps": [ {"cls": "filter",}, {"cls": "buffer", "args": {"min_batch_size": {"content": 10,},}}, {"cls": "memory",}, ], } with pytest.warns(DeprecationWarning): storage = get_storage(**config) assert isinstance(storage, FilteringProxyStorage) assert isinstance(storage.storage, BufferingProxyStorage) assert isinstance(storage.storage.storage, InMemoryStorage) # get_storage's check_config argument tests # the "remote" and "pipeline" cases are tested in dedicated test functions below @pytest.mark.parametrize( - "cls,real_class,kwargs", [x for x in STORAGES if x.id not in ("remote", "local")] + "cls,real_class,kwargs", + [x for x in STORAGES if x.id not in ("remote", "local", "postgresql")], ) def test_get_storage_check_config(cls, real_class, kwargs, monkeypatch): """Instantiating an existing storage with check_config should be ok """ check_backend_check_config(monkeypatch, dict(cls=cls, **kwargs)) @patch("swh.storage.postgresql.storage.psycopg2.pool") -def test_get_storage_local_check_config(mock_pool, monkeypatch): +@pytest.mark.parametrize("clazz", ["local", "postgresql"]) +def test_get_storage_local_check_config(mock_pool, monkeypatch, clazz): """Instantiating a local storage with check_config should be ok """ mock_pool.ThreadedConnectionPool.return_value = None check_backend_check_config( monkeypatch, - {"cls": "local", "db": "postgresql://db", "objstorage": {"cls": "memory"}}, + {"cls": clazz, "db": "postgresql://db", "objstorage": {"cls": "memory"}}, backend_storage_cls=DbStorage, ) def test_get_storage_pipeline_check_config(monkeypatch): """Test that the check_config option works as intended for a pipelined storage""" config = { "cls": "pipeline", "steps": [ {"cls": "filter",}, {"cls": "buffer", "min_batch_size": {"content": 10,},}, {"cls": "memory",}, ], } check_backend_check_config( monkeypatch, config, ) def test_get_storage_remote_check_config(monkeypatch): """Test that the check_config option works as intended for a remote storage""" monkeypatch.setattr( server, "storage", get_storage(cls="memory", journal_writer={"cls": "memory"}) ) 
# get_storage's check_config argument tests

# the "remote" and "pipeline" cases are tested in dedicated test functions below
@pytest.mark.parametrize(
-    "cls,real_class,kwargs", [x for x in STORAGES if x.id not in ("remote", "local")]
+    "cls,real_class,kwargs",
+    [x for x in STORAGES if x.id not in ("remote", "local", "postgresql")],
)
def test_get_storage_check_config(cls, real_class, kwargs, monkeypatch):
    """Instantiating an existing storage with check_config should be ok

    """
    check_backend_check_config(monkeypatch, dict(cls=cls, **kwargs))


@patch("swh.storage.postgresql.storage.psycopg2.pool")
-def test_get_storage_local_check_config(mock_pool, monkeypatch):
+@pytest.mark.parametrize("clazz", ["local", "postgresql"])
+def test_get_storage_local_check_config(mock_pool, monkeypatch, clazz):
    """Instantiating a local storage with check_config should be ok

    """
    mock_pool.ThreadedConnectionPool.return_value = None
    check_backend_check_config(
        monkeypatch,
-        {"cls": "local", "db": "postgresql://db", "objstorage": {"cls": "memory"}},
+        {"cls": clazz, "db": "postgresql://db", "objstorage": {"cls": "memory"}},
        backend_storage_cls=DbStorage,
    )


def test_get_storage_pipeline_check_config(monkeypatch):
    """Test that the check_config option works as intended for a pipelined storage"""
    config = {
        "cls": "pipeline",
        "steps": [
            {"cls": "filter",},
            {"cls": "buffer", "min_batch_size": {"content": 10,},},
            {"cls": "memory",},
        ],
    }
    check_backend_check_config(
        monkeypatch, config,
    )


def test_get_storage_remote_check_config(monkeypatch):
    """Test that the check_config option works as intended for a remote storage"""
    monkeypatch.setattr(
        server, "storage", get_storage(cls="memory", journal_writer={"cls": "memory"})
    )
    test_client = server.app.test_client()

    class MockedRemoteStorage(client.RemoteStorage):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.session.adapters.clear()
            self.session.mount("mock://", RPCTestAdapter(test_client))

    monkeypatch.setattr(client, "RemoteStorage", MockedRemoteStorage)

    config = {
        "cls": "remote",
        "url": "mock://example.com",
    }
    check_backend_check_config(
        monkeypatch, config,
    )


def check_backend_check_config(
    monkeypatch, config, backend_storage_cls=InMemoryStorage
):
    """Check that the staged/indirect storage (pipeline or remote) works as desired
    with regard to the check_config option of the get_storage() factory function.

    If set, the check_config argument is used to call the Storage.check_config() at
    instantiation time in the get_storage() factory function. This is supposed to be
    passed through each step of the Storage pipeline until it reaches the actual
    backend's (typically in memory or local) check_config() method, which will perform
    the verification of read/write access to the backend storage.

    monkeypatch is supposed to be the monkeypatch pytest fixture to be used from the
    calling test_ function.

    config is the config dict passed to get_storage()

    backend_storage_cls is the class of the backend storage to be mocked to simulate
    the check_config behavior; it should then be the class of the actual backend
    storage defined in the `config`.

    """
    access = None

    def mockcheck(self, check_write=False):
        if access == "none":
            return False
        if access == "read":
            return check_write is False
        if access == "write":
            return True

    monkeypatch.setattr(backend_storage_cls, "check_config", mockcheck)

    # simulate no read nor write access to the underlying (memory) storage
    access = "none"
    # by default, no check, so no complaint
    assert get_storage(**config)
    # if asked to check, complain
    with pytest.raises(EnvironmentError):
        get_storage(check_config={"check_write": False}, **config)
    with pytest.raises(EnvironmentError):
        get_storage(check_config={"check_write": True}, **config)

    # simulate no write access to the underlying (memory) storage
    access = "read"
    # by default, no check so no complaint
    assert get_storage(**config)
    # if asked to check for read access, no complaint
    get_storage(check_config={"check_write": False}, **config)
    # if asked to check for write access, complain
    with pytest.raises(EnvironmentError):
        get_storage(check_config={"check_write": True}, **config)

    # simulate read & write access to the underlying (memory) storage
    access = "write"
    # by default, no check so no complaint
    assert get_storage(**config)
    # if asked to check for read access, no complaint
    get_storage(check_config={"check_write": False}, **config)
    # if asked to check for write access, no complaint
    get_storage(check_config={"check_write": True}, **config)
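
As a usage note for the check_config knob exercised by the helper above, here is a minimal sketch assuming an in-memory backend; whether the guarded call raises depends on what the backend's check_config() reports, so the failure branch is kept explicit rather than asserted.

# Illustrative only: how a caller opts into configuration checking at
# instantiation time.
from swh.storage import get_storage

config = {"cls": "memory"}

# Default: no check is performed, the storage object is simply returned.
storage = get_storage(**config)

# With check_config, get_storage() calls check_config() on the backend and
# raises EnvironmentError when the requested access level is not available.
try:
    storage = get_storage(check_config={"check_write": True}, **config)
except EnvironmentError:
    # the backend reported that it is not writable
    pass
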
content, name="config.yml"): """Prepare configuration file in `$tmpdir/name` with content `content`. Args: tmpdir (LocalPath): root directory content (str/dict): Content of the file either as string or as a dict. If a dict, converts the dict into a yaml string. name (str): configuration filename Returns path (str) of the configuration file prepared. """ config_path = tmpdir / name if isinstance(content, dict): # convert if needed content = yaml.dump(content) config_path.write_text(content, encoding="utf-8") # pytest on python3.5 does not support LocalPath manipulation, so # convert path to string return str(config_path) @pytest.mark.parametrize("storage_class", [None, ""]) def test_load_and_check_config_no_configuration(storage_class): """Inexistent configuration files raises""" with pytest.raises(EnvironmentError, match="Configuration file must be defined"): load_and_check_config(storage_class) def test_load_and_check_config_inexistent_file(): config_path = "/some/inexistent/config.yml" expected_error = f"Configuration file {config_path} does not exist" with pytest.raises(FileNotFoundError, match=expected_error): load_and_check_config(config_path) def test_load_and_check_config_wrong_configuration(tmpdir): """Wrong configuration raises""" config_path = prepare_config_file(tmpdir, "something: useless") with pytest.raises(KeyError, match="Missing 'storage' configuration"): load_and_check_config(config_path) def test_load_and_check_config_local_config_fine(tmpdir): """'local' complete configuration is fine""" - config = {"storage": {"cls": "local", "db": "db", "objstorage": "something",}} + config = {"storage": {"cls": "postgresql", "db": "db", "objstorage": "something",}} config_path = prepare_config_file(tmpdir, config) cfg = load_and_check_config(config_path) assert cfg == config @pytest.fixture def swh_storage_server_config( swh_storage_backend_config: Dict[str, Any] ) -> Dict[str, Any]: return {"storage": swh_storage_backend_config} @pytest.fixture def swh_storage_config(monkeypatch, swh_storage_server_config, tmp_path): conf_path = os.path.join(str(tmp_path), "storage.yml") with open(conf_path, "w") as f: f.write(yaml.dump(swh_storage_server_config)) monkeypatch.setenv("SWH_CONFIG_FILENAME", conf_path) return conf_path def test_server_make_app_from_config_file(swh_storage_config): app = make_app_from_configfile() expected_cfg = load_from_envvar() assert app is not None assert isinstance(app, StorageServerApp) assert app.config["storage"] == expected_cfg["storage"] app2 = make_app_from_configfile() assert app is app2