diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index ace1fe2..41a4ad6 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,48 +1,48 @@
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
   rev: v2.4.0
   hooks:
   - id: trailing-whitespace
   - id: check-json
   - id: check-yaml
 - repo: https://gitlab.com/pycqa/flake8
   rev: 3.8.3
   hooks:
   - id: flake8
 - repo: https://github.com/codespell-project/codespell
   rev: v1.16.0
   hooks:
   - id: codespell
 - repo: local
   hooks:
   - id: mypy
     name: mypy
     entry: mypy
     args: [swh]
     pass_filenames: false
     language: system
     types: [python]

 # unfortunately, we are far from being able to enable this...
 # - repo: https://github.com/PyCQA/pydocstyle.git
 #   rev: 4.0.0
 #   hooks:
 #   - id: pydocstyle
 #     name: pydocstyle
 #     description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions.
 #     entry: pydocstyle --convention=google
 #     language: python
 #     types: [python]

 - repo: https://github.com/PyCQA/isort
   rev: 5.5.2
   hooks:
   - id: isort

 - repo: https://github.com/python/black
-  rev: 19.10b0
+  rev: 20.8b1
   hooks:
   - id: black
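The black bump from 19.10b0 to 20.8b1 is what drives nearly every Python hunk below: 20.8b1 introduced the "magic trailing comma", so a call that already ends with a trailing comma is no longer kept on one line but exploded to one argument per line. A minimal sketch of the behavior (illustrative code, not taken from this repo):

```python
# Accepted as-is by black 19.10b0 (call fits, trailing comma tolerated):
entry = RevisionEntry(
    hash_to_bytes(id), date=date, root=hash_to_bytes(root),
)

# black 20.8b1 honors the pre-existing trailing comma and explodes the
# call, one argument per line -- exactly the churn in this diff:
entry = RevisionEntry(
    hash_to_bytes(id),
    date=date,
    root=hash_to_bytes(root),
)
```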
diff --git a/swh/provenance/postgresql/provenancedb_with_path.py b/swh/provenance/postgresql/provenancedb_with_path.py
index ce4f468..8fedbee 100644
--- a/swh/provenance/postgresql/provenancedb_with_path.py
+++ b/swh/provenance/postgresql/provenancedb_with_path.py
@@ -1,193 +1,196 @@
 from datetime import datetime
 import os
 from typing import Generator, Optional, Tuple

 import psycopg2
 import psycopg2.extras

 from ..model import DirectoryEntry, FileEntry
 from ..revision import RevisionEntry
 from .provenancedb_base import ProvenanceDBBase


 def normalize(path: bytes) -> bytes:
     return path[2:] if path.startswith(bytes("." + os.path.sep, "utf-8")) else path


 class ProvenanceWithPathDB(ProvenanceDBBase):
     def content_add_to_directory(
         self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_in_dir"].add(
             (blob.id, directory.id, normalize(os.path.join(prefix, blob.name)))
         )

     def content_add_to_revision(
         self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
     ):
         self.insert_cache["content_early_in_rev"].add(
             (blob.id, revision.id, normalize(os.path.join(prefix, blob.name)))
         )

     def content_find_first(
         self, blobid: bytes
     ) -> Optional[Tuple[bytes, bytes, datetime, bytes]]:
         self.cursor.execute(
             """SELECT content_location.sha1 AS blob,
                       revision.sha1 AS rev,
                       revision.date AS date,
                       content_location.path AS path
                FROM (SELECT content_hex.sha1,
                             content_hex.rev,
                             location.path
                      FROM (SELECT content.sha1,
                                   content_early_in_rev.rev,
                                   content_early_in_rev.loc
                            FROM content_early_in_rev
                            JOIN content
                              ON content.id=content_early_in_rev.blob
                            WHERE content.sha1=%s
                           ) AS content_hex
                      JOIN location
                        ON location.id=content_hex.loc
                     ) AS content_location
                JOIN revision
                  ON revision.id=content_location.rev
                ORDER BY date, rev, path ASC LIMIT 1""",
             (blobid,),
         )
         return self.cursor.fetchone()

     def content_find_all(
         self, blobid: bytes, limit: Optional[int] = None
     ) -> Generator[Tuple[bytes, bytes, datetime, bytes], None, None]:
         early_cut = f"LIMIT {limit}" if limit is not None else ""
         self.cursor.execute(
             f"""(SELECT content_location.sha1 AS blob,
                         revision.sha1 AS rev,
                         revision.date AS date,
                         content_location.path AS path
                  FROM (SELECT content_hex.sha1,
                               content_hex.rev,
                               location.path
                        FROM (SELECT content.sha1,
                                     content_early_in_rev.rev,
                                     content_early_in_rev.loc
                              FROM content_early_in_rev
                              JOIN content
                                ON content.id=content_early_in_rev.blob
                              WHERE content.sha1=%s
                             ) AS content_hex
                        JOIN location
                          ON location.id=content_hex.loc
                       ) AS content_location
                  JOIN revision
                    ON revision.id=content_location.rev
                 )
                 UNION
                 (SELECT content_prefix.sha1 AS blob,
                         revision.sha1 AS rev,
                         revision.date AS date,
                         content_prefix.path AS path
                  FROM (SELECT content_in_rev.sha1,
                               content_in_rev.rev,
                               CASE location.path
                                 WHEN '' THEN content_in_rev.suffix
                                 WHEN '.' THEN content_in_rev.suffix
                                 ELSE (location.path || '/' ||
                                           content_in_rev.suffix)::unix_path
                               END AS path
                        FROM (SELECT content_suffix.sha1,
                                     directory_in_rev.rev,
                                     directory_in_rev.loc,
                                     content_suffix.path AS suffix
                              FROM (SELECT content_hex.sha1,
                                           content_hex.dir,
                                           location.path
                                    FROM (SELECT content.sha1,
                                                 content_in_dir.dir,
                                                 content_in_dir.loc
                                          FROM content_in_dir
                                          JOIN content
                                            ON content_in_dir.blob=content.id
                                          WHERE content.sha1=%s
                                         ) AS content_hex
                                    JOIN location
                                      ON location.id=content_hex.loc
                                   ) AS content_suffix
                              JOIN directory_in_rev
                                ON directory_in_rev.dir=content_suffix.dir
                             ) AS content_in_rev
                        JOIN location
                          ON location.id=content_in_rev.loc
                       ) AS content_prefix
                  JOIN revision
                    ON revision.id=content_prefix.rev
                 )
                 ORDER BY date, rev, path {early_cut}""",
             (blobid, blobid),
         )
         # TODO: use PostgreSQL EXPLAIN to look for query optimizations.
         yield from self.cursor.fetchall()

     def directory_add_to_revision(
         self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
     ):
         self.insert_cache["directory_in_rev"].add(
             (directory.id, revision.id, normalize(path))
         )

     def insert_location(self, src0_table, src1_table, dst_table):
         """Insert location entries in `dst_table` from the insert_cache

         Also insert missing location entries in the 'location' table.
         """
         # TODO: find a better way of doing this; might be doable in a couple
         # of SQL queries (one to insert missing entries in the 'location'
         # table, one to insert entries in the dst_table)

         # Resolve src0 ids
         src0_sha1s = tuple(set(sha1 for (sha1, _, _) in self.insert_cache[dst_table]))
         fmt = ",".join(["%s"] * len(src0_sha1s))
         self.cursor.execute(
-            f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""", src0_sha1s,
+            f"""SELECT sha1, id FROM {src0_table} WHERE sha1 IN ({fmt})""",
+            src0_sha1s,
         )
         src0_values = dict(self.cursor.fetchall())

         # Resolve src1 ids
         src1_sha1s = tuple(set(sha1 for (_, sha1, _) in self.insert_cache[dst_table]))
         fmt = ",".join(["%s"] * len(src1_sha1s))
         self.cursor.execute(
-            f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""", src1_sha1s,
+            f"""SELECT sha1, id FROM {src1_table} WHERE sha1 IN ({fmt})""",
+            src1_sha1s,
         )
         src1_values = dict(self.cursor.fetchall())

         # insert missing locations
         locations = tuple(set((loc,) for (_, _, loc) in self.insert_cache[dst_table]))
         psycopg2.extras.execute_values(
             self.cursor,
             """
             INSERT INTO location(path) VALUES %s
             ON CONFLICT (path) DO NOTHING
             """,
             locations,
         )
         # fetch location ids
         fmt = ",".join(["%s"] * len(locations))
         self.cursor.execute(
-            f"SELECT path, id FROM location WHERE path IN ({fmt})", locations,
+            f"SELECT path, id FROM location WHERE path IN ({fmt})",
+            locations,
         )
         loc_ids = dict(self.cursor.fetchall())

         # Insert values in dst_table
         rows = [
             (src0_values[sha1_src], src1_values[sha1_dst], loc_ids[loc])
             for (sha1_src, sha1_dst, loc) in self.insert_cache[dst_table]
         ]
         psycopg2.extras.execute_values(
             self.cursor,
             f"""INSERT INTO {dst_table} VALUES %s
                 ON CONFLICT DO NOTHING""",
             rows,
         )
         self.insert_cache[dst_table].clear()
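The TODO in `insert_location()` hints that the four round-trips (two id lookups, the location upsert, the location re-read) could collapse into two statements by letting PostgreSQL resolve the ids itself. A rough, untested sketch under the same schema assumptions (sha1-keyed entity tables, path-keyed `location`); `insert_location_batched` is a hypothetical name, not project API:

```python
import psycopg2.extras


def insert_location_batched(cursor, src0_table, src1_table, dst_table, triples):
    """Hypothetical two-statement variant of insert_location().

    `triples` is the set of (src_sha1, dst_sha1, path) tuples from the
    insert cache.
    """
    rows = list(triples)
    # 1) add any missing paths to 'location'
    psycopg2.extras.execute_values(
        cursor,
        """INSERT INTO location(path)
           SELECT DISTINCT v.path FROM (VALUES %s) AS v(src, dst, path)
           ON CONFLICT (path) DO NOTHING""",
        rows,
    )
    # 2) resolve all three ids in SQL and fill the relation table directly
    psycopg2.extras.execute_values(
        cursor,
        f"""INSERT INTO {dst_table}
            SELECT s.id, d.id, l.id
            FROM (VALUES %s) AS v(src, dst, path)
            JOIN {src0_table} s ON s.sha1 = v.src
            JOIN {src1_table} d ON d.sha1 = v.dst
            JOIN location l ON l.path = v.path
            ON CONFLICT DO NOTHING""",
        rows,
    )
```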
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
index 5adf3b9..378e30a 100644
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -1,152 +1,154 @@
 from datetime import datetime, timezone
 from itertools import islice
 import threading
 from typing import Iterable, Iterator, Optional, Tuple

 import iso8601

 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.model import RevisionEntry

 ########################################################################################
 ########################################################################################


 class CSVRevisionIterator:
     """Iterator over revisions typically present in the given CSV file.

     The input is an iterator that produces 3 elements per row:

       (id, date, root)

     where:
     - id: is the id (sha1_git) of the revision
     - date: is the author date
     - root: sha1 of the directory
     """

     def __init__(
         self,
         revisions: Iterable[Tuple[bytes, datetime, bytes]],
         archive: ArchiveInterface,
         limit: Optional[int] = None,
     ):
         self.revisions: Iterator[Tuple[bytes, datetime, bytes]]
         if limit is not None:
             self.revisions = islice(revisions, limit)
         else:
             self.revisions = iter(revisions)
         self.mutex = threading.Lock()
         self.archive = archive

     def __iter__(self):
         return self

     def __next__(self):
         with self.mutex:
             id, date, root = next(self.revisions)
             date = iso8601.parse_date(date)
             if date.tzinfo is None:
                 date = date.replace(tzinfo=timezone.utc)
             return RevisionEntry(
-                hash_to_bytes(id), date=date, root=hash_to_bytes(root),
+                hash_to_bytes(id),
+                date=date,
+                root=hash_to_bytes(root),
             )


 # class ArchiveRevisionIterator(RevisionIterator):
 #     """Iterator over revisions present in the given database."""
 #
 #     def __init__(self, conn, limit=None, chunksize=100):
 #         self.cur = conn.cursor()
 #         self.chunksize = chunksize
 #         self.records = []
 #         if limit is None:
 #             self.cur.execute('''SELECT id, date, committer_date, directory
 #                              FROM revision''')
 #         else:
 #             self.cur.execute('''SELECT id, date, committer_date, directory
 #                              FROM revision
 #                              LIMIT %s''', (limit,))
 #         for row in self.cur.fetchmany(self.chunksize):
 #             record = self.make_record(row)
 #             if record is not None:
 #                 self.records.append(record)
 #         self.mutex = threading.Lock()
 #
 #     def __del__(self):
 #         self.cur.close()
 #
 #     def next(self):
 #         self.mutex.acquire()
 #         if not self.records:
 #             self.records.clear()
 #             for row in self.cur.fetchmany(self.chunksize):
 #                 record = self.make_record(row)
 #                 if record is not None:
 #                     self.records.append(record)
 #
 #         if self.records:
 #             revision, *self.records = self.records
 #             self.mutex.release()
 #             return revision
 #         else:
 #             self.mutex.release()
 #             return None
 #
 #     def make_record(self, row):
 #         # Only revision with author or committer date are considered
 #         if row[1] is not None:
 #             # If the revision has author date, it takes precedence
 #             return RevisionEntry(row[0], row[1], row[3])
 #         elif row[2] is not None:
 #             # If not, we use the committer date
 #             return RevisionEntry(row[0], row[2], row[3])


 ########################################################################################
 ########################################################################################


 # class RevisionWorker(threading.Thread):
 #     def __init__(
 #         self,
 #         id: int,
 #         conninfo: dict,
 #         archive: ArchiveInterface,
 #         revisions: RevisionIterator
 #     ):
 #         from .provenance import get_provenance
 #
 #         super().__init__()
 #         self.archive = archive
 #         self.id = id
 #         self.provenance = get_provenance(conninfo)
 #         self.revisions = revisions
 #
 #     def run(self):
 #         from .provenance import revision_add
 #
 #         while True:
 #             revision = self.revisions.next()
 #             if revision is None: break
 #
 #             processed = False
 #             while not processed:
 #                 logging.info(
 #                     f'Thread {(
 #                         self.id
 #                     )} - Processing revision {(
 #                         hash_to_hex(revision.id)
 #                     )} (timestamp: {revision.date})'
 #                 )
 #                 processed = revision_add(self.provenance, self.archive, revision)
 #                 if not processed:
 #                     logging.warning(
 #                         f'Thread {(
 #                             self.id
 #                         )} - Failed to process revision {(
 #                             hash_to_hex(revision.id)
 #                         )} (timestamp: {revision.date})'
 #                     )
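For context: despite the `Tuple[bytes, datetime, bytes]` annotation, `__next__` feeds each field through `hash_to_bytes()` and `iso8601.parse_date()`, so hex/ISO strings straight out of a CSV are what the iterator actually consumes. A hypothetical usage sketch (the `revisions.csv` file, its 3-column layout, and the `archive` object are assumptions, not project fixtures):

```python
import csv

# archive: an ArchiveInterface instance supplied by the caller
with open("revisions.csv", newline="") as f:
    # each row: (sha1_git hex, ISO-8601 author date, root directory sha1 hex)
    rows = ((id, date, root) for id, date, root in csv.reader(f))
    for revision in CSVRevisionIterator(rows, archive, limit=10):
        print(revision.id.hex(), revision.date)
```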
diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py
index e0f332b..93046b5 100644
--- a/swh/provenance/tests/test_cli.py
+++ b/swh/provenance/tests/test_cli.py
@@ -1,111 +1,114 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from click.testing import CliRunner
 import psycopg2
 import pytest
 import yaml

 from swh.core.cli import swh as swhmain
 import swh.core.cli.db  # noqa ; ensure cli is loaded
 import swh.provenance.cli  # noqa ; ensure cli is loaded


 def test_cli_swh_db_help():
     # swhmain.add_command(provenance_cli)
     result = CliRunner().invoke(swhmain, ["provenance", "-h"])
     assert result.exit_code == 0
     assert "Commands:" in result.output
     commands = result.output.split("Commands:")[1]
     for command in (
         "create",
         "find-all",
         "find-first",
         "iter-origins",
         "iter-revisions",
     ):
         assert f" {command} " in commands


 def test_cli_create_deprecated(provenance_db, tmp_path):
     conffile = tmp_path / "config.yml"
     conf = {
-        "provenance": {"cls": "local", "with_path": True,},
+        "provenance": {
+            "cls": "local",
+            "with_path": True,
+        },
     }
     yaml.dump(conf, conffile.open("w"))
     result = CliRunner().invoke(
         swhmain, ["provenance", "--config-file", str(conffile), "create", "--drop"]
     )
     assert result.exit_code == 0, result.output
     assert "DeprecationWarning" in result.output


 TABLES = {
     "dbflavor",
     "dbversion",
     "content",
     "content_early_in_rev",
     "content_in_dir",
     "directory",
     "directory_in_rev",
     "origin",
     "revision",
     "revision_before_rev",
     "revision_in_org",
 }


 @pytest.mark.parametrize(
     "flavor, dbtables", (("with-path", TABLES | {"location"}), ("without-path", TABLES))
 )
 def test_cli_db_create_and_init_db_with_flavor(
     monkeypatch, postgresql, flavor, dbtables
 ):
     """Test that 'swh db init provenance' works with both the with-path and
     without-path flavors"""
     dbname = f"{flavor}-db"

     # DB creation using 'swh db create'
     db_params = postgresql.get_dsn_parameters()
     monkeypatch.setenv("PGHOST", db_params["host"])
     monkeypatch.setenv("PGUSER", db_params["user"])
     monkeypatch.setenv("PGPORT", db_params["port"])
     result = CliRunner().invoke(swhmain, ["db", "create", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output

     # DB init using 'swh db init'
     result = CliRunner().invoke(
         swhmain, ["db", "init", "-d", dbname, "--flavor", flavor, "provenance"]
     )
     assert result.exit_code == 0, result.output
     assert f"(flavor {flavor})" in result.output

     db_params["dbname"] = dbname
     cnx = psycopg2.connect(**db_params)
     # check the DB looks OK (check for db_flavor and expected tables)
     with cnx.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == (flavor,)

         cur.execute(
             "select table_name from information_schema.tables "
             "where table_schema = 'public' "
             f"and table_catalog = '{dbname}'"
         )
         tables = set(x for (x,) in cur.fetchall())
         assert tables == dbtables


 def test_cli_init_db_default_flavor(provenance_db):
     "Test that 'swh db init provenance' defaults to a with-path flavored DB"
     dbname = provenance_db.dsn
     result = CliRunner().invoke(swhmain, ["db", "init", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output

     with provenance_db.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == ("with-path",)
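For reference, the deprecated configuration that `test_cli_create_deprecated` writes to `config.yml` serializes as follows (a sketch of PyYAML's default output for that dict; the on-disk shape is an inference, not quoted from the repo):

```python
import yaml

conf = {"provenance": {"cls": "local", "with_path": True}}
print(yaml.dump(conf))
# provenance:
#   cls: local
#   with_path: true
```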
"provenance"]) assert result.exit_code == 0, result.output with provenance_db.cursor() as cur: cur.execute("select swh_get_dbflavor()") assert cur.fetchone() == ("with-path",) diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py index ea2c133..8abc38a 100644 --- a/swh/provenance/tests/test_provenance_db.py +++ b/swh/provenance/tests/test_provenance_db.py @@ -1,288 +1,292 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime import pytest from swh.model.tests.swh_model_data import TEST_OBJECTS from swh.provenance.model import RevisionEntry from swh.provenance.origin import OriginEntry from swh.provenance.provenance import origin_add, revision_add from swh.provenance.storage.archive import ArchiveStorage from swh.provenance.tests.conftest import synthetic_result def ts2dt(ts: dict) -> datetime.datetime: timestamp = datetime.datetime.fromtimestamp( ts["timestamp"]["seconds"], datetime.timezone(datetime.timedelta(minutes=ts["offset"])), ) return timestamp.replace(microsecond=ts["timestamp"]["microseconds"]) def test_provenance_origin_add(provenance, swh_storage_with_objects): """Test the ProvenanceDB.origin_add() method""" for origin in TEST_OBJECTS["origin"]: entry = OriginEntry(url=origin.url, revisions=[]) origin_add(ArchiveStorage(swh_storage_with_objects), provenance, entry) # TODO: check some facts here def test_provenance_add_revision(provenance, storage_and_CMDBTS, archive): storage, data = storage_and_CMDBTS for i in range(2): # do it twice, there should be no change in results for revision in data["revision"]: entry = RevisionEntry( id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], ) revision_add(provenance, archive, entry) # there should be as many entries in 'revision' as revisions from the # test dataset provenance.cursor.execute("SELECT count(*) FROM revision") assert provenance.cursor.fetchone()[0] == len(data["revision"]) # there should be no 'location' for the empty path provenance.cursor.execute("SELECT count(*) FROM location WHERE path=''") assert provenance.cursor.fetchone()[0] == 0 # there should be 32 'location' for non-empty path provenance.cursor.execute("SELECT count(*) FROM location WHERE path!=''") assert provenance.cursor.fetchone()[0] == 32 # there should be as many entries in 'revision' as revisions from the # test dataset provenance.cursor.execute("SELECT count(*) FROM revision") assert provenance.cursor.fetchone()[0] == len(data["revision"]) # 7 directories provenance.cursor.execute("SELECT count(*) FROM directory") assert provenance.cursor.fetchone()[0] == 7 # 12 D-R entries provenance.cursor.execute("SELECT count(*) FROM directory_in_rev") assert provenance.cursor.fetchone()[0] == 12 provenance.cursor.execute("SELECT count(*) FROM content") assert provenance.cursor.fetchone()[0] == len(data["content"]) provenance.cursor.execute("SELECT count(*) FROM content_in_dir") assert provenance.cursor.fetchone()[0] == 16 provenance.cursor.execute("SELECT count(*) FROM content_early_in_rev") assert provenance.cursor.fetchone()[0] == 13 def test_provenance_content_find_first(provenance, storage_and_CMDBTS, archive): storage, data = storage_and_CMDBTS for revision in data["revision"]: entry = RevisionEntry( - id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], + 
id=revision["id"], + date=ts2dt(revision["date"]), + root=revision["directory"], ) revision_add(provenance, archive, entry) first_expected_content = [ { "content": "43f3c871310a8e524004e91f033e7fb3b0bc8475", "rev": "35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5", "date": 1609757158, "path": "README.md", }, { "content": "6dc7e44ead5c0e300fe94448c3e046dfe33ad4d1", "rev": "9e36e095b79e36a3da104ce272989b39cd68aefd", "date": 1610644094, "path": "Red/Blue/Green/a", }, { "content": "9f6e04be05297905f1275d3f4e0bb0583458b2e8", "rev": "bfbfcc72ae7fc35d6941386c36280512e6b38440", "date": 1610644097, "path": "Red/Blue/Green/b", }, { "content": "a28fa70e725ebda781e772795ca080cd737b823c", "rev": "0a31c9d509783abfd08f9fdfcd3acae20f17dfd0", "date": 1610644099, "path": "Red/Blue/c", }, { "content": "c0229d305adf3edf49f031269a70e3e87665fe88", "rev": "1d1fcf1816a8a2a77f9b1f342ba11d0fe9fd7f17", "date": 1610644105, "path": "Purple/d", }, { "content": "94ba40161084e8b80943accd9d24e1f9dd47189b", "rev": "55d4dc9471de6144f935daf3c38878155ca274d5", "date": 1610644113, "path": ("Dark/Brown/Purple/f", "Dark/Brown/Purple/g", "Dark/h"), # XXX }, { "content": "5e8f9ceaee9dafae2e3210e254fdf170295f8b5b", "rev": "a8939755d0be76cfea136e9e5ebce9bc51c49fef", "date": 1610644116, "path": "Dark/h", }, { "content": "bbd54b961764094b13f10cef733e3725d0a834c3", "rev": "ca1774a07b6e02c1caa7ae678924efa9259ee7c6", "date": 1610644118, "path": "Paris/i", }, { "content": "7ce4fe9a22f589fa1656a752ea371b0ebc2106b1", "rev": "611fe71d75b6ea151b06e3845c09777acc783d82", "date": 1610644120, "path": "Paris/j", }, { "content": "cb79b39935c9392fa5193d9f84a6c35dc9c22c75", "rev": "4c5551b4969eb2160824494d40b8e1f6187fc01e", "date": 1610644122, "path": "Paris/k", }, ] for expected in first_expected_content: contentid = bytes.fromhex(expected["content"]) (blob, rev, date, path) = provenance.content_find_first(contentid) if isinstance(expected["path"], tuple): assert bytes(path).decode() in expected["path"] else: assert bytes(path).decode() == expected["path"] assert bytes(blob) == contentid assert bytes(rev).hex() == expected["rev"] assert int(date.timestamp()) == expected["date"] def sha1s(cur, table): """return the 'sha1' column from the DB 'table' (as hex) 'cur' is a cursor to the provenance index DB. """ cur.execute(f"SELECT sha1 FROM {table}") return set(sha1.hex() for (sha1,) in cur.fetchall()) def locations(cur): """return the 'path' column from the DB location table 'cur' is a cursor to the provenance index DB. """ cur.execute("SELECT encode(location.path::bytea, 'escape') FROM location") return set(x for (x,) in cur.fetchall()) def relations(cur, src, dst): """return the triplets ('sha1', 'sha1', 'path') from the DB for the relation between 'src' table and 'dst' table (i.e. for C-R, C-D and D-R relations). 'cur' is a cursor to the provenance index DB. 
""" relation = { ("content", "revision"): "content_early_in_rev", ("content", "directory"): "content_in_dir", ("directory", "revision"): "directory_in_rev", }[(src, dst)] srccol = {"content": "blob", "directory": "dir"}[src] dstcol = {"directory": "dir", "revision": "rev"}[dst] cur.execute( f"SELECT encode(src.sha1::bytea, 'hex')," f" encode(dst.sha1::bytea, 'hex')," f" encode(location.path::bytea, 'escape') " f"FROM {relation} as rel, " f" {src} as src, {dst} as dst, location " f"WHERE rel.{srccol}=src.id AND rel.{dstcol}=dst.id AND rel.loc=location.id" ) return set(cur.fetchall()) @pytest.mark.parametrize( "syntheticfile, args", ( ("synthetic_lower_1.txt", {"lower": True, "mindepth": 1}), ("synthetic_upper_1.txt", {"lower": False, "mindepth": 1}), ("synthetic_lower_2.txt", {"lower": True, "mindepth": 2}), ("synthetic_upper_2.txt", {"lower": False, "mindepth": 2}), ), ) def test_provenance_heuristics( provenance, storage_and_CMDBTS, archive, syntheticfile, args ): storage, data = storage_and_CMDBTS revisions = {rev["id"]: rev for rev in data["revision"]} rows = { "content": set(), "content_in_dir": set(), "content_early_in_rev": set(), "directory": set(), "directory_in_rev": set(), "location": set(), "revision": set(), } for synth_rev in synthetic_result(syntheticfile): revision = revisions[synth_rev["sha1"]] entry = RevisionEntry( - id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], + id=revision["id"], + date=ts2dt(revision["date"]), + root=revision["directory"], ) revision_add(provenance, archive, entry, **args) # each "entry" in the synth file is one new revision rows["revision"].add(synth_rev["sha1"].hex()) assert rows["revision"] == sha1s(provenance.cursor, "revision"), synth_rev[ "msg" ] # this revision might have added new content objects rows["content"] |= set(x["dst"].hex() for x in synth_rev["R_C"]) rows["content"] |= set(x["dst"].hex() for x in synth_rev["D_C"]) assert rows["content"] == sha1s(provenance.cursor, "content"), synth_rev["msg"] # check for R-C (direct) entries rows["content_early_in_rev"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_C"] ) assert rows["content_early_in_rev"] == relations( provenance.cursor, "content", "revision" ), synth_rev["msg"] # check directories rows["directory"] |= set(x["dst"].hex() for x in synth_rev["R_D"]) assert rows["directory"] == sha1s(provenance.cursor, "directory"), synth_rev[ "msg" ] # check for R-D entries rows["directory_in_rev"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_D"] ) assert rows["directory_in_rev"] == relations( provenance.cursor, "directory", "revision" ), synth_rev["msg"] # check for D-C entries rows["content_in_dir"] |= set( (x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["D_C"] ) assert rows["content_in_dir"] == relations( provenance.cursor, "content", "directory" ), synth_rev["msg"] # check for location entries rows["location"] |= set(x["path"] for x in synth_rev["R_C"]) rows["location"] |= set(x["path"] for x in synth_rev["D_C"]) rows["location"] |= set(x["path"] for x in synth_rev["R_D"]) assert rows["location"] == locations(provenance.cursor), synth_rev["msg"] diff --git a/tox.ini b/tox.ini index bd11bd4..acd42f2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,35 +1,35 @@ [tox] envlist=black,flake8,mypy,py3 [testenv] extras = testing deps = pytest-cov commands = pytest --doctest-modules \ {envsitepackagesdir}/swh/provenance \ --cov={envsitepackagesdir}/swh/provenance \ --cov-branch {posargs} [testenv:black] 
diff --git a/tox.ini b/tox.ini
index bd11bd4..acd42f2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,35 +1,35 @@
 [tox]
 envlist=black,flake8,mypy,py3

 [testenv]
 extras =
   testing
 deps =
   pytest-cov
 commands =
   pytest --doctest-modules \
          {envsitepackagesdir}/swh/provenance \
          --cov={envsitepackagesdir}/swh/provenance \
          --cov-branch {posargs}

 [testenv:black]
 skip_install = true
 deps =
-  black==19.10b0
+  black==20.8b1
 commands =
   {envpython} -m black --check swh

 [testenv:flake8]
 skip_install = true
 deps =
   flake8
 commands =
   {envpython} -m flake8

 [testenv:mypy]
 extras =
   testing
 deps =
   mypy
 commands =
   mypy swh