diff --git a/contents.py b/contents.py index 5f5b5da..c4c4834 100644 --- a/contents.py +++ b/contents.py @@ -1,52 +1,58 @@ import os import psycopg2 -from swh.model.hashutil import (hash_to_bytes, hash_to_hex) +from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.provenance.provenance import get_provenance if __name__ == "__main__": conninfo = { "host": "localhost", "database": "new_1000", "user": "postgres", - "password": "postgres" + "password": "postgres", } provenance = get_provenance(conninfo) - print('content(id, date): ################################################') - provenance.cursor.execute('''SELECT id, date FROM content ORDER BY id''') + print("content(id, date): ################################################") + provenance.cursor.execute("""SELECT id, date FROM content ORDER BY id""") for row in provenance.cursor.fetchall(): - print(f'{hash_to_hex(row[0])}, {row[1]}') - print('###################################################################') + print(f"{hash_to_hex(row[0])}, {row[1]}") + print("###################################################################") - print('content_early_in_rev(blob, rev, path): ############################') - provenance.cursor.execute('''SELECT blob, rev, path FROM content_early_in_rev ORDER BY blob, rev, path''') + print("content_early_in_rev(blob, rev, path): ############################") + provenance.cursor.execute( + """SELECT blob, rev, path FROM content_early_in_rev ORDER BY blob, rev, path""" + ) for row in provenance.cursor.fetchall(): - print(f'{row[0]}, {row[1]}, {row[2]}') - print(f'{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}') - print('###################################################################') - - print('content_in_dir(blob, dir, path): ##################################') - provenance.cursor.execute('''SELECT blob, dir, path FROM content_in_dir ORDER BY blob, dir, path''') + print(f"{row[0]}, {row[1]}, {row[2]}") + print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}") + print("###################################################################") + + print("content_in_dir(blob, dir, path): ##################################") + provenance.cursor.execute( + """SELECT blob, dir, path FROM content_in_dir ORDER BY blob, dir, path""" + ) for row in provenance.cursor.fetchall(): - print(f'{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}') - print('###################################################################') + print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}") + print("###################################################################") - print('directory(id, date): ##############################################') - provenance.cursor.execute('''SELECT id, date FROM directory ORDER BY id''') + print("directory(id, date): ##############################################") + provenance.cursor.execute("""SELECT id, date FROM directory ORDER BY id""") for row in provenance.cursor.fetchall(): - print(f'{hash_to_hex(row[0])}, {row[1]}') - print('###################################################################') + print(f"{hash_to_hex(row[0])}, {row[1]}") + print("###################################################################") - print('directory_in_rev(dir, rev, path): #################################') - provenance.cursor.execute('''SELECT dir, rev, path FROM directory_in_rev ORDER BY dir, rev, path''') + print("directory_in_rev(dir, rev, path): #################################") + 
provenance.cursor.execute( + """SELECT dir, rev, path FROM directory_in_rev ORDER BY dir, rev, path""" + ) for row in provenance.cursor.fetchall(): - print(f'{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}') - print('###################################################################') + print(f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {os.fsdecode(row[2])}") + print("###################################################################") - print('revision(id, date): ###############################################') - provenance.cursor.execute('''SELECT id, date FROM revision ORDER BY id''') + print("revision(id, date): ###############################################") + provenance.cursor.execute("""SELECT id, date FROM revision ORDER BY id""") for row in provenance.cursor.fetchall(): - print(f'{hash_to_hex(row[0])}, {row[1]}') - print('###################################################################') + print(f"{hash_to_hex(row[0])}, {row[1]}") + print("###################################################################") diff --git a/revisions.py b/revisions.py index eff0fcf..ccadf0e 100644 --- a/revisions.py +++ b/revisions.py @@ -1,62 +1,69 @@ import io import random import pytz from datetime import datetime -from swh.model.hashutil import (hash_to_bytes, hash_to_hex) +from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.storage import get_storage from swh.provenance.revision import RevisionEntry def rev_to_csv(revision: RevisionEntry): - return ','.join([ - hash_to_hex(revision.id), - str(pytz.utc.localize(revision.date)), - hash_to_hex(revision.root) - ]) + '\n' + return ( + ",".join( + [ + hash_to_hex(revision.id), + str(pytz.utc.localize(revision.date)), + hash_to_hex(revision.root), + ] + ) + + "\n" + ) if __name__ == "__main__": conninfo = { "cls": "remote", - "url": "http://uffizi.internal.softwareheritage.org:5002" + "url": "http://uffizi.internal.softwareheritage.org:5002", } storage = get_storage(**conninfo) revisions = [ # '6eec5815ef8fc88d9fc5bcc91c6465a8899c1445', # 'd1468bb5f06ca44cc42c43fbd011c5dcbdc262c6', # '6a45ebb887d87ee53f359aaeba8a9840576c907b' - '02f95c0a1868cbef82ff73fc1b903183a579c7de', - 'da061f1caf293a5da00bff6a45abcf4d7ae54c50', - 'e3bfd73a9fd8ef3dd4c5b05a927de485f9871323' + "02f95c0a1868cbef82ff73fc1b903183a579c7de", + "da061f1caf293a5da00bff6a45abcf4d7ae54c50", + "e3bfd73a9fd8ef3dd4c5b05a927de485f9871323", ] print(revisions) revisions = list(map(hash_to_bytes, revisions)) print(revisions) entries = [] for revision in storage.revision_get(revisions): if revision is not None: print(revision) - entries.append(RevisionEntry( - storage, - revision.id, - datetime.fromtimestamp(revision.date.timestamp.seconds), - revision.directory - )) + entries.append( + RevisionEntry( + storage, + revision.id, + datetime.fromtimestamp(revision.date.timestamp.seconds), + revision.directory, + ) + ) random.shuffle(entries) - with io.open('random.csv', 'w') as outfile: + with io.open("random.csv", "w") as outfile: for revision in entries: outfile.write(rev_to_csv(revision)) - with io.open('ordered.csv', 'w') as outfile: + with io.open("ordered.csv", "w") as outfile: for revision in sorted(entries, key=lambda rev: rev.date): outfile.write(rev_to_csv(revision)) - with io.open('reverse.csv', 'w') as outfile: + with io.open("reverse.csv", "w") as outfile: for revision in sorted(entries, key=lambda rev: rev.date, reverse=True): outfile.write(rev_to_csv(revision)) diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py index 5fca354..f733b0e 
100644 --- a/swh/provenance/cli.py +++ b/swh/provenance/cli.py @@ -1,186 +1,187 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control import os from typing import Any, Dict, Optional import click import yaml from swh.core import config from swh.core.cli import CONTEXT_SETTINGS from swh.core.cli import swh as swh_cli_group -from swh.model.hashutil import (hash_to_bytes, hash_to_hex) +from swh.model.hashutil import hash_to_bytes, hash_to_hex # All generic config code should reside in swh.core.config CONFIG_ENVVAR = "SWH_CONFIG_FILE" DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml") DEFAULT_PATH = os.environ.get(CONFIG_ENVVAR, DEFAULT_CONFIG_PATH) DEFAULT_CONFIG: Dict[str, Any] = { "archive": { "cls": "api", "storage": { "cls": "remote", - "url": "http://uffizi.internal.softwareheritage.org:5002" + "url": "http://uffizi.internal.softwareheritage.org:5002", } # "cls": "ps", # "db": { # "host": "db.internal.softwareheritage.org", # "dbname": "softwareheritage", # "user": "guest" # } }, - "provenance": { - "cls": "ps", - "db": { - "host": "localhost", - "dbname": "provenance" - } - } + "provenance": {"cls": "ps", "db": {"host": "localhost", "dbname": "provenance"}}, } CONFIG_FILE_HELP = f"""Configuration file: \b The CLI option or the environment variable will fail if invalid. CLI option is checked first. Then, environment variable {CONFIG_ENVVAR} is checked. Then, if cannot load the default path, a set of default values are used. Default config path is {DEFAULT_CONFIG_PATH}. Default config values are: \b {yaml.dump(DEFAULT_CONFIG)}""" PROVENANCE_HELP = f"""Software Heritage Scanner tools. 
{CONFIG_FILE_HELP}""" @swh_cli_group.group( - name="provenance", context_settings=CONTEXT_SETTINGS, help=PROVENANCE_HELP, + name="provenance", + context_settings=CONTEXT_SETTINGS, + help=PROVENANCE_HELP, ) @click.option( "-C", "--config-file", default=None, type=click.Path(exists=False, dir_okay=False, path_type=str), help="""YAML configuration file""", ) @click.option("--profile", default=None) @click.pass_context def cli(ctx, config_file: Optional[str], profile: str): if config_file is None and config.config_exists(DEFAULT_PATH): config_file = DEFAULT_PATH if config_file is None: conf = DEFAULT_CONFIG else: # read_raw_config do not fail on ENOENT if not config.config_exists(config_file): raise FileNotFoundError(config_file) conf = config.read_raw_config(config.config_basepath(config_file)) conf = config.merge_configs(DEFAULT_CONFIG, conf) ctx.ensure_object(dict) ctx.obj["config"] = conf if profile: import cProfile import atexit print("Profiling...") pr = cProfile.Profile() pr.enable() def exit(): pr.disable() pr.dump_stats(profile) atexit.register(exit) @cli.command(name="create") @click.option("--name", default=None) @click.pass_context def create(ctx, name): """Create new provenance database.""" from .postgresql.db_utils import connect from .postgresql.provenance import create_database # Connect to server without selecting a database conninfo = ctx.obj["config"]["provenance"]["db"] conn = connect(conninfo) create_database(conn, conninfo, name) @cli.command(name="iter-revisions") @click.argument("filename") -@click.option('-l', '--limit', type=int) +@click.option("-l", "--limit", type=int) @click.pass_context def iter_revisions(ctx, filename, limit): """Iterate over provided list of revisions and add them to the provenance database.""" from . import get_archive, get_provenance from .revision import FileRevisionIterator from .provenance import revision_add archive = get_archive(**ctx.obj["config"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]) revisions = FileRevisionIterator(filename, archive, limit=limit) while True: revision = revisions.next() - if revision is None: break + if revision is None: + break revision_add(provenance, archive, revision) @cli.command(name="iter-origins") @click.argument("filename") -@click.option('-l', '--limit', type=int) +@click.option("-l", "--limit", type=int) @click.pass_context def iter_origins(ctx, filename, limit): """Iterate over provided list of revisions and add them to the provenance database.""" from . import get_archive, get_provenance from .origin import FileOriginIterator from .provenance import origin_add archive = get_archive(**ctx.obj["config"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]) for origin in FileOriginIterator(filename, archive, limit=limit): origin_add(provenance, origin) @cli.command(name="find-first") @click.argument("swhid") @click.pass_context def find_first(ctx, swhid): """Find first occurrence of the requested blob.""" from . 
import get_provenance provenance = get_provenance(**ctx.obj["config"]["provenance"]) # TODO: return a dictionary with proper keys for each field row = provenance.content_find_first(hash_to_bytes(swhid)) if row is not None: - print(f'{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {row[2]}, {os.fsdecode(row[3])}') + print( + f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {row[2]}, {os.fsdecode(row[3])}" + ) else: - print(f'Cannot find a content with the id {swhid}') + print(f"Cannot find a content with the id {swhid}") @cli.command(name="find-all") @click.argument("swhid") @click.pass_context def find_all(ctx, swhid): """Find all occurrences of the requested blob.""" from swh.provenance import get_provenance provenance = get_provenance(**ctx.obj["config"]["provenance"]) # TODO: return a dictionary with proper keys for each field for row in provenance.content_find_all(hash_to_bytes(swhid)): - print(f'{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {row[2]}, {os.fsdecode(row[3])}') + print( + f"{hash_to_hex(row[0])}, {hash_to_hex(row[1])}, {row[2]}, {os.fsdecode(row[3])}" + ) diff --git a/swh/provenance/model.py b/swh/provenance/model.py index 4889b18..a04d8e8 100644 --- a/swh/provenance/model.py +++ b/swh/provenance/model.py @@ -1,46 +1,49 @@ import os from .archive import ArchiveInterface from pathlib import PosixPath # class Tree: # def __init__(self, archive: ArchiveInterface, id: bytes): # self.root = DirectoryEntry(archive, id, PosixPath('.')) class TreeEntry: def __init__(self, id: bytes, name: PosixPath): self.id = id self.name = name class DirectoryEntry(TreeEntry): def __init__(self, archive: ArchiveInterface, id: bytes, name: PosixPath): super().__init__(id, name) self.archive = archive self.children = None def __iter__(self): if self.children is None: self.children = [] for child in self.archive.directory_ls(self.id): - if child['type'] == 'dir': - self.children.append(DirectoryEntry( - self.archive, - child['target'], - PosixPath(os.fsdecode(child['name'])) - )) - - elif child['type'] == 'file': - self.children.append(FileEntry( - child['target'], - PosixPath(os.fsdecode(child['name'])) - )) + if child["type"] == "dir": + self.children.append( + DirectoryEntry( + self.archive, + child["target"], + PosixPath(os.fsdecode(child["name"])), + ) + ) + + elif child["type"] == "file": + self.children.append( + FileEntry( + child["target"], PosixPath(os.fsdecode(child["name"])) + ) + ) return iter(self.children) class FileEntry(TreeEntry): pass diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py index 624ac8c..3db9d36 100644 --- a/swh/provenance/origin.py +++ b/swh/provenance/origin.py @@ -1,100 +1,109 @@ from .archive import ArchiveInterface from .revision import RevisionEntry from typing import Optional from swh.model.model import Origin, ObjectType, TargetType class OriginEntry: def __init__(self, url, revisions, id=None): self.id = id self.url = url self.revisions = revisions ################################################################################ ################################################################################ + class OriginIterator: """Iterator interface.""" def __iter__(self): pass def __next__(self): pass class FileOriginIterator(OriginIterator): """Iterator over origins present in the given CSV file.""" - def __init__(self, filename: str, archive: ArchiveInterface, limit: Optional[int]=None): + def __init__( + self, filename: str, archive: ArchiveInterface, limit: Optional[int] = None + ): self.file = open(filename) self.limit = limit # 
self.mutex = threading.Lock() self.archive = archive def __iter__(self): yield from iterate_statuses( - [Origin(url.strip()) for url in self.file], - self.archive, - self.limit + [Origin(url.strip()) for url in self.file], self.archive, self.limit ) class ArchiveOriginIterator: """Iterator over origins present in the given storage.""" - def __init__(self, archive: ArchiveInterface, limit: Optional[int]=None): + def __init__(self, archive: ArchiveInterface, limit: Optional[int] = None): self.limit = limit # self.mutex = threading.Lock() self.archive = archive def __iter__(self): yield from iterate_statuses( - self.archive.iter_origins(), - self.archive, - self.limit + self.archive.iter_origins(), self.archive, self.limit ) -def iterate_statuses(origins, archive: ArchiveInterface, limit: Optional[int]=None): +def iterate_statuses(origins, archive: ArchiveInterface, limit: Optional[int] = None): idx = 0 for origin in origins: for visit in archive.iter_origin_visits(origin.url): for status in archive.iter_origin_visit_statuses(origin.url, visit.visit): # TODO: may filter only those whose status is 'full'?? targets = [] releases = [] snapshot = archive.snapshot_get_all_branches(status.snapshot) if snapshot is not None: for branch in snapshot.branches: if snapshot.branches[branch].target_type == TargetType.REVISION: targets.append(snapshot.branches[branch].target) - elif snapshot.branches[branch].target_type == TargetType.RELEASE: + elif ( + snapshot.branches[branch].target_type == TargetType.RELEASE + ): releases.append(snapshot.branches[branch].target) # This is done to keep the query in release_get small, hence avoiding a timeout. limit = 100 for i in range(0, len(releases), limit): - for release in archive.release_get(releases[i:i+limit]): + for release in archive.release_get(releases[i : i + limit]): if release is not None: if release.target_type == ObjectType.REVISION: targets.append(release.target) # This is done to keep the query in revision_get small, hence avoiding a timeout. 
revisions = [] limit = 100 for i in range(0, len(targets), limit): - for revision in archive.revision_get(targets[i:i+limit]): + for revision in archive.revision_get(targets[i : i + limit]): if revision is not None: - parents = list(map(lambda id: RevisionEntry(archive, id), revision.parents)) - revisions.append(RevisionEntry(archive, revision.id, parents=parents)) + parents = list( + map( + lambda id: RevisionEntry(archive, id), + revision.parents, + ) + ) + revisions.append( + RevisionEntry(archive, revision.id, parents=parents) + ) yield OriginEntry(status.origin, revisions) idx = idx + 1 - if idx == limit: return + if idx == limit: + return diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py index 25b1f18..c4f6389 100644 --- a/swh/provenance/postgresql/archive.py +++ b/swh/provenance/postgresql/archive.py @@ -1,65 +1,65 @@ import psycopg2 from ..archive import ArchiveInterface # from functools import lru_cache from methodtools import lru_cache from typing import List class ArchivePostgreSQL(ArchiveInterface): def __init__(self, conn: psycopg2.extensions.connection): self.conn = conn self.cursor = conn.cursor() @lru_cache(maxsize=1000000) def directory_ls(self, id: bytes): - self.cursor.execute('''WITH + self.cursor.execute( + """WITH dir AS (SELECT id AS dir_id, dir_entries, file_entries, rev_entries FROM directory WHERE id=%s), ls_d AS (SELECT dir_id, unnest(dir_entries) AS entry_id from dir), ls_f AS (SELECT dir_id, unnest(file_entries) AS entry_id from dir), ls_r AS (SELECT dir_id, unnest(rev_entries) AS entry_id from dir) (SELECT 'dir'::directory_entry_type AS type, e.target, e.name, NULL::sha1_git FROM ls_d LEFT JOIN directory_entry_dir e ON ls_d.entry_id=e.id) UNION (WITH known_contents AS (SELECT 'file'::directory_entry_type AS type, e.target, e.name, c.sha1_git FROM ls_f LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id INNER JOIN content c ON e.target=c.sha1_git) SELECT * FROM known_contents UNION (SELECT 'file'::directory_entry_type AS type, e.target, e.name, c.sha1_git FROM ls_f LEFT JOIN directory_entry_file e ON ls_f.entry_id=e.id LEFT JOIN skipped_content c ON e.target=c.sha1_git WHERE NOT EXISTS (SELECT 1 FROM known_contents WHERE known_contents.sha1_git=e.target))) ORDER BY name - ''', (id,)) - return [{'type': row[0], 'target': row[1], 'name': row[2]} for row in self.cursor.fetchall()] - + """, + (id,), + ) + return [ + {"type": row[0], "target": row[1], "name": row[2]} + for row in self.cursor.fetchall() + ] def iter_origins(self): raise NotImplementedError - def iter_origin_visits(self, origin: str): raise NotImplementedError - def iter_origin_visit_statuses(self, origin: str, visit: int): raise NotImplementedError - def release_get(self, ids: List[bytes]): raise NotImplementedError - def revision_get(self, ids: List[bytes]): raise NotImplementedError - def snapshot_get_all_branches(self, snapshot: bytes): raise NotImplementedError diff --git a/swh/provenance/postgresql/db_utils.py b/swh/provenance/postgresql/db_utils.py index 003f52b..483f6b1 100644 --- a/swh/provenance/postgresql/db_utils.py +++ b/swh/provenance/postgresql/db_utils.py @@ -1,62 +1,62 @@ import io import psycopg2 from configparser import ConfigParser from pathlib import PosixPath def config(filename: PosixPath, section: str): # create a parser parser = ConfigParser() # read config file parser.read(filename) # get section, default to postgresql db = {} if parser.has_section(section): params = parser.items(section) for param in params: db[param[0]] = param[1] 
else: - raise Exception(f'Section {section} not found in the {filename} file') + raise Exception(f"Section {section} not found in the {filename} file") return db def typecast_bytea(value, cur): if value is not None: data = psycopg2.BINARY(value, cur) return data.tobytes() def adapt_conn(conn): """Makes psycopg2 use 'bytes' to decode bytea instead of 'memoryview', for this connection.""" t_bytes = psycopg2.extensions.new_type((17,), "bytea", typecast_bytea) psycopg2.extensions.register_type(t_bytes, conn) t_bytes_array = psycopg2.extensions.new_array_type((1001,), "bytea[]", t_bytes) psycopg2.extensions.register_type(t_bytes_array, conn) def connect(params: dict): """ Connect to the PostgreSQL database server """ conn = None try: # connect to the PostgreSQL server conn = psycopg2.connect(**params) adapt_conn(conn) except (Exception, psycopg2.DatabaseError) as error: print(error) return conn def execute_sql(conn: psycopg2.extensions.connection, filename: PosixPath): with io.open(filename) as file: cur = conn.cursor() cur.execute(file.read()) cur.close() conn.commit() diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py index 525821b..379152c 100644 --- a/swh/provenance/postgresql/provenance.py +++ b/swh/provenance/postgresql/provenance.py @@ -1,435 +1,425 @@ import itertools import logging import os import psycopg2 import psycopg2.extras from ..model import DirectoryEntry, FileEntry from ..origin import OriginEntry from .db_utils import connect, execute_sql from ..provenance import ProvenanceInterface from ..revision import RevisionEntry from datetime import datetime from pathlib import PosixPath from typing import Any, Dict, List from swh.model.hashutil import hash_to_hex def normalize(path: PosixPath) -> PosixPath: spath = str(path) - if spath.startswith('./'): + if spath.startswith("./"): return PosixPath(spath[2:]) return path -def create_database( - conn: psycopg2.extensions.connection, - conninfo: dict, - name: str -): +def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str): conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) # Normalize dbname to avoid issues when reconnecting below name = name.casefold() # Create new database dropping previous one if exists cursor = conn.cursor() - cursor.execute(f'''DROP DATABASE IF EXISTS {name}''') - cursor.execute(f'''CREATE DATABASE {name}''') + cursor.execute(f"""DROP DATABASE IF EXISTS {name}""") + cursor.execute(f"""CREATE DATABASE {name}""") conn.close() # Reconnect to server selecting newly created database to add tables - conninfo['dbname'] = name + conninfo["dbname"] = name conn = connect(conninfo) sqldir = os.path.dirname(os.path.realpath(__file__)) - execute_sql(conn, PosixPath(os.path.join(sqldir, 'provenance.sql'))) + execute_sql(conn, PosixPath(os.path.join(sqldir, "provenance.sql"))) ################################################################################ ################################################################################ ################################################################################ + class ProvenancePostgreSQL(ProvenanceInterface): def __init__(self, conn: psycopg2.extensions.connection): # TODO: consider adding a mutex for thread safety conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) self.conn = conn self.cursor = self.conn.cursor() self.insert_cache: Dict[str, Any] = {} self.select_cache: Dict[str, Any] = {} self.clear_caches() - def clear_caches(self): self.insert_cache = { 
"content": dict(), "content_early_in_rev": list(), "content_in_dir": list(), "directory": dict(), "directory_in_rev": list(), "revision": dict(), "revision_before_rev": list(), - "revision_in_org": list() - } - self.select_cache = { - "content": dict(), - "directory": dict(), - "revision": dict() + "revision_in_org": list(), } - + self.select_cache = {"content": dict(), "directory": dict(), "revision": dict()} def commit(self): result = False try: self.insert_all() # self.conn.commit() result = True # except psycopg2.DatabaseError: # # Database error occurred, rollback all changes # self.conn.rollback() # # TODO: maybe serialize and auto-merge transations. # # The only conflicts are on: # # - content: we keep the earliest date # # - directory: we keep the earliest date # # - content_in_dir: there should be just duplicated entries. except Exception as error: # Unexpected error occurred, rollback all changes and log message - logging.warning(f'Unexpected error: {error}') + logging.warning(f"Unexpected error: {error}") # self.conn.rollback() finally: self.clear_caches() return result - def content_add_to_directory( - self, - directory: DirectoryEntry, - blob: FileEntry, - prefix: PosixPath + self, directory: DirectoryEntry, blob: FileEntry, prefix: PosixPath ): # logging.debug(f'NEW occurrence of content {hash_to_hex(blob.id)} in directory {hash_to_hex(directory.id)} (path: {prefix / blob.name})') # self.cursor.execute('''INSERT INTO content_in_dir VALUES (%s,%s,%s)''', # (blob.id, directory.id, bytes(normalize(prefix / blob.name)))) - self.insert_cache['content_in_dir'].append( + self.insert_cache["content_in_dir"].append( (blob.id, directory.id, bytes(normalize(prefix / blob.name))) ) - def content_add_to_revision( - self, - revision: RevisionEntry, - blob: FileEntry, - prefix: PosixPath + self, revision: RevisionEntry, blob: FileEntry, prefix: PosixPath ): # logging.debug(f'EARLY occurrence of blob {hash_to_hex(blob.id)} in revision {hash_to_hex(revision.id)} (path: {prefix / blob.name})') # self.cursor.execute('''INSERT INTO content_early_in_rev VALUES (%s,%s,%s)''', # (blob.id, revision.id, bytes(normalize(prefix / blob.name)))) - self.insert_cache['content_early_in_rev'].append( + self.insert_cache["content_early_in_rev"].append( (blob.id, revision.id, bytes(normalize(prefix / blob.name))) ) - def content_find_first(self, blobid: str): - logging.info(f'Retrieving first occurrence of content {hash_to_hex(blobid)}') - self.cursor.execute('''SELECT blob, rev, date, path + logging.info(f"Retrieving first occurrence of content {hash_to_hex(blobid)}") + self.cursor.execute( + """SELECT blob, rev, date, path FROM content_early_in_rev JOIN revision ON revision.id=content_early_in_rev.rev - WHERE content_early_in_rev.blob=%s ORDER BY date, rev, path ASC LIMIT 1''', (blobid,)) + WHERE content_early_in_rev.blob=%s ORDER BY date, rev, path ASC LIMIT 1""", + (blobid,), + ) return self.cursor.fetchone() - def content_find_all(self, blobid: str): - logging.info(f'Retrieving all occurrences of content {hash_to_hex(blobid)}') - self.cursor.execute('''(SELECT blob, rev, date, path + logging.info(f"Retrieving all occurrences of content {hash_to_hex(blobid)}") + self.cursor.execute( + """(SELECT blob, rev, date, path FROM content_early_in_rev JOIN revision ON revision.id=content_early_in_rev.rev WHERE content_early_in_rev.blob=%s) UNION (SELECT content_in_rev.blob, content_in_rev.rev, revision.date, content_in_rev.path FROM (SELECT content_in_dir.blob, directory_in_rev.rev, CASE directory_in_rev.path WHEN 
'.' THEN content_in_dir.path ELSE (directory_in_rev.path || '/' || content_in_dir.path)::unix_path END AS path FROM content_in_dir JOIN directory_in_rev ON content_in_dir.dir=directory_in_rev.dir WHERE content_in_dir.blob=%s) AS content_in_rev JOIN revision ON revision.id=content_in_rev.rev) - ORDER BY date, rev, path''', (blobid, blobid)) - # POSTGRESQL EXPLAIN + ORDER BY date, rev, path""", + (blobid, blobid), + ) + # POSTGRESQL EXPLAIN yield from self.cursor.fetchall() - def content_get_early_date(self, blob: FileEntry) -> datetime: - logging.debug(f'Getting content {hash_to_hex(blob.id)} early date') + logging.debug(f"Getting content {hash_to_hex(blob.id)} early date") # First check if the date is being modified by current transection. - date = self.insert_cache['content'].get(blob.id, None) + date = self.insert_cache["content"].get(blob.id, None) if date is None: # If not, check whether it's been query before - date = self.select_cache['content'].get(blob.id, None) + date = self.select_cache["content"].get(blob.id, None) if date is None: # Otherwise, query the database and cache the value - self.cursor.execute('''SELECT date FROM content WHERE id=%s''', - (blob.id,)) + self.cursor.execute( + """SELECT date FROM content WHERE id=%s""", (blob.id,) + ) row = self.cursor.fetchone() date = row[0] if row is not None else None - self.select_cache['content'][blob.id] = date + self.select_cache["content"][blob.id] = date return date - def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]: dates = {} pending = [] for blob in blobs: # First check if the date is being modified by current transection. - date = self.insert_cache['content'].get(blob.id, None) + date = self.insert_cache["content"].get(blob.id, None) if date is not None: dates[blob.id] = date else: # If not, check whether it's been query before - date = self.select_cache['content'].get(blob.id, None) + date = self.select_cache["content"].get(blob.id, None) if date is not None: dates[blob.id] = date else: pending.append(blob.id) if pending: # Otherwise, query the database and cache the values - values = ', '.join(itertools.repeat('%s', len(pending))) - self.cursor.execute(f'''SELECT id, date FROM content WHERE id IN ({values})''', - tuple(pending)) + values = ", ".join(itertools.repeat("%s", len(pending))) + self.cursor.execute( + f"""SELECT id, date FROM content WHERE id IN ({values})""", + tuple(pending), + ) for row in self.cursor.fetchall(): dates[row[0]] = row[1] - self.select_cache['content'][row[0]] = row[1] + self.select_cache["content"][row[0]] = row[1] return dates - def content_set_early_date(self, blob: FileEntry, date: datetime): # logging.debug(f'EARLY occurrence of blob {hash_to_hex(blob.id)} (timestamp: {date})') # self.cursor.execute('''INSERT INTO content VALUES (%s,%s) # ON CONFLICT (id) DO UPDATE SET date=%s''', # (blob.id, date, date)) - self.insert_cache['content'][blob.id] = date - + self.insert_cache["content"][blob.id] = date def directory_add_to_revision( - self, - revision: RevisionEntry, - directory: DirectoryEntry, - path: PosixPath + self, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath ): # logging.debug(f'NEW occurrence of directory {hash_to_hex(directory.id)} on the ISOCHRONE FRONTIER of revision {hash_to_hex(revision.id)} (path: {path})') # self.cursor.execute('''INSERT INTO directory_in_rev VALUES (%s,%s,%s)''', # (directory.id, revision.id, bytes(normalize(path)))) - self.insert_cache['directory_in_rev'].append( + 
self.insert_cache["directory_in_rev"].append( (directory.id, revision.id, bytes(normalize(path))) ) - - def directory_get_date_in_isochrone_frontier(self, directory: DirectoryEntry) -> datetime: + def directory_get_date_in_isochrone_frontier( + self, directory: DirectoryEntry + ) -> datetime: # logging.debug(f'Getting directory {hash_to_hex(directory.id)} early date') # First check if the date is being modified by current transection. - date = self.insert_cache['directory'].get(directory.id, None) + date = self.insert_cache["directory"].get(directory.id, None) if date is None: # If not, check whether it's been query before - date = self.select_cache['directory'].get(directory.id, None) + date = self.select_cache["directory"].get(directory.id, None) if date is None: # Otherwise, query the database and cache the value - self.cursor.execute('''SELECT date FROM directory WHERE id=%s''', - (directory.id,)) + self.cursor.execute( + """SELECT date FROM directory WHERE id=%s""", (directory.id,) + ) row = self.cursor.fetchone() date = row[0] if row is not None else None - self.select_cache['directory'][directory.id] = date + self.select_cache["directory"][directory.id] = date return date - - def directory_get_early_dates(self, dirs: List[DirectoryEntry]) -> Dict[bytes, datetime]: + def directory_get_early_dates( + self, dirs: List[DirectoryEntry] + ) -> Dict[bytes, datetime]: dates = {} pending = [] for dir in dirs: # First check if the date is being modified by current transection. - date = self.insert_cache['directory'].get(dir.id, None) + date = self.insert_cache["directory"].get(dir.id, None) if date is not None: dates[dir.id] = date else: # If not, check whether it's been query before - date = self.select_cache['directory'].get(dir.id, None) + date = self.select_cache["directory"].get(dir.id, None) if date is not None: dates[dir.id] = date else: pending.append(dir.id) if pending: # Otherwise, query the database and cache the values - values = ', '.join(itertools.repeat('%s', len(pending))) - self.cursor.execute(f'''SELECT id, date FROM directory WHERE id IN ({values})''', - tuple(pending)) + values = ", ".join(itertools.repeat("%s", len(pending))) + self.cursor.execute( + f"""SELECT id, date FROM directory WHERE id IN ({values})""", + tuple(pending), + ) for row in self.cursor.fetchall(): dates[row[0]] = row[1] - self.select_cache['directory'][row[0]] = row[1] + self.select_cache["directory"][row[0]] = row[1] return dates - - def directory_set_date_in_isochrone_frontier(self, directory: DirectoryEntry, date: datetime): + def directory_set_date_in_isochrone_frontier( + self, directory: DirectoryEntry, date: datetime + ): # logging.debug(f'EARLY occurrence of directory {hash_to_hex(directory.id)} on the ISOCHRONE FRONTIER (timestamp: {date})') # self.cursor.execute('''INSERT INTO directory VALUES (%s,%s) # ON CONFLICT (id) DO UPDATE SET date=%s''', # (directory.id, date, date)) - self.insert_cache['directory'][directory.id] = date - + self.insert_cache["directory"][directory.id] = date def insert_all(self): # Performe insertions with cached information - if self.insert_cache['content']: + if self.insert_cache["content"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO content(id, date) VALUES %s + """INSERT INTO content(id, date) VALUES %s ON CONFLICT (id) DO - UPDATE SET date=LEAST(EXCLUDED.date,content.date)''', - self.insert_cache['content'].items() + UPDATE SET date=LEAST(EXCLUDED.date,content.date)""", + self.insert_cache["content"].items(), ) - if 
self.insert_cache['content_early_in_rev']: + if self.insert_cache["content_early_in_rev"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO content_early_in_rev VALUES %s - ON CONFLICT DO NOTHING''', - self.insert_cache['content_early_in_rev'] + """INSERT INTO content_early_in_rev VALUES %s + ON CONFLICT DO NOTHING""", + self.insert_cache["content_early_in_rev"], ) - if self.insert_cache['content_in_dir']: + if self.insert_cache["content_in_dir"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO content_in_dir VALUES %s - ON CONFLICT DO NOTHING''', - self.insert_cache['content_in_dir'] + """INSERT INTO content_in_dir VALUES %s + ON CONFLICT DO NOTHING""", + self.insert_cache["content_in_dir"], ) - if self.insert_cache['directory']: + if self.insert_cache["directory"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO directory(id, date) VALUES %s + """INSERT INTO directory(id, date) VALUES %s ON CONFLICT (id) DO - UPDATE SET date=LEAST(EXCLUDED.date,directory.date)''', - self.insert_cache['directory'].items() + UPDATE SET date=LEAST(EXCLUDED.date,directory.date)""", + self.insert_cache["directory"].items(), ) - if self.insert_cache['directory_in_rev']: + if self.insert_cache["directory_in_rev"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO directory_in_rev VALUES %s - ON CONFLICT DO NOTHING''', - self.insert_cache['directory_in_rev'] + """INSERT INTO directory_in_rev VALUES %s + ON CONFLICT DO NOTHING""", + self.insert_cache["directory_in_rev"], ) - if self.insert_cache['revision']: + if self.insert_cache["revision"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO revision(id, date) VALUES %s + """INSERT INTO revision(id, date) VALUES %s ON CONFLICT (id) DO - UPDATE SET date=LEAST(EXCLUDED.date,revision.date)''', - self.insert_cache['revision'].items() + UPDATE SET date=LEAST(EXCLUDED.date,revision.date)""", + self.insert_cache["revision"].items(), ) - if self.insert_cache['revision_before_rev']: + if self.insert_cache["revision_before_rev"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO revision_before_rev VALUES %s - ON CONFLICT DO NOTHING''', - self.insert_cache['revision_before_rev'] + """INSERT INTO revision_before_rev VALUES %s + ON CONFLICT DO NOTHING""", + self.insert_cache["revision_before_rev"], ) - if self.insert_cache['revision_in_org']: + if self.insert_cache["revision_in_org"]: psycopg2.extras.execute_values( self.cursor, - '''INSERT INTO revision_in_org VALUES %s - ON CONFLICT DO NOTHING''', - self.insert_cache['revision_in_org'] + """INSERT INTO revision_in_org VALUES %s + ON CONFLICT DO NOTHING""", + self.insert_cache["revision_in_org"], ) - def origin_get_id(self, origin: OriginEntry) -> int: if origin.id is None: # Check if current origin is already known and retrieve its internal id. - self.cursor.execute('''SELECT id FROM origin WHERE url=%s''', (origin.url,)) + self.cursor.execute("""SELECT id FROM origin WHERE url=%s""", (origin.url,)) row = self.cursor.fetchone() if row is None: # If the origin is seen for the first time, current revision is # the prefered one. 
- self.cursor.execute('''INSERT INTO origin (url) VALUES (%s) RETURNING id''', - (origin.url,)) + self.cursor.execute( + """INSERT INTO origin (url) VALUES (%s) RETURNING id""", + (origin.url,), + ) return self.cursor.fetchone()[0] else: return row[0] else: return origin.id - def revision_add(self, revision: RevisionEntry): # Add current revision to the compact DB - self.insert_cache['revision'][revision.id] = revision.date - + self.insert_cache["revision"][revision.id] = revision.date - def revision_add_before_revision(self, relative: RevisionEntry, revision: RevisionEntry): + def revision_add_before_revision( + self, relative: RevisionEntry, revision: RevisionEntry + ): # self.cursor.execute('''INSERT INTO revision_before_rev VALUES (%s,%s) # ON CONFLICT DO NOTHING''', # (revision.id, relative.id)) - self.insert_cache['revision_before_rev'].append((revision.id, relative.id)) - + self.insert_cache["revision_before_rev"].append((revision.id, relative.id)) def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): # self.cursor.execute('''INSERT INTO revision_in_org VALUES (%s,%s) # ON CONFLICT DO NOTHING''', # (revision.id, origin.id)) - self.insert_cache['revision_in_org'].append((revision.id, origin.id)) - + self.insert_cache["revision_in_org"].append((revision.id, origin.id)) def revision_get_early_date(self, revision: RevisionEntry) -> datetime: # logging.debug(f'Getting revision {hash_to_hex(revision.id)} early date') # First check if the date is being modified by current transection. - date = self.insert_cache['revision'].get(revision.id, None) + date = self.insert_cache["revision"].get(revision.id, None) if date is None: # If not, check whether it's been query before - date = self.select_cache['revision'].get(revision.id, None) + date = self.select_cache["revision"].get(revision.id, None) if date is None: # Otherwise, query the database and cache the value - self.cursor.execute('''SELECT date FROM revision WHERE id=%s''', - (revision.id,)) + self.cursor.execute( + """SELECT date FROM revision WHERE id=%s""", (revision.id,) + ) row = self.cursor.fetchone() date = row[0] if row is not None else None - self.select_cache['revision'][revision.id] = date + self.select_cache["revision"][revision.id] = date return date - def revision_get_prefered_origin(self, revision: RevisionEntry) -> int: # TODO: adapt this method to consider cached values - self.cursor.execute('''SELECT COALESCE(org,0) FROM revision WHERE id=%s''', - (revision.id,)) + self.cursor.execute( + """SELECT COALESCE(org,0) FROM revision WHERE id=%s""", (revision.id,) + ) row = self.cursor.fetchone() # None means revision is not in database # 0 means revision has no prefered origin return row[0] if row is not None and row[0] != 0 else None - def revision_in_history(self, revision: RevisionEntry) -> bool: # TODO: adapt this method to consider cached values - self.cursor.execute('''SELECT 1 FROM revision_before_rev WHERE prev=%s''', - (revision.id,)) + self.cursor.execute( + """SELECT 1 FROM revision_before_rev WHERE prev=%s""", (revision.id,) + ) return self.cursor.fetchone() is not None - - def revision_set_prefered_origin(self, origin: OriginEntry, revision: RevisionEntry): + def revision_set_prefered_origin( + self, origin: OriginEntry, revision: RevisionEntry + ): # TODO: adapt this method to consider cached values - self.cursor.execute('''UPDATE revision SET org=%s WHERE id=%s''', - (origin.id, revision.id)) - + self.cursor.execute( + """UPDATE revision SET org=%s WHERE id=%s""", (origin.id, 
revision.id) + ) def revision_visited(self, revision: RevisionEntry) -> bool: # TODO: adapt this method to consider cached values - self.cursor.execute('''SELECT 1 FROM revision_in_org WHERE rev=%s''', - (revision.id,)) + self.cursor.execute( + """SELECT 1 FROM revision_in_org WHERE rev=%s""", (revision.id,) + ) return self.cursor.fetchone() is not None diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py index dd4e72e..291d010 100644 --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -1,346 +1,364 @@ import logging from .archive import ArchiveInterface from .model import DirectoryEntry, FileEntry from .origin import OriginEntry from .revision import RevisionEntry from datetime import datetime from pathlib import PosixPath from typing import Dict, List, Optional, Tuple from swh.model.hashutil import hash_to_hex class ProvenanceInterface: def __init__(self, **kwargs): raise NotImplementedError - def commit(self): raise NotImplementedError - - def content_add_to_directory(self, directory: DirectoryEntry, blob: FileEntry, prefix: PosixPath): + def content_add_to_directory( + self, directory: DirectoryEntry, blob: FileEntry, prefix: PosixPath + ): raise NotImplementedError - - def content_add_to_revision(self, revision: RevisionEntry, blob: FileEntry, prefix: PosixPath): + def content_add_to_revision( + self, revision: RevisionEntry, blob: FileEntry, prefix: PosixPath + ): raise NotImplementedError - def content_find_first(self, blobid: str): raise NotImplementedError - def content_find_all(self, blobid: str): raise NotImplementedError - def content_get_early_date(self, blob: FileEntry) -> datetime: raise NotImplementedError - def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]: raise NotImplementedError - def content_set_early_date(self, blob: FileEntry, date: datetime): raise NotImplementedError - - def directory_add_to_revision(self, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath): + def directory_add_to_revision( + self, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath + ): raise NotImplementedError - - def directory_get_date_in_isochrone_frontier(self, directory: DirectoryEntry) -> datetime: + def directory_get_date_in_isochrone_frontier( + self, directory: DirectoryEntry + ) -> datetime: raise NotImplementedError - - def directory_get_early_dates(self, dirs: List[DirectoryEntry]) -> Dict[bytes, datetime]: + def directory_get_early_dates( + self, dirs: List[DirectoryEntry] + ) -> Dict[bytes, datetime]: raise NotImplementedError - - def directory_set_date_in_isochrone_frontier(self, directory: DirectoryEntry, date: datetime): + def directory_set_date_in_isochrone_frontier( + self, directory: DirectoryEntry, date: datetime + ): raise NotImplementedError - def origin_get_id(self, origin: OriginEntry) -> int: raise NotImplementedError - def revision_add(self, revision: RevisionEntry): raise NotImplementedError - - def revision_add_before_revision(self, relative: RevisionEntry, revision: RevisionEntry): + def revision_add_before_revision( + self, relative: RevisionEntry, revision: RevisionEntry + ): raise NotImplementedError - def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): raise NotImplementedError - def revision_get_early_date(self, revision: RevisionEntry) -> datetime: raise NotImplementedError - def revision_get_prefered_origin(self, revision: RevisionEntry) -> int: raise NotImplementedError - def revision_in_history(self, revision: RevisionEntry) 
-> bool: raise NotImplementedError - - def revision_set_prefered_origin(self, origin: OriginEntry, revision: RevisionEntry): + def revision_set_prefered_origin( + self, origin: OriginEntry, revision: RevisionEntry + ): raise NotImplementedError - def revision_visited(self, revision: RevisionEntry) -> bool: raise NotImplementedError def directory_process_content( provenance: ProvenanceInterface, directory: DirectoryEntry, relative: DirectoryEntry, - prefix: PosixPath + prefix: PosixPath, ): stack = [(directory, prefix)] while stack: current, path = stack.pop() for child in iter(current): if isinstance(child, FileEntry): # Add content to the relative directory with the computed path. provenance.content_add_to_directory(relative, child, path) else: # Recursively walk the child directory. stack.append((child, path / child.name)) def directory_update_content( stack: List[Tuple[DirectoryEntry, PosixPath]], provenance: ProvenanceInterface, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath, - subdirs: Optional[List[DirectoryEntry]]=None, - blobs: Optional[List[FileEntry]]=None, - blobdates: Optional[Dict[bytes, datetime]]=None + subdirs: Optional[List[DirectoryEntry]] = None, + blobs: Optional[List[FileEntry]] = None, + blobdates: Optional[Dict[bytes, datetime]] = None, ): assert revision.date is not None # Init optional parameters if not provided. if subdirs is None: - subdirs = [child for child in iter(directory) if isinstance(child, DirectoryEntry)] + subdirs = [ + child for child in iter(directory) if isinstance(child, DirectoryEntry) + ] if blobs is None: blobs = [child for child in iter(directory) if isinstance(child, FileEntry)] if blobdates is None: blobdates = provenance.content_get_early_dates(blobs) # Iterate over blobs updating their date if necessary. for blob in blobs: date = blobdates.get(blob.id, None) if date is None or revision.date < date: provenance.content_set_early_date(blob, revision.date) provenance.content_add_to_revision(revision, blob, path) # Push all subdirectories with its corresponding path to analyze them # recursively. for subdir in subdirs: stack.append((subdir, path / subdir.name)) -def origin_add( - provenance: ProvenanceInterface, - origin: OriginEntry -): +def origin_add(provenance: ProvenanceInterface, origin: OriginEntry): # TODO: refactor to iterate over origin visit statuses and commit only once # per status. origin.id = provenance.origin_get_id(origin) for revision in origin.revisions: # logging.info(f'Processing revision {hash_to_hex(revision.id)} from origin {origin.url}') origin_add_revision(provenance, origin, revision) # Commit after each revision - provenance.commit() # TODO: verify this! + provenance.commit() # TODO: verify this! def origin_add_revision( - provenance: ProvenanceInterface, - origin: OriginEntry, - revision: RevisionEntry + provenance: ProvenanceInterface, origin: OriginEntry, revision: RevisionEntry ): stack: List[Tuple[Optional[RevisionEntry], RevisionEntry]] = [(None, revision)] while stack: relative, current = stack.pop() # Check if current revision has no prefered origin and update if necessary. prefered = provenance.revision_get_prefered_origin(current) # logging.debug(f'Prefered origin for revision {hash_to_hex(rev.id)}: {prefered}') if prefered is None: provenance.revision_set_prefered_origin(origin, current) ######################################################################## if relative is None: # This revision is pointed directly by the origin. 
visited = provenance.revision_visited(current) - logging.debug(f'Revision {hash_to_hex(current.id)} in origin {origin.id}: {visited}') + logging.debug( + f"Revision {hash_to_hex(current.id)} in origin {origin.id}: {visited}" + ) - logging.debug(f'Adding revision {hash_to_hex(current.id)} to origin {origin.id}') + logging.debug( + f"Adding revision {hash_to_hex(current.id)} to origin {origin.id}" + ) provenance.revision_add_to_origin(origin, current) if not visited: stack.append((current, current)) else: # This revision is a parent of another one in the history of the # relative revision. for parent in iter(current): visited = provenance.revision_visited(parent) - logging.debug(f'Parent {hash_to_hex(parent.id)} in some origin: {visited}') + logging.debug( + f"Parent {hash_to_hex(parent.id)} in some origin: {visited}" + ) if not visited: # The parent revision has never been seen before pointing # directly to an origin. known = provenance.revision_in_history(parent) - logging.debug(f'Revision {hash_to_hex(parent.id)} before revision: {known}') + logging.debug( + f"Revision {hash_to_hex(parent.id)} before revision: {known}" + ) if known: # The parent revision is already known in some other # revision's history. We should point it directly to # the origin and (eventually) walk its history. - logging.debug(f'Adding revision {hash_to_hex(parent.id)} directly to origin {origin.id}') + logging.debug( + f"Adding revision {hash_to_hex(parent.id)} directly to origin {origin.id}" + ) stack.append((None, parent)) else: # The parent revision was never seen before. We should # walk its history and associate it with the same # relative revision. - logging.debug(f'Adding parent revision {hash_to_hex(parent.id)} to revision {hash_to_hex(relative.id)}') + logging.debug( + f"Adding parent revision {hash_to_hex(parent.id)} to revision {hash_to_hex(relative.id)}" + ) provenance.revision_add_before_revision(relative, parent) stack.append((relative, parent)) else: # The parent revision already points to an origin, so its # history was properly processed before. We just need to # make sure it points to the current origin as well. - logging.debug(f'Adding parent revision {hash_to_hex(parent.id)} to origin {origin.id}') + logging.debug( + f"Adding parent revision {hash_to_hex(parent.id)} to origin {origin.id}" + ) provenance.revision_add_to_origin(origin, parent) def revision_add( - provenance: ProvenanceInterface, - archive: ArchiveInterface, - revision: RevisionEntry + provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry ): assert revision.date is not None assert revision.root is not None # Processed content starting from the revision's root directory date = provenance.revision_get_early_date(revision) if date is None or revision.date < date: provenance.revision_add(revision) revision_process_content( - provenance, - revision, - DirectoryEntry(archive, revision.root, PosixPath('.')) + provenance, revision, DirectoryEntry(archive, revision.root, PosixPath(".")) ) return provenance.commit() def revision_process_content( - provenance: ProvenanceInterface, - revision: RevisionEntry, - directory: DirectoryEntry + provenance: ProvenanceInterface, revision: RevisionEntry, directory: DirectoryEntry ): assert revision.date is not None stack = [(directory, directory.name)] while stack: # Get next directory to process and query its date right before # processing to be sure we get the most recently updated value. 
current, path = stack.pop() date = provenance.directory_get_date_in_isochrone_frontier(current) if date is None: # The directory has never been seen on the isochrone graph of a # revision. Its children should be checked. blobs = [child for child in iter(current) if isinstance(child, FileEntry)] - dirs = [child for child in iter(current) if isinstance(child, DirectoryEntry)] + dirs = [ + child for child in iter(current) if isinstance(child, DirectoryEntry) + ] blobdates = provenance.content_get_early_dates(blobs) dirdates = provenance.directory_get_early_dates(dirs) # Get the list of ids with no duplicates to ensure we have # available dates for all the elements. This prevents takign a # wrong decision when a blob occurres more than once in the same # directory. ids = list(dict.fromkeys([child.id for child in blobs + dirs])) if ids: dates = list(blobdates.values()) + list(dirdates.values()) if len(dates) == len(ids): # All child nodes of current directory are already known. maxdate = max(dates) < revision.date if maxdate < revision.date: # The directory belongs to the outer isochrone frontier # of the current revision, and this is the first time # it appears as such. # TODO: maybe the parent directory is the one that has # to be added to the frontier instead! - provenance.directory_set_date_in_isochrone_frontier(current, maxdate) + provenance.directory_set_date_in_isochrone_frontier( + current, maxdate + ) provenance.directory_add_to_revision(revision, current, path) directory_process_content( provenance, directory=current, relative=current, - prefix=PosixPath('.') + prefix=PosixPath("."), ) else: # This revision is either in the inner frontiern or out # of order. All the children from the current directory # should be analyzed (and timestamps eventually # updated) yet current. directory_update_content( - stack, provenance, revision, current, path, - subdirs=dirs, blobs=blobs, blobdates=blobdates) + stack, + provenance, + revision, + current, + path, + subdirs=dirs, + blobs=blobs, + blobdates=blobdates, + ) else: # Al least one child node is known, ie. the directory is # not on the outer isochrone frontier of the current # revision. Its child nodes should be analyzed and current # directory updated before them. # FIXME: I believe the only different between this branche # and the two 'else' cases above is this push to the stack. # If so, we might refactor this to avoid so many branches. # stack.append((current, path)) # TODO: to uncomment the above line is it necessary to # postpone the adding of dictories to the isochrone # frontier from the branch above (maxdate < revision.date). directory_update_content( - stack, provenance, revision, current, path, - subdirs=dirs, blobs=blobs, blobdates=blobdates) + stack, + provenance, + revision, + current, + path, + subdirs=dirs, + blobs=blobs, + blobdates=blobdates, + ) elif revision.date < date: # The directory has already been seen on the isochrone frontier of # a revision, but current revision is earlier. Its children should # be updated. directory_update_content(stack, provenance, revision, current, path) provenance.directory_set_date_in_isochrone_frontier(current, revision.date) else: # The directory has already been seen on the outer isochrone # frontier of an earlier revision. Just add it to the current # revision. 
provenance.directory_add_to_revision(revision, current, path) diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py index 4701bfe..2369282 100644 --- a/swh/provenance/revision.py +++ b/swh/provenance/revision.py @@ -1,168 +1,173 @@ import logging import threading from .archive import ArchiveInterface from datetime import datetime from typing import Optional from swh.model.hashutil import hash_to_bytes, hash_to_hex class RevisionEntry: def __init__( self, archive: ArchiveInterface, id: bytes, - date: Optional[datetime]=None, - root: Optional[bytes]=None, - parents: Optional[list]=None + date: Optional[datetime] = None, + root: Optional[bytes] = None, + parents: Optional[list] = None, ): self.archive = archive self.id = id self.date = date self.parents = parents self.root = root def __iter__(self): if self.parents is None: self.parents = [] for parent in self.archive.revision_get([self.id]): if parent is not None: self.parents.append( RevisionEntry( self.archive, parent.id, - parents=[RevisionEntry(self.archive, id) for id in parent.parents] + parents=[ + RevisionEntry(self.archive, id) for id in parent.parents + ], ) ) return iter(self.parents) ################################################################################ ################################################################################ + class RevisionIterator: """Iterator interface.""" def __iter__(self): pass def __next__(self): pass class FileRevisionIterator(RevisionIterator): """Iterator over revisions present in the given CSV file.""" - def __init__(self, filename: str, archive: ArchiveInterface, limit: Optional[int]=None): + def __init__( + self, filename: str, archive: ArchiveInterface, limit: Optional[int] = None + ): self.file = open(filename) self.idx = 0 self.limit = limit self.mutex = threading.Lock() self.archive = archive def next(self): self.mutex.acquire() line = self.file.readline().strip() if line and (self.limit is None or self.idx < self.limit): self.idx = self.idx + 1 - id, date, root = line.strip().split(',') + id, date, root = line.strip().split(",") self.mutex.release() return RevisionEntry( self.archive, hash_to_bytes(id), date=datetime.fromisoformat(date), - root=hash_to_bytes(root) + root=hash_to_bytes(root), ) else: self.mutex.release() return None # class ArchiveRevisionIterator(RevisionIterator): # """Iterator over revisions present in the given database.""" # # def __init__(self, conn, limit=None, chunksize=100): # self.cur = conn.cursor() # self.chunksize = chunksize # self.records = [] # if limit is None: # self.cur.execute('''SELECT id, date, committer_date, directory # FROM revision''') # else: # self.cur.execute('''SELECT id, date, committer_date, directory # FROM revision # LIMIT %s''', (limit,)) # for row in self.cur.fetchmany(self.chunksize): # record = self.make_record(row) # if record is not None: # self.records.append(record) # self.mutex = threading.Lock() # # def __del__(self): # self.cur.close() # # def next(self): # self.mutex.acquire() # if not self.records: # self.records.clear() # for row in self.cur.fetchmany(self.chunksize): # record = self.make_record(row) # if record is not None: # self.records.append(record) # # if self.records: # revision, *self.records = self.records # self.mutex.release() # return revision # else: # self.mutex.release() # return None # # def make_record(self, row): # # Only revision with author or commiter date are considered # if row[1] is not None: # # If the revision has author date, it takes precedence # return 
RevisionEntry(row[0], row[1], row[3]) # elif row[2] is not None: # # If not, we use the commiter date # return RevisionEntry(row[0], row[2], row[3]) ################################################################################ ################################################################################ # class RevisionWorker(threading.Thread): # def __init__( # self, # id: int, # conninfo: dict, # archive: ArchiveInterface, # revisions: RevisionIterator # ): # from .provenance import get_provenance # # super().__init__() # self.archive = archive # self.id = id # self.provenance = get_provenance(conninfo) # self.revisions = revisions # # # def run(self): # from .provenance import revision_add # # # while True: # revision = self.revisions.next() # if revision is None: break # # processed = False # while not processed: # logging.info(f'Thread {self.id} - Processing revision {hash_to_hex(revision.id)} (timestamp: {revision.date})') # processed = revision_add(self.provenance, self.archive, revision) # if not processed: # logging.warning(f'Thread {self.id} - Failed to process revision {hash_to_hex(revision.id)} (timestamp: {revision.date})') diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py index c01464f..6ae6289 100644 --- a/swh/provenance/storage/archive.py +++ b/swh/provenance/storage/archive.py @@ -1,45 +1,49 @@ import psycopg2 from ..archive import ArchiveInterface # from functools import lru_cache from methodtools import lru_cache from typing import List from swh.storage import get_storage class ArchiveStorage(ArchiveInterface): def __init__(self, cls: str, **kwargs): self.storage = get_storage(cls, **kwargs) @lru_cache(maxsize=1000000) def directory_ls(self, id: bytes): # TODO: filter unused fields return [entry for entry in self.storage.directory_ls(id)] def iter_origins(self): from swh.storage.algos.origin import iter_origins + yield from iter_origins(self.storage) def iter_origin_visits(self, origin: str): from swh.storage.algos.origin import iter_origin_visits + # TODO: filter unused fields yield from iter_origin_visits(self.storage, origin) def iter_origin_visit_statuses(self, origin: str, visit: int): from swh.storage.algos.origin import iter_origin_visit_statuses + # TODO: filter unused fields yield from iter_origin_visit_statuses(self.storage, origin, visit) def release_get(self, ids: List[bytes]): # TODO: filter unused fields yield from self.storage.release_get(ids) def revision_get(self, ids: List[bytes]): # TODO: filter unused fields yield from self.storage.revision_get(ids) def snapshot_get_all_branches(self, snapshot: bytes): from swh.storage.algos.snapshot import snapshot_get_all_branches + # TODO: filter unused fields return snapshot_get_all_branches(self.storage, snapshot)
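
The write path reformatted above in swh/provenance/postgresql/provenance.py accumulates rows in insert_cache and flushes them in insert_all with psycopg2.extras.execute_values, relying on ON CONFLICT ... LEAST(...) so the earliest known date wins. The following is a minimal, self-contained sketch of that flush pattern, not part of the patch; the content table definition and the connection parameters are placeholder assumptions.

# Sketch (illustrative, not from the patch) of the cache-and-flush pattern used by
# ProvenancePostgreSQL.insert_all.
# Assumed table (placeholder): CREATE TABLE content (id bytea PRIMARY KEY, date timestamptz)
from datetime import datetime
from typing import Dict

import psycopg2
import psycopg2.extras


def flush_content(cursor, cache: Dict[bytes, datetime]) -> None:
    # Write all cached (id, date) pairs in a single round trip, keeping the
    # earliest date when a row already exists.
    if not cache:
        return
    psycopg2.extras.execute_values(
        cursor,
        """INSERT INTO content(id, date) VALUES %s
           ON CONFLICT (id) DO
           UPDATE SET date=LEAST(EXCLUDED.date,content.date)""",
        cache.items(),
    )
    cache.clear()


if __name__ == "__main__":
    # Placeholder connection info; point it at any reachable PostgreSQL instance.
    conn = psycopg2.connect(host="localhost", dbname="provenance")
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    flush_content(conn.cursor(), {b"\x00" * 20: datetime(2020, 1, 1)})

Autocommit here mirrors what ProvenancePostgreSQL.__init__ sets on its connection, which is also why self.conn.commit() stays commented out in the class's commit() method.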
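
Similarly, iterate_statuses in swh/provenance/origin.py keeps the release_get and revision_get queries small by walking the id lists in slices of 100. A generic helper expressing that slicing is sketched below; archive stands for any object implementing ArchiveInterface from the diff, and the helper itself is illustrative rather than part of the change.

# Sketch of the batching used in iterate_statuses to keep storage queries small.
from typing import Iterator, Sequence, TypeVar

T = TypeVar("T")


def chunked(ids: Sequence[T], size: int = 100) -> Iterator[Sequence[T]]:
    # Yield successive slices of at most `size` ids, like releases[i : i + limit].
    for i in range(0, len(ids), size):
        yield ids[i : i + size]


# Hypothetical usage with an ArchiveInterface implementation:
# for batch in chunked(releases):
#     for release in archive.release_get(list(batch)):
#         if release is not None and release.target_type == ObjectType.REVISION:
#             targets.append(release.target)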