diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py index a9b8aef..dc5a9e5 100644 --- a/swh/provenance/postgresql/provenance.py +++ b/swh/provenance/postgresql/provenance.py @@ -1,407 +1,402 @@ import itertools import logging import os import psycopg2 import psycopg2.extras from ..model import DirectoryEntry, FileEntry from ..origin import OriginEntry from .db_utils import connect, execute_sql from ..provenance import ProvenanceInterface from ..revision import RevisionEntry from datetime import datetime from pathlib import PosixPath from typing import Any, Dict, List -from swh.model.hashutil import hash_to_hex - def normalize(path: PosixPath) -> PosixPath: spath = str(path) if spath.startswith("./"): return PosixPath(spath[2:]) return path def create_database(conn: psycopg2.extensions.connection, conninfo: dict, name: str): conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) # Normalize dbname to avoid issues when reconnecting below name = name.casefold() # Create new database dropping previous one if exists cursor = conn.cursor() cursor.execute(f"""DROP DATABASE IF EXISTS {name}""") cursor.execute(f"""CREATE DATABASE {name}""") conn.close() # Reconnect to server selecting newly created database to add tables conninfo["dbname"] = name conn = connect(conninfo) sqldir = os.path.dirname(os.path.realpath(__file__)) execute_sql(conn, PosixPath(os.path.join(sqldir, "provenance.sql"))) ################################################################################ ################################################################################ ################################################################################ class ProvenancePostgreSQL(ProvenanceInterface): def __init__(self, conn: psycopg2.extensions.connection): # TODO: consider adding a mutex for thread safety conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) self.conn = conn self.cursor = self.conn.cursor() self.insert_cache: Dict[str, Any] = {} self.select_cache: Dict[str, Any] = {} self.clear_caches() def clear_caches(self): self.insert_cache = { "content": dict(), "content_early_in_rev": list(), "content_in_dir": list(), "directory": dict(), "directory_in_rev": list(), "revision": dict(), "revision_before_rev": list(), "revision_in_org": list(), } self.select_cache = {"content": dict(), "directory": dict(), "revision": dict()} def commit(self): result = False try: self.insert_all() # self.conn.commit() result = True # except psycopg2.DatabaseError: # # Database error occurred, rollback all changes # self.conn.rollback() # # TODO: maybe serialize and auto-merge transations. # # The only conflicts are on: # # - content: we keep the earliest date # # - directory: we keep the earliest date # # - content_in_dir: there should be just duplicated entries. 
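# For now, any failure falls through to the generic handler below: the
# error is logged, the cached state is discarded in the finally clause,
# and commit() reports failure by returning False.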
except Exception as error: # Unexpected error occurred, rollback all changes and log message - logging.warning(f"Unexpected error: {error}") + logging.error(f"Unexpected error: {error}") # self.conn.rollback() finally: self.clear_caches() return result def content_add_to_directory( self, directory: DirectoryEntry, blob: FileEntry, prefix: PosixPath ): self.insert_cache["content_in_dir"].append( (blob.id, directory.id, bytes(normalize(prefix / blob.name))) ) def content_add_to_revision( self, revision: RevisionEntry, blob: FileEntry, prefix: PosixPath ): self.insert_cache["content_early_in_rev"].append( (blob.id, revision.id, bytes(normalize(prefix / blob.name))) ) def content_find_first(self, blobid: str): - logging.info(f"Retrieving first occurrence of content {hash_to_hex(blobid)}") self.cursor.execute( """SELECT blob, rev, date, path FROM content_early_in_rev JOIN revision ON revision.id=content_early_in_rev.rev WHERE content_early_in_rev.blob=%s ORDER BY date, rev, path ASC LIMIT 1""", (blobid,), ) return self.cursor.fetchone() def content_find_all(self, blobid: str): - logging.info(f"Retrieving all occurrences of content {hash_to_hex(blobid)}") self.cursor.execute( """(SELECT blob, rev, date, path FROM content_early_in_rev JOIN revision ON revision.id=content_early_in_rev.rev WHERE content_early_in_rev.blob=%s) UNION (SELECT content_in_rev.blob, content_in_rev.rev, revision.date, content_in_rev.path FROM (SELECT content_in_dir.blob, directory_in_rev.rev, CASE directory_in_rev.path WHEN '.' THEN content_in_dir.path ELSE (directory_in_rev.path || '/' || content_in_dir.path)::unix_path END AS path FROM content_in_dir JOIN directory_in_rev ON content_in_dir.dir=directory_in_rev.dir WHERE content_in_dir.blob=%s ) AS content_in_rev JOIN revision ON revision.id=content_in_rev.rev ) ORDER BY date, rev, path""", (blobid, blobid), ) # POSTGRESQL EXPLAIN yield from self.cursor.fetchall() def content_get_early_date(self, blob: FileEntry) -> datetime: - logging.debug(f"Getting content {hash_to_hex(blob.id)} early date") # First check if the date is being modified by current transection. date = self.insert_cache["content"].get(blob.id, None) if date is None: # If not, check whether it's been query before date = self.select_cache["content"].get(blob.id, None) if date is None: # Otherwise, query the database and cache the value self.cursor.execute( """SELECT date FROM content WHERE id=%s""", (blob.id,) ) row = self.cursor.fetchone() date = row[0] if row is not None else None self.select_cache["content"][blob.id] = date return date def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]: dates = {} pending = [] for blob in blobs: # First check if the date is being modified by current transection. 
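# The lookup order mirrors content_get_early_date above: pending writes in
# insert_cache first, then previously fetched values in select_cache, and
# only the ids still missing are queried from the database in one batch.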
date = self.insert_cache["content"].get(blob.id, None) if date is not None: dates[blob.id] = date else: # If not, check whether it's been query before date = self.select_cache["content"].get(blob.id, None) if date is not None: dates[blob.id] = date else: pending.append(blob.id) if pending: # Otherwise, query the database and cache the values values = ", ".join(itertools.repeat("%s", len(pending))) self.cursor.execute( f"""SELECT id, date FROM content WHERE id IN ({values})""", tuple(pending), ) for row in self.cursor.fetchall(): dates[row[0]] = row[1] self.select_cache["content"][row[0]] = row[1] return dates def content_set_early_date(self, blob: FileEntry, date: datetime): self.insert_cache["content"][blob.id] = date def directory_add_to_revision( self, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath ): self.insert_cache["directory_in_rev"].append( (directory.id, revision.id, bytes(normalize(path))) ) def directory_get_date_in_isochrone_frontier( self, directory: DirectoryEntry ) -> datetime: # First check if the date is being modified by current transection. date = self.insert_cache["directory"].get(directory.id, None) if date is None: # If not, check whether it's been query before date = self.select_cache["directory"].get(directory.id, None) if date is None: # Otherwise, query the database and cache the value self.cursor.execute( """SELECT date FROM directory WHERE id=%s""", (directory.id,) ) row = self.cursor.fetchone() date = row[0] if row is not None else None self.select_cache["directory"][directory.id] = date return date def directory_get_early_dates( self, dirs: List[DirectoryEntry] ) -> Dict[bytes, datetime]: dates = {} pending = [] for directory in dirs: # First check if the date is being modified by current transection. 
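# Same three-step lookup as for contents: pending writes, cached reads,
# then a single batched SELECT for whatever remains.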
date = self.insert_cache["directory"].get(directory.id, None) if date is not None: dates[directory.id] = date else: # If not, check whether it's been query before date = self.select_cache["directory"].get(directory.id, None) if date is not None: dates[directory.id] = date else: pending.append(directory.id) if pending: # Otherwise, query the database and cache the values values = ", ".join(itertools.repeat("%s", len(pending))) self.cursor.execute( f"""SELECT id, date FROM directory WHERE id IN ({values})""", tuple(pending), ) for row in self.cursor.fetchall(): dates[row[0]] = row[1] self.select_cache["directory"][row[0]] = row[1] return dates def directory_set_date_in_isochrone_frontier( self, directory: DirectoryEntry, date: datetime ): self.insert_cache["directory"][directory.id] = date def insert_all(self): # Performe insertions with cached information if self.insert_cache["content"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO content(id, date) VALUES %s ON CONFLICT (id) DO UPDATE SET date=LEAST(EXCLUDED.date,content.date)""", self.insert_cache["content"].items(), ) if self.insert_cache["content_early_in_rev"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO content_early_in_rev VALUES %s ON CONFLICT DO NOTHING""", self.insert_cache["content_early_in_rev"], ) if self.insert_cache["content_in_dir"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO content_in_dir VALUES %s ON CONFLICT DO NOTHING""", self.insert_cache["content_in_dir"], ) if self.insert_cache["directory"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO directory(id, date) VALUES %s ON CONFLICT (id) DO UPDATE SET date=LEAST(EXCLUDED.date,directory.date)""", self.insert_cache["directory"].items(), ) if self.insert_cache["directory_in_rev"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO directory_in_rev VALUES %s ON CONFLICT DO NOTHING""", self.insert_cache["directory_in_rev"], ) if self.insert_cache["revision"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO revision(id, date) VALUES %s ON CONFLICT (id) DO UPDATE SET date=LEAST(EXCLUDED.date,revision.date)""", self.insert_cache["revision"].items(), ) if self.insert_cache["revision_before_rev"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO revision_before_rev VALUES %s ON CONFLICT DO NOTHING""", self.insert_cache["revision_before_rev"], ) if self.insert_cache["revision_in_org"]: psycopg2.extras.execute_values( self.cursor, """INSERT INTO revision_in_org VALUES %s ON CONFLICT DO NOTHING""", self.insert_cache["revision_in_org"], ) def origin_get_id(self, origin: OriginEntry) -> int: if origin.id is None: # Check if current origin is already known and retrieve its internal id. self.cursor.execute("""SELECT id FROM origin WHERE url=%s""", (origin.url,)) row = self.cursor.fetchone() if row is None: # If the origin is seen for the first time, current revision is # the prefered one. 
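# Insert the new origin and let PostgreSQL hand back the generated
# internal id in the same round trip (INSERT ... RETURNING).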
self.cursor.execute( """INSERT INTO origin (url) VALUES (%s) RETURNING id""", (origin.url,), ) return self.cursor.fetchone()[0] else: return row[0] else: return origin.id def revision_add(self, revision: RevisionEntry): # Add current revision to the compact DB self.insert_cache["revision"][revision.id] = revision.date def revision_add_before_revision( self, relative: RevisionEntry, revision: RevisionEntry ): self.insert_cache["revision_before_rev"].append((revision.id, relative.id)) def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): self.insert_cache["revision_in_org"].append((revision.id, origin.id)) def revision_get_early_date(self, revision: RevisionEntry) -> datetime: date = self.insert_cache["revision"].get(revision.id, None) if date is None: # If not, check whether it's been query before date = self.select_cache["revision"].get(revision.id, None) if date is None: # Otherwise, query the database and cache the value self.cursor.execute( """SELECT date FROM revision WHERE id=%s""", (revision.id,) ) row = self.cursor.fetchone() date = row[0] if row is not None else None self.select_cache["revision"][revision.id] = date return date def revision_get_prefered_origin(self, revision: RevisionEntry) -> int: # TODO: adapt this method to consider cached values self.cursor.execute( """SELECT COALESCE(org,0) FROM revision WHERE id=%s""", (revision.id,) ) row = self.cursor.fetchone() # None means revision is not in database # 0 means revision has no prefered origin return row[0] if row is not None and row[0] != 0 else None def revision_in_history(self, revision: RevisionEntry) -> bool: # TODO: adapt this method to consider cached values self.cursor.execute( """SELECT 1 FROM revision_before_rev WHERE prev=%s""", (revision.id,) ) return self.cursor.fetchone() is not None def revision_set_prefered_origin( self, origin: OriginEntry, revision: RevisionEntry ): # TODO: adapt this method to consider cached values self.cursor.execute( """UPDATE revision SET org=%s WHERE id=%s""", (origin.id, revision.id) ) def revision_visited(self, revision: RevisionEntry) -> bool: # TODO: adapt this method to consider cached values self.cursor.execute( """SELECT 1 FROM revision_in_org WHERE rev=%s""", (revision.id,) ) return self.cursor.fetchone() is not None diff --git a/swh/provenance/provenance.gr.py b/swh/provenance/provenance.gr.txt similarity index 100% rename from swh/provenance/provenance.gr.py rename to swh/provenance/provenance.gr.txt diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py index 7a9fe90..70d718c 100644 --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -1,427 +1,443 @@ from .archive import ArchiveInterface from .model import DirectoryEntry, FileEntry from .origin import OriginEntry from .revision import RevisionEntry from datetime import datetime from pathlib import PosixPath from typing import Dict, List, Optional, Tuple import logging from swh.model.hashutil import hash_to_hex # TODO: consider moving to path utils file together with normalize. def is_child(path: PosixPath, prefix: PosixPath) -> bool: # PosixPath returns '.' as parent when there is not upper directory. First check # avoids considering current directory its own parent. 
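# e.g. is_child(PosixPath("a/b"), PosixPath("a")) is True, while
# is_child(PosixPath("."), PosixPath(".")) is False since "." is its own parent.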
return path.parent != path and path.parent == prefix class ProvenanceInterface: def __init__(self, **kwargs): raise NotImplementedError def commit(self): raise NotImplementedError def content_add_to_directory( self, directory: DirectoryEntry, blob: FileEntry, prefix: PosixPath ): raise NotImplementedError def content_add_to_revision( self, revision: RevisionEntry, blob: FileEntry, prefix: PosixPath ): raise NotImplementedError def content_find_first(self, blobid: str): raise NotImplementedError def content_find_all(self, blobid: str): raise NotImplementedError def content_get_early_date(self, blob: FileEntry) -> datetime: raise NotImplementedError def content_get_early_dates(self, blobs: List[FileEntry]) -> Dict[bytes, datetime]: raise NotImplementedError def content_set_early_date(self, blob: FileEntry, date: datetime): raise NotImplementedError def directory_add_to_revision( self, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath ): raise NotImplementedError def directory_get_date_in_isochrone_frontier( self, directory: DirectoryEntry ) -> datetime: raise NotImplementedError def directory_get_early_dates( self, dirs: List[DirectoryEntry] ) -> Dict[bytes, datetime]: raise NotImplementedError # def directory_remove_from_isochrone_frontier( # self, directory: DirectoryEntry # ): # raise NotImplementedError def directory_set_date_in_isochrone_frontier( self, directory: DirectoryEntry, date: datetime ): raise NotImplementedError def origin_get_id(self, origin: OriginEntry) -> int: raise NotImplementedError def revision_add(self, revision: RevisionEntry): raise NotImplementedError def revision_add_before_revision( self, relative: RevisionEntry, revision: RevisionEntry ): raise NotImplementedError def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): raise NotImplementedError def revision_get_early_date(self, revision: RevisionEntry) -> datetime: raise NotImplementedError def revision_get_prefered_origin(self, revision: RevisionEntry) -> int: raise NotImplementedError def revision_in_history(self, revision: RevisionEntry) -> bool: raise NotImplementedError def revision_set_prefered_origin( self, origin: OriginEntry, revision: RevisionEntry ): raise NotImplementedError def revision_visited(self, revision: RevisionEntry) -> bool: raise NotImplementedError def directory_process_content( provenance: ProvenanceInterface, directory: DirectoryEntry, relative: DirectoryEntry, prefix: PosixPath, ): stack = [(directory, prefix)] while stack: current, path = stack.pop() for child in iter(current): if isinstance(child, FileEntry): # Add content to the relative directory with the computed path. provenance.content_add_to_directory(relative, child, path) else: # Recursively walk the child directory. stack.append((child, path / child.name)) def directory_update_content( stack: List[Tuple[DirectoryEntry, PosixPath]], provenance: ProvenanceInterface, revision: RevisionEntry, directory: DirectoryEntry, path: PosixPath, subdirs: Optional[List[DirectoryEntry]] = None, blobs: Optional[List[FileEntry]] = None, blobdates: Optional[Dict[bytes, datetime]] = None, ): assert revision.date is not None # Init optional parameters if not provided. if subdirs is None: subdirs = [child for child in directory if isinstance(child, DirectoryEntry)] if blobs is None: blobs = [child for child in directory if isinstance(child, FileEntry)] if blobdates is None: blobdates = provenance.content_get_early_dates(blobs) # Iterate over blobs updating their date if necessary. 
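# A blob keeps its previously known date unless the current revision is
# strictly earlier, in which case the revision's date becomes its new
# earliest known occurrence.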
for blob in blobs: date = blobdates.get(blob.id, None) if date is None or revision.date < date: provenance.content_set_early_date(blob, revision.date) # provenance.content_add_to_revision(revision, blob, path) # Push all subdirectories with its corresponding path to analyze them # recursively. for subdir in subdirs: stack.append((subdir, path / subdir.name)) def origin_add(provenance: ProvenanceInterface, origin: OriginEntry): # TODO: refactor to iterate over origin visit statuses and commit only once # per status. origin.id = provenance.origin_get_id(origin) for revision in origin.revisions: origin_add_revision(provenance, origin, revision) # Commit after each revision provenance.commit() # TODO: verify this! def origin_add_revision( provenance: ProvenanceInterface, origin: OriginEntry, revision: RevisionEntry ): stack: List[Tuple[Optional[RevisionEntry], RevisionEntry]] = [(None, revision)] while stack: relative, current = stack.pop() # Check if current revision has no prefered origin and update if necessary. prefered = provenance.revision_get_prefered_origin(current) if prefered is None: provenance.revision_set_prefered_origin(origin, current) ######################################################################## if relative is None: # This revision is pointed directly by the origin. visited = provenance.revision_visited(current) provenance.revision_add_to_origin(origin, current) if not visited: stack.append((current, current)) else: # This revision is a parent of another one in the history of the # relative revision. for parent in iter(current): visited = provenance.revision_visited(parent) if not visited: # The parent revision has never been seen before pointing # directly to an origin. known = provenance.revision_in_history(parent) if known: # The parent revision is already known in some other # revision's history. We should point it directly to # the origin and (eventually) walk its history. stack.append((None, parent)) else: # The parent revision was never seen before. We should # walk its history and associate it with the same # relative revision. provenance.revision_add_before_revision(relative, parent) stack.append((relative, parent)) else: # The parent revision already points to an origin, so its # history was properly processed before. We just need to # make sure it points to the current origin as well. provenance.revision_add_to_origin(origin, parent) def revision_add( provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry ): assert revision.date is not None assert revision.root is not None - logging.warning(f'Processing revision {hash_to_hex(revision.id)}') + logging.warning(f"Processing revision {hash_to_hex(revision.id)}") # Processed content starting from the revision's root directory date = provenance.revision_get_early_date(revision) if date is None or revision.date < date: provenance.revision_add(revision) revision_process_content( provenance, revision, DirectoryEntry(archive, revision.root, PosixPath(".")) ) return provenance.commit() def revision_process_content( - provenance: ProvenanceInterface, revision: RevisionEntry, directory: DirectoryEntry + provenance: ProvenanceInterface, revision: RevisionEntry, root: DirectoryEntry ): assert revision.date is not None # Stack of directories (and their paths) to be processed. 
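# The tree is walked with an explicit stack rather than recursion, so deeply
# nested directories cannot exhaust Python's recursion limit.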
- stack: List[Tuple[DirectoryEntry, PosixPath]] = [(directory, directory.name)] + stack: List[Tuple[DirectoryEntry, PosixPath]] = [(root, root.name)] # This dictionary will hold the computed dates for visited subdirectories inside the # isochrone frontier. innerdirs: Dict[PosixPath, Tuple[DirectoryEntry, datetime]] = {} # This dictionary will hold the computed dates for visited subdirectories outside # the isochrone frontier which are candidates to be added to the outer frontier (if # their parent is in the inner frontier). outerdirs: Dict[PosixPath, Tuple[DirectoryEntry, datetime]] = {} while stack: # Get next directory to process and query its date right before processing to be # sure we get the most recently updated value. current, prefix = stack.pop() date = provenance.directory_get_date_in_isochrone_frontier(current) if date is None: # The directory has never been seen on the outer isochrone frontier of # previously processed revisions. Its children should be analyzed. blobs = [child for child in current if isinstance(child, FileEntry)] subdirs = [child for child in current if isinstance(child, DirectoryEntry)] # Get the list of ids with no duplicates to ensure we have available dates # for all the elements. This prevents taking a wrong decision when a blob # occurs more than once in the same directory. ids = list(dict.fromkeys([child.id for child in blobs + subdirs])) if ids: # Known dates for the blobs in the current directory. blobdates = provenance.content_get_early_dates(blobs) # Known dates for the subdirectories in the current directory that # belong to the outer isochrone frontier of some previously processed # revision. knowndates = provenance.directory_get_early_dates(subdirs) # Known dates for the subdirectories in the current directory that are # inside the isochrone frontier of the revision. innerdates = { innerdir.id: innerdate for path, (innerdir, innerdate) in innerdirs.items() if is_child(path, prefix) } # Known dates for the subdirectories in the current directory that are # outside the isochrone frontier of the revision. outerdates = { outerdir.id: outerdate for path, (outerdir, outerdate) in outerdirs.items() if is_child(path, prefix) } # All known dates for child nodes of the current directory. assert not (innerdates.keys() & outerdates.keys()) dates = list( {**blobdates, **knowndates, **innerdates, **outerdates}.values() ) if len(dates) == len(ids): # All child nodes of current directory are already known. maxdate = max(dates) if maxdate < revision.date: # The directory is outside the isochrone frontier of the # revision. It is a candidate to be added to the outer frontier. outerdirs[prefix] = (current, maxdate) # Its children are removed since they are no longer candidates. outerdirs = { path: outerdir for path, outerdir in outerdirs.items() if not is_child(path, prefix) } - + elif maxdate == revision.date: # The current directory is inside the isochrone frontier. innerdirs[prefix] = (current, revision.date) # Add blobs present in this level to the revision. No need to # update dates as they are at most equal to current one. for blob in blobs: provenance.content_add_to_revision(revision, blob, prefix) # If any of its children was found outside the frontier it # should be added to the outer frontier now. 
if outerdates: for path, (outerdir, outerdate) in outerdirs.items(): if is_child(path, prefix): provenance.directory_set_date_in_isochrone_frontier( outerdir, outerdate ) provenance.directory_add_to_revision( revision, outerdir, path ) directory_process_content( provenance, directory=outerdir, relative=outerdir, prefix=PosixPath("."), ) # Removed processed elements to avoid duplicating work. outerdirs = { path: outerdir for path, outerdir in outerdirs.items() if not is_child(path, prefix) } + # There can still be subdirectories that are known to be in the + # outter isochrone frontier of previous processed revisions. + # Thus, they are not in the list of candidates but have to be + # added to current revisions as well. + for subdir in subdirs: + knowndate = blobdates.get(blob.id, None) + if knowndate is not None and knowndate <= revision.date: + # Less or equal since the directory could have been + # added to the outer isochrone frontier when processing + # a different directory's subtree of this very same + # revision. + provenance.directory_add_to_revision( + revision, subdir, prefix / subdir.name + ) else: - logging.warning(f'Should not happen 1: {hash_to_hex(revision.id)}') + logging.warning( + f"Should not happen 1: {hash_to_hex(revision.id)}" + ) assert False # The revision is out of order. The current directory does not # belong to the outer isochrone frontier of any previously # processed revision yet all its children nodes are known. They # should be re-analyzed (and timestamps eventually updated) and # current directory updated after them. stack.append((current, prefix)) directory_update_content( stack, provenance, revision, current, prefix, subdirs=subdirs, blobs=blobs, blobdates=blobdates, ) else: # Al least one child node is unknown, ie. the current directory is # inside the isochrone frontier of the current revision. Its child # nodes should be analyzed and current directory updated after them. stack.append((current, prefix)) directory_update_content( stack, provenance, revision, current, prefix, subdirs=subdirs, blobs=blobs, blobdates=blobdates, ) else: # Empty directory. Just consider it to be in the inner frontier of # current revision (ie. all its children are already "known"). innerdirs[prefix] = (current, revision.date) elif revision.date < date: # The revision is out of order. The current directory belongs to the # outer isochrone frontier of some previously processed revison but current # revision is earlier. The children nodes should be re-analyzed (and # timestamps eventually updated) and current directory updated after them. # Current directory's date in the outer isochrone frontier should be updated # as well. - logging.warning(f'Should not happen 2: {hash_to_hex(revision.id)}') + logging.warning(f"Should not happen 2: {hash_to_hex(revision.id)}") assert False stack.append((current, prefix)) directory_update_content(stack, provenance, revision, current, prefix) provenance.directory_set_date_in_isochrone_frontier(current, revision.date) # FIXME: although this might not lose any occurrence, when recursively # re-analyzing the current directory it will *always* enter the next brach - # of the if-then-else and be added to the revision since it is already + # of the if-then-else and be added to the revision since it is already # tagged as and outer frontier directory. 
This may lead to inconsistencies # if this directory holds the very first occurrence of a blob in history, # since this occurrence won't end up in the content_early_in_rev table as # expected but split between content_in_dir and directory_in_rev instead. else: # The directory has already been seen on the outer isochrone frontier of an # earlier revision. Just add it to the current revision. provenance.directory_add_to_revision(revision, current, prefix) if outerdirs: # This should only happen if the root directory is in the outer frontier. - assert len(outerdirs) == 1 and directory.name in outerdirs - outerdir, outerdate = outerdirs[directory.name] + assert len(outerdirs) == 1 and root.name in outerdirs + outerdir, outerdate = outerdirs[root.name] provenance.directory_set_date_in_isochrone_frontier(outerdir, outerdate) - provenance.directory_add_to_revision(revision, outerdir, directory.name) + provenance.directory_add_to_revision(revision, outerdir, root.name) directory_process_content( provenance, directory=outerdir, relative=outerdir, prefix=PosixPath(".") )
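For context, a minimal end-to-end sketch of how the pieces touched by this patch fit together. It is illustrative only: the conninfo values are placeholders, and obtaining an ArchiveInterface and a RevisionEntry happens in other swh.provenance modules not shown in this diff, so those steps are left as comments.

from swh.provenance.postgresql.db_utils import connect
from swh.provenance.postgresql.provenance import ProvenancePostgreSQL, create_database
from swh.provenance.provenance import revision_add

# Placeholder connection parameters.
conninfo = {"host": "localhost", "user": "swh", "dbname": "postgres"}

# Create (or re-create) the provenance database and load provenance.sql into it;
# create_database() also rewrites conninfo["dbname"] to point at the new database.
create_database(connect(conninfo), conninfo, "provenance_test")

# Build the PostgreSQL-backed provenance interface on a fresh connection.
provenance = ProvenancePostgreSQL(connect(conninfo))

# archive (an ArchiveInterface) and revision (a RevisionEntry) come from
# elsewhere in swh.provenance:
# revision_add(provenance, archive, revision)

# Look up the earliest known occurrence of a blob given its id (bytes):
# row = provenance.content_find_first(blobid)  # -> (blob, rev, date, path) or None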