diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py index 5a276bf..0dc10a8 100644 --- a/swh/provenance/cli.py +++ b/swh/provenance/cli.py @@ -1,602 +1,646 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # WARNING: do not import unnecessary things here to keep cli startup time under # control from datetime import datetime, timezone from functools import partial import os from typing import Any, Dict, Generator, Optional, Tuple import click from deprecated import deprecated import iso8601 import yaml from swh.core import config from swh.core.cli import CONTEXT_SETTINGS from swh.core.cli import swh as swh_cli_group from swh.model.hashutil import hash_to_bytes, hash_to_hex from swh.model.model import Sha1Git # All generic config code should reside in swh.core.config CONFIG_ENVVAR = "SWH_CONFIG_FILENAME" DEFAULT_PATH = os.environ.get(CONFIG_ENVVAR, None) DEFAULT_CONFIG: Dict[str, Any] = { "provenance": { "archive": { # Storage API based Archive object # "cls": "api", # "storage": { # "cls": "remote", # "url": "http://uffizi.internal.softwareheritage.org:5002", # } # Direct access Archive object "cls": "direct", "db": { "host": "belvedere.internal.softwareheritage.org", "port": 5432, "dbname": "softwareheritage", "user": "guest", }, }, "storage": { # Local PostgreSQL Storage # "cls": "postgresql", # "db": { # "host": "localhost", # "user": "postgres", # "password": "postgres", # "dbname": "provenance", # }, # Remote RabbitMQ/PostgreSQL Storage "cls": "rabbitmq", "url": "amqp://localhost:5672/%2f", "storage_config": { "cls": "postgresql", "db": { "host": "localhost", "user": "postgres", "password": "postgres", "dbname": "provenance", }, }, "batch_size": 100, "prefetch_count": 100, }, } } CONFIG_FILE_HELP = f""" \b Configuration can be loaded from a yaml file given either as --config-file option or the {CONFIG_ENVVAR} environment variable. If no configuration file is specified, use the following default configuration:: \b {yaml.dump(DEFAULT_CONFIG)}""" PROVENANCE_HELP = f"""Software Heritage provenance index database tools {CONFIG_FILE_HELP} """ @swh_cli_group.group( name="provenance", context_settings=CONTEXT_SETTINGS, help=PROVENANCE_HELP ) @click.option( "-C", "--config-file", default=None, type=click.Path(exists=True, dir_okay=False, path_type=str), help="""YAML configuration file.""", ) @click.option( "-P", "--profile", default=None, type=click.Path(exists=False, dir_okay=False, path_type=str), help="""Enable profiling to specified file.""", ) @click.pass_context def cli(ctx: click.core.Context, config_file: Optional[str], profile: str) -> None: if ( config_file is None and DEFAULT_PATH is not None and config.config_exists(DEFAULT_PATH) ): config_file = DEFAULT_PATH if config_file is None: conf = DEFAULT_CONFIG else: # read_raw_config do not fail on ENOENT if not os.path.exists(config_file): raise FileNotFoundError(config_file) conf = yaml.safe_load(open(config_file, "rb")) ctx.ensure_object(dict) ctx.obj["config"] = conf if profile: import atexit import cProfile print("Profiling...") pr = cProfile.Profile() pr.enable() def exit() -> None: pr.disable() pr.dump_stats(profile) atexit.register(exit) @cli.group(name="origin") @click.pass_context def origin(ctx: click.core.Context): from . 
import get_archive, get_provenance archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) ctx.obj["provenance"] = provenance ctx.obj["archive"] = archive @origin.command(name="from-csv") @click.argument("filename", type=click.Path(exists=True)) @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (origins) to read from the input file.""", ) @click.pass_context def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]): from .origin import CSVOriginIterator, origin_add provenance = ctx.obj["provenance"] archive = ctx.obj["archive"] origins_provider = generate_origin_tuples(filename) origins = CSVOriginIterator(origins_provider, limit=limit) with provenance: for origin in origins: origin_add(provenance, archive, [origin]) @origin.command(name="from-journal") @click.pass_context def origin_from_journal(ctx: click.core.Context): from swh.journal.client import get_journal_client from .journal_client import process_journal_origins provenance = ctx.obj["provenance"] archive = ctx.obj["archive"] journal_cfg = ctx.obj["config"].get("journal_client", {}) worker_fn = partial( process_journal_origins, archive=archive, provenance=provenance, ) cls = journal_cfg.pop("cls", None) or "kafka" client = get_journal_client( cls, **{ **journal_cfg, "object_types": ["origin_visit_status"], }, ) try: client.process(worker_fn) except KeyboardInterrupt: ctx.exit(0) else: print("Done.") finally: client.close() @cli.group(name="revision") @click.pass_context def revision(ctx: click.core.Context): from . import get_archive, get_provenance archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) ctx.obj["provenance"] = provenance ctx.obj["archive"] = archive @revision.command(name="from-csv") @click.argument("filename", type=click.Path(exists=True)) @click.option( "-a", "--track-all", default=True, type=bool, help="""Index all occurrences of files in the development history.""", ) @click.option( "-f", "--flatten", default=True, type=bool, help="""Create flat models for directories in the isochrone frontier.""", ) @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (revisions) to read from the input file.""", ) @click.option( "-m", "--min-depth", default=1, type=int, help="""Set minimum depth (in the directory tree) at which an isochrone """ """frontier can be defined.""", ) @click.option( "-r", "--reuse", default=True, type=bool, help="""Prioritize the usage of previously defined isochrone frontiers """ """whenever possible.""", ) @click.option( "-s", "--min-size", default=0, type=int, help="""Set the minimum size (in bytes) of files to be indexed. 
""" """Any smaller file will be ignored.""", ) @click.pass_context def revision_from_csv( ctx: click.core.Context, filename: str, track_all: bool, flatten: bool, limit: Optional[int], min_depth: int, reuse: bool, min_size: int, ) -> None: from .revision import CSVRevisionIterator, revision_add provenance = ctx.obj["provenance"] archive = ctx.obj["archive"] revisions_provider = generate_revision_tuples(filename) revisions = CSVRevisionIterator(revisions_provider, limit=limit) with provenance: for revision in revisions: revision_add( provenance, archive, [revision], trackall=track_all, flatten=flatten, lower=reuse, mindepth=min_depth, minsize=min_size, ) @revision.command(name="from-journal") @click.option( "-a", "--track-all", default=True, type=bool, help="""Index all occurrences of files in the development history.""", ) @click.option( "-f", "--flatten", default=True, type=bool, help="""Create flat models for directories in the isochrone frontier.""", ) @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (revisions) to read from the input file.""", ) @click.option( "-m", "--min-depth", default=1, type=int, help="""Set minimum depth (in the directory tree) at which an isochrone """ """frontier can be defined.""", ) @click.option( "-r", "--reuse", default=True, type=bool, help="""Prioritize the usage of previously defined isochrone frontiers """ """whenever possible.""", ) @click.option( "-s", "--min-size", default=0, type=int, help="""Set the minimum size (in bytes) of files to be indexed. """ """Any smaller file will be ignored.""", ) @click.pass_context def revision_from_journal( ctx: click.core.Context, track_all: bool, flatten: bool, limit: Optional[int], min_depth: int, reuse: bool, min_size: int, ) -> None: from swh.journal.client import get_journal_client from .journal_client import process_journal_revisions provenance = ctx.obj["provenance"] archive = ctx.obj["archive"] journal_cfg = ctx.obj["config"].get("journal_client", {}) worker_fn = partial( process_journal_revisions, archive=archive, provenance=provenance, ) cls = journal_cfg.pop("cls", None) or "kafka" client = get_journal_client( cls, **{ **journal_cfg, "object_types": ["revision"], }, ) try: client.process(worker_fn) except KeyboardInterrupt: ctx.exit(0) else: print("Done.") finally: client.close() +@cli.group(name="directory") +@click.pass_context +def directory(ctx: click.core.Context): + from . import get_archive, get_provenance + + archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) + provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) + + ctx.obj["provenance"] = provenance + ctx.obj["archive"] = archive + + +@directory.command(name="flatten") +@click.option( + "--range-from", type=str, help="start ID of the range of directories to flatten" +) +@click.option( + "--range-to", type=str, help="stop ID of the range of directories to flatten" +) +@click.option( + "-s", + "--min-size", + default=0, + type=int, + help="""Set the minimum size (in bytes) of files to be indexed. 
+ Any smaller file will be ignored.""", +) +@click.pass_context +def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size): + from swh.provenance.directory import directory_flatten_range + + provenance = ctx.obj["provenance"] + archive = ctx.obj["archive"] + + directory_flatten_range( + provenance, + archive, + hash_to_bytes(range_from), + hash_to_bytes(range_to), + min_size, + ) + + +# old (deprecated) commands @cli.command(name="iter-frontiers") @click.argument("filename") @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (directories) to read from the input file.""", ) @click.option( "-s", "--min-size", default=0, type=int, help="""Set the minimum size (in bytes) of files to be indexed. """ """Any smaller file will be ignored.""", ) @click.pass_context def iter_frontiers( ctx: click.core.Context, filename: str, limit: Optional[int], min_size: int, ) -> None: """Process a provided list of directories in the isochrone frontier.""" from . import get_archive, get_provenance from .directory import CSVDirectoryIterator, directory_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) directories_provider = generate_directory_ids(filename) directories = CSVDirectoryIterator(directories_provider, limit=limit) with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: for directory in directories: directory_add( provenance, archive, [directory], minsize=min_size, ) def generate_directory_ids( filename: str, ) -> Generator[Sha1Git, None, None]: for line in open(filename, "r"): if line.strip(): yield hash_to_bytes(line.strip()) @cli.command(name="iter-revisions") @click.argument("filename") @click.option( "-a", "--track-all", default=True, type=bool, help="""Index all occurrences of files in the development history.""", ) @click.option( "-f", "--flatten", default=True, type=bool, help="""Create flat models for directories in the isochrone frontier.""", ) @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (revisions) to read from the input file.""", ) @click.option( "-m", "--min-depth", default=1, type=int, help="""Set minimum depth (in the directory tree) at which an isochrone """ """frontier can be defined.""", ) @click.option( "-r", "--reuse", default=True, type=bool, help="""Prioritize the usage of previously defined isochrone frontiers """ """whenever possible.""", ) @click.option( "-s", "--min-size", default=0, type=int, help="""Set the minimum size (in bytes) of files to be indexed. """ """Any smaller file will be ignored.""", ) @click.pass_context def iter_revisions( ctx: click.core.Context, filename: str, track_all: bool, flatten: bool, limit: Optional[int], min_depth: int, reuse: bool, min_size: int, ) -> None: """Process a provided list of revisions.""" from . 
import get_archive, get_provenance from .revision import CSVRevisionIterator, revision_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) revisions_provider = generate_revision_tuples(filename) revisions = CSVRevisionIterator(revisions_provider, limit=limit) with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: for revision in revisions: revision_add( provenance, archive, [revision], trackall=track_all, flatten=flatten, lower=reuse, mindepth=min_depth, minsize=min_size, ) def generate_revision_tuples( filename: str, ) -> Generator[Tuple[Sha1Git, datetime, Sha1Git], None, None]: for line in open(filename, "r"): if line.strip(): revision, date, root = line.strip().split(",") yield ( hash_to_bytes(revision), iso8601.parse_date(date, default_timezone=timezone.utc), hash_to_bytes(root), ) @cli.command(name="iter-origins") @click.argument("filename") @click.option( "-l", "--limit", type=int, help="""Limit the amount of entries (origins) to read from the input file.""", ) @click.pass_context @deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead") def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: """Process a provided list of origins.""" from . import get_archive, get_provenance from .origin import CSVOriginIterator, origin_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) origins_provider = generate_origin_tuples(filename) origins = CSVOriginIterator(origins_provider, limit=limit) with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: for origin in origins: origin_add(provenance, archive, [origin]) def generate_origin_tuples(filename: str) -> Generator[Tuple[str, bytes], None, None]: for line in open(filename, "r"): if line.strip(): url, snapshot = line.strip().split(",") yield (url, hash_to_bytes(snapshot)) @cli.command(name="find-first") @click.argument("swhid") @click.pass_context def find_first(ctx: click.core.Context, swhid: str) -> None: """Find first occurrence of the requested blob.""" from . import get_provenance with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance: occur = provenance.content_find_first(hash_to_bytes(swhid)) if occur is not None: print( f"swh:1:cnt:{hash_to_hex(occur.content)}, " f"swh:1:rev:{hash_to_hex(occur.revision)}, " f"{occur.date}, " f"{occur.origin}, " f"{os.fsdecode(occur.path)}" ) else: print(f"Cannot find a content with the id {swhid}") @cli.command(name="find-all") @click.argument("swhid") @click.option( "-l", "--limit", type=int, help="""Limit the amount results to be retrieved.""" ) @click.pass_context def find_all(ctx: click.core.Context, swhid: str, limit: Optional[int]) -> None: """Find all occurrences of the requested blob.""" from . 
import get_provenance

    with get_provenance(**ctx.obj["config"]["provenance"]["storage"]) as provenance:
        for occur in provenance.content_find_all(hash_to_bytes(swhid), limit=limit):
            print(
                f"swh:1:cnt:{hash_to_hex(occur.content)}, "
                f"swh:1:rev:{hash_to_hex(occur.revision)}, "
                f"{occur.date}, "
                f"{occur.origin}, "
                f"{os.fsdecode(occur.path)}"
            )

diff --git a/swh/provenance/directory.py b/swh/provenance/directory.py
index 5430454..4f3ebef 100644
--- a/swh/provenance/directory.py
+++ b/swh/provenance/directory.py
@@ -1,86 +1,108 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
from typing import Generator, Iterable, Iterator, List, Optional

from swh.core.statsd import statsd
from swh.model.model import Sha1Git

from .archive import ArchiveInterface
from .interface import ProvenanceInterface
from .model import DirectoryEntry

REVISION_DURATION_METRIC = "swh_provenance_directory_duration_seconds"


class CSVDirectoryIterator:
    """Iterator over directories typically present in the given CSV file.

    The input is an iterator that produces ids (sha1_git) of directories
    """

    def __init__(
        self,
        directories: Iterable[Sha1Git],
        limit: Optional[int] = None,
    ) -> None:
        self.directories: Iterator[Sha1Git]
        if limit is not None:
            from itertools import islice

            self.directories = islice(directories, limit)
        else:
            self.directories = iter(directories)

    def __iter__(self) -> Generator[DirectoryEntry, None, None]:
        for id in self.directories:
            yield DirectoryEntry(id)


-@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
+def directory_flatten_range(
+    provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
+    start_id: Sha1Git,
+    end_id: Sha1Git,
+    minsize: int = 0,
+    commit: bool = True,
+) -> None:
+    """Flatten the known directories from ``start_id`` to ``end_id``."""
+    current = start_id
+    while current < end_id:
+        dirs = provenance.storage.directory_iter_not_flattenned(
+            limit=100, start_id=current
+        )
+        if not dirs:
+            break
+        directory_add(
+            provenance, archive, [DirectoryEntry(id=d) for d in dirs], minsize, commit
+        )
+        current = dirs[-1]
+
+
+@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "add"})
def directory_add(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    directories: List[DirectoryEntry],
    minsize: int = 0,
    commit: bool = True,
) -> None:
    for directory in directories:
        # Only flatten directories that are present in the provenance model, but not
        # flattenned yet.
        flattenned = provenance.directory_already_flattenned(directory)
        if flattenned is not None and not flattenned:
            directory_flatten(
                provenance,
                archive,
                directory,
                minsize=minsize,
            )
    if commit:
        provenance.flush()


@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten"})
def directory_flatten(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    directory: DirectoryEntry,
    minsize: int = 0,
) -> None:
    """Recursively retrieve all the files of 'directory' and insert them in the
    'provenance' database in the 'content_to_directory' table.
    """
    stack = [(directory, b"")]
    while stack:
        current, prefix = stack.pop()
        current.retrieve_children(archive, minsize=minsize)
        for f_child in current.files:
            # Add content to the directory with the computed prefix.
            provenance.content_add_to_directory(directory, f_child, prefix)
        for d_child in current.dirs:
            # Recursively walk the child directory.
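            # (The `prefix` pushed with each child below is the child's path
            # relative to the root `directory`; content_add_to_directory()
            # above records that prefix as the blob's location inside the
            # flattened directory.)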
stack.append((d_child, os.path.join(prefix, d_child.name))) provenance.directory_flag_as_flattenned(directory) diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py index e0e936d..81f62b4 100644 --- a/swh/provenance/interface.py +++ b/swh/provenance/interface.py @@ -1,395 +1,402 @@ # Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from dataclasses import dataclass from datetime import datetime import enum from types import TracebackType -from typing import Dict, Generator, Iterable, Optional, Set, Type, Union +from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union from typing_extensions import Protocol, runtime_checkable from swh.core.api import remote_api_endpoint from swh.model.model import Sha1Git from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry class EntityType(enum.Enum): CONTENT = "content" DIRECTORY = "directory" REVISION = "revision" ORIGIN = "origin" class RelationType(enum.Enum): CNT_EARLY_IN_REV = "content_in_revision" CNT_IN_DIR = "content_in_directory" DIR_IN_REV = "directory_in_revision" REV_IN_ORG = "revision_in_origin" REV_BEFORE_REV = "revision_before_revision" @dataclass(eq=True, frozen=True) class ProvenanceResult: content: Sha1Git revision: Sha1Git date: datetime origin: Optional[str] path: bytes @dataclass(eq=True, frozen=True) class DirectoryData: """Object representing the data associated to a directory in the provenance model, where `date` is the date of the directory in the isochrone frontier, and `flat` is a flag acknowledging that a flat model for the elements outside the frontier has already been created. """ date: datetime flat: bool @dataclass(eq=True, frozen=True) class RevisionData: """Object representing the data associated to a revision in the provenance model, where `date` is the optional date of the revision (specifying it acknowledges that the revision was already processed by the revision-content algorithm); and `origin` identifies the preferred origin for the revision, if any. """ date: Optional[datetime] origin: Optional[Sha1Git] @dataclass(eq=True, frozen=True) class RelationData: """Object representing a relation entry in the provenance model, where `src` and `dst` are the sha1 ids of the entities being related, and `path` is optional depending on the relation being represented. """ dst: Sha1Git path: Optional[bytes] @runtime_checkable class ProvenanceStorageInterface(Protocol): def __enter__(self) -> ProvenanceStorageInterface: ... def __exit__( self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: ... @remote_api_endpoint("close") def close(self) -> None: """Close connection to the storage and release resources.""" ... @remote_api_endpoint("content_add") def content_add(self, cnts: Dict[Sha1Git, datetime]) -> bool: """Add blobs identified by sha1 ids, with an associated date (as paired in `cnts`) to the provenance storage. Return a boolean stating whether the information was successfully stored. """ ... @remote_api_endpoint("content_find_first") def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: """Retrieve the first occurrence of the blob identified by `id`.""" ... 
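The `flat` flag declared on `DirectoryData` above is the bit this whole patch pivots on. A minimal sketch of its life cycle, assuming `storage` is an already-opened `ProvenanceStorageInterface`; the merge semantics quoted in the comments come from the PostgreSQL backend further down in this diff:

from datetime import datetime, timezone

from swh.model.hashutil import hash_to_bytes
from swh.provenance.interface import DirectoryData

dir_id = hash_to_bytes("48007c961cc734d1f63886d0413a6dc605e3e2ea")
now = datetime.now(timezone.utc)

# First sighting: the directory sits on the isochrone frontier, not yet flat.
storage.directory_add({dir_id: DirectoryData(date=now, flat=False)})

# After its flat model is written: directory_add merges dates with LEAST() and
# flags with OR, so a directory never reverts to flat=False.
storage.directory_add({dir_id: DirectoryData(date=now, flat=True)})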
@remote_api_endpoint("content_find_all") def content_find_all( self, id: Sha1Git, limit: Optional[int] = None ) -> Generator[ProvenanceResult, None, None]: """Retrieve all the occurrences of the blob identified by `id`.""" ... @remote_api_endpoint("content_get") def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]: """Retrieve the associated date for each blob sha1 in `ids`.""" ... @remote_api_endpoint("directory_add") def directory_add(self, dirs: Dict[Sha1Git, DirectoryData]) -> bool: """Add directories identified by sha1 ids, with associated date and (optional) flatten flag (as paired in `dirs`) to the provenance storage. If the flatten flag is set to None, the previous value present in the storage is preserved. Return a boolean stating if the information was successfully stored. """ ... @remote_api_endpoint("directory_get") def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, DirectoryData]: """Retrieve the associated date and (optional) flatten flag for each directory sha1 in `ids`. If some directories has no associated date, it is not present in the resulting dictionary. """ ... + @remote_api_endpoint("directory_iter_not_flattenned") + def directory_iter_not_flattenned( + self, limit: int, start_id: Sha1Git + ) -> List[Sha1Git]: + """Retrieve the unflattenned directories after ``start_id`` up to ``limit`` entries.""" + ... + @remote_api_endpoint("entity_get_all") def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]: """Retrieve all sha1 ids for entities of type `entity` present in the provenance model. This method is used only in tests. """ ... @remote_api_endpoint("location_add") def location_add(self, paths: Iterable[bytes]) -> bool: """Register the given `paths` in the storage.""" ... @remote_api_endpoint("location_get_all") def location_get_all(self) -> Set[bytes]: """Retrieve all paths present in the provenance model. This method is used only in tests.""" ... @remote_api_endpoint("open") def open(self) -> None: """Open connection to the storage and allocate necessary resources.""" ... @remote_api_endpoint("origin_add") def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool: """Add origins identified by sha1 ids, with their corresponding url (as paired in `orgs`) to the provenance storage. Return a boolean stating if the information was successfully stored. """ ... @remote_api_endpoint("origin_get") def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]: """Retrieve the associated url for each origin sha1 in `ids`.""" ... @remote_api_endpoint("revision_add") def revision_add( self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]] ) -> bool: """Add revisions identified by sha1 ids, with optional associated date or origin (as paired in `revs`) to the provenance storage. Return a boolean stating if the information was successfully stored. """ ... @remote_api_endpoint("revision_get") def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]: """Retrieve the associated date and origin for each revision sha1 in `ids`. If some revision has no associated date nor origin, it is not present in the resulting dictionary. """ ... @remote_api_endpoint("relation_add") def relation_add( self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]] ) -> bool: """Add entries in the selected `relation`. This method assumes all entities being related are already registered in the storage. See `content_add`, `directory_add`, `origin_add`, and `revision_add`. """ ... 
@remote_api_endpoint("relation_get") def relation_get( self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False ) -> Dict[Sha1Git, Set[RelationData]]: """Retrieve all entries in the selected `relation` whose source entities are identified by some sha1 id in `ids`. If `reverse` is set, destination entities are matched instead. """ ... @remote_api_endpoint("relation_get_all") def relation_get_all( self, relation: RelationType ) -> Dict[Sha1Git, Set[RelationData]]: """Retrieve all entries in the selected `relation` that are present in the provenance model. This method is used only in tests. """ ... @remote_api_endpoint("with_path") def with_path(self) -> bool: ... @runtime_checkable class ProvenanceInterface(Protocol): storage: ProvenanceStorageInterface def __enter__(self) -> ProvenanceInterface: ... def __exit__( self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: ... def close(self) -> None: """Close connection to the underlying `storage` and release resources.""" ... def flush(self) -> None: """Flush internal cache to the underlying `storage`.""" ... def flush_if_necessary(self) -> bool: """Flush internal cache to the underlying `storage`, if the cache reached a threshold (MAX_CACHE_ELEMENTS). Return True if the cache is flushed, false otherwise. """ ... def content_add_to_directory( self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes ) -> None: """Associate `blob` with `directory` in the provenance model. `prefix` is the relative path from `directory` to `blob` (excluding `blob`'s name). """ ... def content_add_to_revision( self, revision: RevisionEntry, blob: FileEntry, prefix: bytes ) -> None: """Associate `blob` with `revision` in the provenance model. `prefix` is the absolute path from `revision`'s root directory to `blob` (excluding `blob`'s name). """ ... def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: """Retrieve the first occurrence of the blob identified by `id`.""" ... def content_find_all( self, id: Sha1Git, limit: Optional[int] = None ) -> Generator[ProvenanceResult, None, None]: """Retrieve all the occurrences of the blob identified by `id`.""" ... def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]: """Retrieve the earliest known date of `blob`.""" ... def content_get_early_dates( self, blobs: Iterable[FileEntry] ) -> Dict[Sha1Git, datetime]: """Retrieve the earliest known date for each blob in `blobs`. If some blob has no associated date, it is not present in the resulting dictionary. """ ... def content_set_early_date(self, blob: FileEntry, date: datetime) -> None: """Associate `date` to `blob` as it's earliest known date.""" ... def directory_add_to_revision( self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes ) -> None: """Associate `directory` with `revision` in the provenance model. `path` is the absolute path from `revision`'s root directory to `directory` (including `directory`'s name). """ ... def directory_already_flattenned(self, directory: DirectoryEntry) -> Optional[bool]: """Check if the directory is already flattenned in the provenance model. If the directory is unknown for the model, the methods returns None. """ ... def directory_flag_as_flattenned(self, directory: DirectoryEntry) -> None: """Mark the directory as flattenned in the provenance model. If the directory is unknown for the model, this method has no effect. """ ... 
def directory_get_date_in_isochrone_frontier( self, directory: DirectoryEntry ) -> Optional[datetime]: """Retrieve the earliest known date of `directory` as an isochrone frontier in the provenance model. """ ... def directory_get_dates_in_isochrone_frontier( self, dirs: Iterable[DirectoryEntry] ) -> Dict[Sha1Git, datetime]: """Retrieve the earliest known date for each directory in `dirs` as isochrone frontiers provenance model. If some directory has no associated date, it is not present in the resulting dictionary. """ ... def directory_set_date_in_isochrone_frontier( self, directory: DirectoryEntry, date: datetime ) -> None: """Associate `date` to `directory` as it's earliest known date as an isochrone frontier in the provenance model. """ ... def open(self) -> None: """Open connection to the underlying `storage` and allocate necessary resources. """ ... def origin_add(self, origin: OriginEntry) -> None: """Add `origin` to the provenance model.""" ... def revision_add(self, revision: RevisionEntry) -> None: """Add `revision` to the provenance model. This implies storing `revision`'s date in the model, thus `revision.date` must be a valid date. """ ... def revision_add_before_revision( self, head: RevisionEntry, revision: RevisionEntry ) -> None: """Associate `revision` to `head` as an ancestor of the latter.""" ... def revision_add_to_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: """Associate `revision` to `origin` as a head revision of the latter (ie. the target of an snapshot for `origin` in the archive).""" ... def revision_is_head(self, revision: RevisionEntry) -> bool: """Check if `revision` is associated as a head revision for some origin.""" ... def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]: """Retrieve the date associated to `revision`.""" ... def revision_get_preferred_origin( self, revision: RevisionEntry ) -> Optional[Sha1Git]: """Retrieve the preferred origin associated to `revision`.""" ... def revision_set_preferred_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: """Associate `origin` as the preferred origin for `revision`.""" ... 
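That closes the two protocols. Both are `runtime_checkable` and double as context managers, which is exactly how the CLI consumes them. A sketch of standalone usage, assuming the sample PostgreSQL parameters from the commented-out block in DEFAULT_CONFIG (cli.py) and a hypothetical `blob_id`:

from swh.provenance import get_provenance
from swh.provenance.interface import ProvenanceInterface

provenance = get_provenance(
    cls="postgresql",
    db={
        "host": "localhost",
        "user": "postgres",
        "password": "postgres",
        "dbname": "provenance",
    },
)
assert isinstance(provenance, ProvenanceInterface)  # runtime_checkable

with provenance:  # __enter__ opens the storage, __exit__ closes it
    occurrence = provenance.content_find_first(blob_id)  # blob_id: a Sha1Git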
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py index 318a8c7..ed37bd9 100644 --- a/swh/provenance/postgresql/provenance.py +++ b/swh/provenance/postgresql/provenance.py @@ -1,385 +1,399 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from __future__ import annotations from contextlib import contextmanager from datetime import datetime from functools import wraps import itertools import logging from types import TracebackType from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union import psycopg2.extensions import psycopg2.extras from swh.core.db import BaseDb from swh.core.statsd import statsd from swh.model.model import Sha1Git from ..interface import ( DirectoryData, EntityType, ProvenanceResult, ProvenanceStorageInterface, RelationData, RelationType, RevisionData, ) LOGGER = logging.getLogger(__name__) STORAGE_DURATION_METRIC = "swh_provenance_storage_postgresql_duration_seconds" def handle_raise_on_commit(f): @wraps(f) def handle(self, *args, **kwargs): try: return f(self, *args, **kwargs) except BaseException as ex: # Unexpected error occurred, rollback all changes and log message LOGGER.exception("Unexpected error") if self.raise_on_commit: raise ex return False return handle class ProvenanceStoragePostgreSql: def __init__( self, page_size: Optional[int] = None, raise_on_commit: bool = False, **kwargs ) -> None: self.conn: Optional[psycopg2.extensions.connection] = None self.conn_args = kwargs self._flavor: Optional[str] = None self.page_size = page_size self.raise_on_commit = raise_on_commit def __enter__(self) -> ProvenanceStorageInterface: self.open() return self def __exit__( self, exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], ) -> None: self.close() @contextmanager def transaction( self, readonly: bool = False ) -> Generator[psycopg2.extras.RealDictCursor, None, None]: if self.conn is None: raise RuntimeError( "Tried to access ProvenanceStoragePostgreSQL transaction() without opening it" ) self.conn.set_session(readonly=readonly) with self.conn: with self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: yield cur @property def flavor(self) -> str: if self._flavor is None: with self.transaction(readonly=True) as cursor: cursor.execute("SELECT swh_get_dbflavor() AS flavor") self._flavor = cursor.fetchone()["flavor"] assert self._flavor is not None return self._flavor @property def denormalized(self) -> bool: return "denormalized" in self.flavor @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "close"}) def close(self) -> None: assert self.conn is not None self.conn.close() @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "content_add"}) @handle_raise_on_commit def content_add(self, cnts: Dict[Sha1Git, datetime]) -> bool: if cnts: sql = """ INSERT INTO content(sha1, date) VALUES %s ON CONFLICT (sha1) DO UPDATE SET date=LEAST(EXCLUDED.date,content.date) """ page_size = self.page_size or len(cnts) with self.transaction() as cursor: psycopg2.extras.execute_values( cursor, sql, argslist=cnts.items(), page_size=page_size ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "content_find_first"}) def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: sql = "SELECT * FROM 
swh_provenance_content_find_first(%s)" with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=(id,)) row = cursor.fetchone() return ProvenanceResult(**row) if row is not None else None @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "content_find_all"}) def content_find_all( self, id: Sha1Git, limit: Optional[int] = None ) -> Generator[ProvenanceResult, None, None]: sql = "SELECT * FROM swh_provenance_content_find_all(%s, %s)" with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=(id, limit)) yield from (ProvenanceResult(**row) for row in cursor) @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "content_get"}) def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]: dates: Dict[Sha1Git, datetime] = {} sha1s = tuple(ids) if sha1s: # TODO: consider splitting this query in several ones if sha1s is too big! values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, date FROM content WHERE sha1 IN ({values}) AND date IS NOT NULL """ with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=sha1s) dates.update((row["sha1"], row["date"]) for row in cursor) return dates @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "directory_add"}) @handle_raise_on_commit def directory_add(self, dirs: Dict[Sha1Git, DirectoryData]) -> bool: data = [(sha1, rev.date, rev.flat) for sha1, rev in dirs.items()] if data: sql = """ INSERT INTO directory(sha1, date, flat) VALUES %s ON CONFLICT (sha1) DO UPDATE SET date=LEAST(EXCLUDED.date, directory.date), flat=(EXCLUDED.flat OR directory.flat) """ page_size = self.page_size or len(data) with self.transaction() as cursor: psycopg2.extras.execute_values( cur=cursor, sql=sql, argslist=data, page_size=page_size ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "directory_get"}) def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, DirectoryData]: result: Dict[Sha1Git, DirectoryData] = {} sha1s = tuple(ids) if sha1s: # TODO: consider splitting this query in several ones if sha1s is too big! 
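            # The IN clause below is built as one "%s" placeholder per id; the
            # ids themselves travel as bound query parameters, never as
            # literals spliced into the SQL text.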
values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, date, flat FROM directory WHERE sha1 IN ({values}) AND date IS NOT NULL """ with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=sha1s) result.update( (row["sha1"], DirectoryData(date=row["date"], flat=row["flat"])) for row in cursor ) return result + @statsd.timed( + metric=STORAGE_DURATION_METRIC, tags={"method": "directory_iter_not_flattenned"} + ) + def directory_iter_not_flattenned( + self, limit: int, start_id: Sha1Git + ) -> List[Sha1Git]: + sql = """ + SELECT sha1 FROM directory + WHERE flat=false AND sha1>%s ORDER BY sha1 LIMIT %s + """ + with self.transaction(readonly=True) as cursor: + cursor.execute(query=sql, vars=(start_id, limit)) + return [row["sha1"] for row in cursor] + @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "entity_get_all"}) def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]: with self.transaction(readonly=True) as cursor: cursor.execute(f"SELECT sha1 FROM {entity.value}") return {row["sha1"] for row in cursor} @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "location_add"}) @handle_raise_on_commit def location_add(self, paths: Iterable[bytes]) -> bool: if self.with_path(): values = [(path,) for path in paths] if values: sql = """ INSERT INTO location(path) VALUES %s ON CONFLICT DO NOTHING """ page_size = self.page_size or len(values) with self.transaction() as cursor: psycopg2.extras.execute_values( cursor, sql, argslist=values, page_size=page_size ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "location_get_all"}) def location_get_all(self) -> Set[bytes]: with self.transaction(readonly=True) as cursor: cursor.execute("SELECT location.path AS path FROM location") return {row["path"] for row in cursor} @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "origin_add"}) @handle_raise_on_commit def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool: if orgs: sql = """ INSERT INTO origin(sha1, url) VALUES %s ON CONFLICT DO NOTHING """ page_size = self.page_size or len(orgs) with self.transaction() as cursor: psycopg2.extras.execute_values( cur=cursor, sql=sql, argslist=orgs.items(), page_size=page_size, ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "open"}) def open(self) -> None: self.conn = BaseDb.connect(**self.conn_args).conn BaseDb.adapt_conn(self.conn) with self.transaction() as cursor: cursor.execute("SET timezone TO 'UTC'") @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "origin_get"}) def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]: urls: Dict[Sha1Git, str] = {} sha1s = tuple(ids) if sha1s: # TODO: consider splitting this query in several ones if sha1s is too big! 
values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT sha1, url FROM origin WHERE sha1 IN ({values}) """ with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=sha1s) urls.update((row["sha1"], row["url"]) for row in cursor) return urls @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "revision_add"}) @handle_raise_on_commit def revision_add( self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]] ) -> bool: if isinstance(revs, dict): data = [(sha1, rev.date, rev.origin) for sha1, rev in revs.items()] else: data = [(sha1, None, None) for sha1 in revs] if data: sql = """ INSERT INTO revision(sha1, date, origin) (SELECT V.rev AS sha1, V.date::timestamptz AS date, O.id AS origin FROM (VALUES %s) AS V(rev, date, org) LEFT JOIN origin AS O ON (O.sha1=V.org::sha1_git)) ON CONFLICT (sha1) DO UPDATE SET date=LEAST(EXCLUDED.date, revision.date), origin=COALESCE(EXCLUDED.origin, revision.origin) """ page_size = self.page_size or len(data) with self.transaction() as cursor: psycopg2.extras.execute_values( cur=cursor, sql=sql, argslist=data, page_size=page_size ) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "revision_get"}) def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]: result: Dict[Sha1Git, RevisionData] = {} sha1s = tuple(ids) if sha1s: # TODO: consider splitting this query in several ones if sha1s is too big! values = ", ".join(itertools.repeat("%s", len(sha1s))) sql = f""" SELECT R.sha1, R.date, O.sha1 AS origin FROM revision AS R LEFT JOIN origin AS O ON (O.id=R.origin) WHERE R.sha1 IN ({values}) AND (R.date is not NULL OR O.sha1 is not NULL) """ with self.transaction(readonly=True) as cursor: cursor.execute(query=sql, vars=sha1s) result.update( (row["sha1"], RevisionData(date=row["date"], origin=row["origin"])) for row in cursor ) return result @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "relation_add"}) @handle_raise_on_commit def relation_add( self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]] ) -> bool: rows = [(src, rel.dst, rel.path) for src, dsts in data.items() for rel in dsts] if rows: rel_table = relation.value src_table, *_, dst_table = rel_table.split("_") page_size = self.page_size or len(rows) # Put the next three queries in a manual single transaction: # they use the same temp table with self.transaction() as cursor: cursor.execute("SELECT swh_mktemp_relation_add()") psycopg2.extras.execute_values( cur=cursor, sql="INSERT INTO tmp_relation_add(src, dst, path) VALUES %s", argslist=rows, page_size=page_size, ) sql = "SELECT swh_provenance_relation_add_from_temp(%s, %s, %s)" cursor.execute(query=sql, vars=(rel_table, src_table, dst_table)) return True @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "relation_get"}) def relation_get( self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False ) -> Dict[Sha1Git, Set[RelationData]]: return self._relation_get(relation, ids, reverse) @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "relation_get_all"}) def relation_get_all( self, relation: RelationType ) -> Dict[Sha1Git, Set[RelationData]]: return self._relation_get(relation, None) def _relation_get( self, relation: RelationType, ids: Optional[Iterable[Sha1Git]], reverse: bool = False, ) -> Dict[Sha1Git, Set[RelationData]]: result: Dict[Sha1Git, Set[RelationData]] = {} sha1s: List[Sha1Git] if ids is not None: sha1s = list(ids) filter = "filter-src" if not reverse else "filter-dst" else: 
            sha1s = []
            filter = "no-filter"

        if filter == "no-filter" or sha1s:
            rel_table = relation.value
            src_table, *_, dst_table = rel_table.split("_")

            sql = "SELECT * FROM swh_provenance_relation_get(%s, %s, %s, %s, %s)"
            with self.transaction(readonly=True) as cursor:
                cursor.execute(
                    query=sql, vars=(rel_table, src_table, dst_table, filter, sha1s)
                )
                for row in cursor:
                    src = row.pop("src")
                    result.setdefault(src, set()).add(RelationData(**row))
        return result

    @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "with_path"})
    def with_path(self) -> bool:
        return "with-path" in self.flavor

diff --git a/swh/provenance/sql/60-indexes.sql b/swh/provenance/sql/60-indexes.sql
index 313fdbc..bf2c70d 100644
--- a/swh/provenance/sql/60-indexes.sql
+++ b/swh/provenance/sql/60-indexes.sql
@@ -1,19 +1,20 @@
-- psql variables to get the current database flavor
select position('denormalized' in swh_get_dbflavor()::text) = 0 as dbflavor_norm \gset

-- create unique indexes (instead of pkey) because location might be null for
-- the without-path flavor
\if :dbflavor_norm
create unique index on content_in_revision(content, revision, location);
create unique index on directory_in_revision(directory, revision, location);
create unique index on content_in_directory(content, directory, location);
\else
create unique index on content_in_revision(content);
create unique index on directory_in_revision(directory);
create unique index on content_in_directory(content);
\endif

create unique index on location(digest(path, 'sha1'));
+create index on directory(sha1) where flat=false;

alter table revision_in_origin add primary key (revision, origin);
alter table revision_before_revision add primary key (prev, next);

diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
index 82f7257..7fcee50 100644
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -1,72 +1,105 @@
# Copyright (C) 2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from datetime import datetime, timezone
+from typing import Tuple

from swh.model.hashutil import hash_to_bytes
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
+from swh.provenance.directory import directory_add, directory_flatten_range
from swh.provenance.interface import (
    DirectoryData,
    ProvenanceInterface,
    RelationData,
    RelationType,
)
from swh.provenance.model import DirectoryEntry, FileEntry
from swh.provenance.tests.conftest import fill_storage, load_repo_data


-def test_directory_add(
-    provenance: ProvenanceInterface,
-    archive: ArchiveInterface,
-) -> None:
+def prepare(
+    provenance: ProvenanceInterface, archive: ArchiveInterface
+) -> Tuple[datetime, DirectoryEntry, FileEntry, FileEntry]:
+    """Prepare the provenance database with some content suitable for flattening tests"""
    # read data/README.md for more details on how these datasets are generated
    data = load_repo_data("cmdbts2")
    fill_storage(archive.storage, data)

    # just take a directory that is known to exist in cmdbts2
    directory = DirectoryEntry(
        id=hash_to_bytes("48007c961cc734d1f63886d0413a6dc605e3e2ea")
    )
    content1 = FileEntry(
        id=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), name=b"a"
    )
    content2 = FileEntry(
        id=hash_to_bytes("50e9cdb03f9719261dd39d7f2920b906db3711a3"), name=b"b"
    )
    date = datetime.fromtimestamp(1000000010, timezone.utc)

    # directory_add and the internal directory_flatten require the directory and its
    # content to be known by the provenance object. Otherwise, they do nothing
    provenance.directory_set_date_in_isochrone_frontier(directory, date)
    provenance.content_set_early_date(content1, date)
    provenance.content_set_early_date(content2, date)
    provenance.flush()
    assert provenance.storage.directory_get([directory.id]) == {
        directory.id: DirectoryData(date=date, flat=False)
    }
    assert provenance.storage.content_get([content1.id, content2.id]) == {
        content1.id: date,
        content2.id: date,
    }

    # this query forces the directory date to be retrieved from the storage and cached
    # (otherwise, the flush below won't update the directory flatten flag)
    flattenned = provenance.directory_already_flattenned(directory)
    assert flattenned is not None and not flattenned

+    return date, directory, content1, content2
+
+
+def test_directory_add(
+    provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
+) -> None:
+
+    date, directory, content1, content2 = prepare(provenance, archive)
+
    # flatten the directory and check the expected result
    directory_add(provenance, archive, [directory])
    assert provenance.storage.directory_get([directory.id]) == {
        directory.id: DirectoryData(date=date, flat=True)
    }
    assert provenance.storage.relation_get_all(RelationType.CNT_IN_DIR) == {
        content1.id: {
            RelationData(dst=directory.id, path=b"a"),
            RelationData(dst=directory.id, path=b"C/a"),
        },
        content2.id: {RelationData(dst=directory.id, path=b"C/b")},
    }
+
+
+def test_directory_flatten_range(
+    provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
+) -> None:
+
+    date, directory, content1, content2 = prepare(provenance, archive)
+
+    # flatten the directory and check the expected result
+    directory_flatten_range(provenance, archive, directory.id[:-1], directory.id)
+
+    assert provenance.storage.directory_get([directory.id]) == {
+        directory.id: DirectoryData(date=date, flat=True)
+    }
+    assert provenance.storage.relation_get_all(RelationType.CNT_IN_DIR) == {
+        content1.id: {
+            RelationData(dst=directory.id, path=b"a"),
+            RelationData(dst=directory.id, path=b"C/a"),
+        },
+        content2.id: {RelationData(dst=directory.id, path=b"C/b")},
+    }
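End-to-end, the new code path is reachable as `swh provenance directory flatten`. A hedged sketch of driving it in-process with click's test runner; the hash bounds are placeholders, and a real run needs a configuration pointing at live archive and provenance backends:

from click.testing import CliRunner

from swh.provenance.cli import cli

runner = CliRunner()
result = runner.invoke(
    cli,
    [
        "directory",
        "flatten",
        # placeholder bounds: any 40-hex sha1_git range works syntactically
        "--range-from", "0000000000000000000000000000000000000000",
        "--range-to", "ffffffffffffffffffffffffffffffffffffffff",
        "--min-size", "1000",
    ],
)
print(result.exit_code, result.output)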