diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -388,6 +388,50 @@
         client.close()
 
 
+@cli.group(name="directory")
+@click.pass_context
+def directory(ctx: click.core.Context):
+    from . import get_archive, get_provenance
+
+    archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
+    provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
+
+    ctx.obj["provenance"] = provenance
+    ctx.obj["archive"] = archive
+
+
+@directory.command(name="flatten")
+@click.option(
+    "--range-from", type=str, help="start ID of the range of directories to flatten"
+)
+@click.option(
+    "--range-to", type=str, help="stop ID of the range of directories to flatten"
+)
+@click.option(
+    "-s",
+    "--min-size",
+    default=0,
+    type=int,
+    help="""Set the minimum size (in bytes) of files to be indexed.
+    Any smaller file will be ignored.""",
+)
+@click.pass_context
+def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size):
+    from swh.provenance.directory import directory_flatten_range
+
+    provenance = ctx.obj["provenance"]
+    archive = ctx.obj["archive"]
+
+    directory_flatten_range(
+        provenance,
+        archive,
+        hash_to_bytes(range_from),
+        hash_to_bytes(range_to),
+        min_size,
+    )
+
+
+# old (deprecated) commands
 @cli.command(name="iter-frontiers")
 @click.argument("filename")
 @click.option(
diff --git a/swh/provenance/directory.py b/swh/provenance/directory.py
--- a/swh/provenance/directory.py
+++ b/swh/provenance/directory.py
@@ -40,6 +40,28 @@
             yield DirectoryEntry(id)
 
 
+def directory_flatten_range(
+    provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
+    start_id: Sha1Git,
+    end_id: Sha1Git,
+    minsize: int = 0,
+    commit: bool = True,
+) -> None:
+    """XXX"""
+    current = start_id
+    while current < end_id:
+        dirs = provenance.storage.directory_iter_not_flattenned(
+            limit=100, start_id=current
+        )
+        if not dirs:
+            break
+        directory_add(
+            provenance, archive, [DirectoryEntry(id=d) for d in dirs], minsize, commit
+        )
+        current = dirs[-1]
+
+
 @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
 def directory_add(
     provenance: ProvenanceInterface,
diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -9,7 +9,7 @@
 from datetime import datetime
 import enum
 from types import TracebackType
-from typing import Dict, Generator, Iterable, Optional, Set, Type, Union
+from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union
 
 from typing_extensions import Protocol, runtime_checkable
 
@@ -138,6 +138,13 @@
         """
         ...
 
+    @remote_api_endpoint("directory_iter_not_flattenned")
+    def directory_iter_not_flattenned(
+        self, limit: int, start_id: Sha1Git
+    ) -> List[Sha1Git]:
+        """Retrieve the unflattenned directories"""
+        ...
+
     @remote_api_endpoint("entity_get_all")
     def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
         """Retrieve all sha1 ids for entities of type `entity` present in the provenance
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/postgresql/provenance.py
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/postgresql/provenance.py
@@ -194,6 +194,20 @@
                 )
         return result
 
+    @statsd.timed(
+        metric=STORAGE_DURATION_METRIC, tags={"method": "directory_iter_not_flattenned"}
+    )
+    def directory_iter_not_flattenned(
+        self, limit: int, start_id: Sha1Git
+    ) -> List[Sha1Git]:
+        sql = """
+        SELECT sha1 FROM directory
+        WHERE flat=false AND sha1>%s ORDER BY sha1 LIMIT %s
+        """
+        with self.transaction(readonly=True) as cursor:
+            cursor.execute(query=sql, vars=(start_id, limit))
+            return [row["sha1"] for row in cursor]
+
     @statsd.timed(metric=STORAGE_DURATION_METRIC, tags={"method": "entity_get_all"})
     def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
         with self.transaction(readonly=True) as cursor:
diff --git a/swh/provenance/sql/60-indexes.sql b/swh/provenance/sql/60-indexes.sql
--- a/swh/provenance/sql/60-indexes.sql
+++ b/swh/provenance/sql/60-indexes.sql
@@ -14,6 +14,7 @@
 \endif
 
 create unique index on location(digest(path, 'sha1'));
+create index on directory(sha1) where flat=false;
 
 alter table revision_in_origin add primary key (revision, origin);
 alter table revision_before_revision add primary key (prev, next);
diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -8,7 +8,7 @@
 
 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
+from swh.provenance.directory import directory_add, directory_flatten_range
 from swh.provenance.interface import (
     DirectoryData,
     ProvenanceInterface,
@@ -70,3 +70,58 @@
         },
         content2.id: {RelationData(dst=directory.id, path=b"C/b")},
     }
+
+
+def test_directory_flatten_range(
+    provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
+) -> None:
+    # read data/README.md for more details on how these datasets are generated
+    data = load_repo_data("cmdbts2")
+    fill_storage(archive.storage, data)
+
+    # just take a directory that is known to exists in cmdbts2
+    directory = DirectoryEntry(
+        id=hash_to_bytes("48007c961cc734d1f63886d0413a6dc605e3e2ea")
+    )
+    content1 = FileEntry(
+        id=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"), name=b"a"
+    )
+    content2 = FileEntry(
+        id=hash_to_bytes("50e9cdb03f9719261dd39d7f2920b906db3711a3"), name=b"b"
+    )
+    date = datetime.fromtimestamp(1000000010, timezone.utc)
+
+    # directory_add and the internal directory_flatten require the directory and its
+    # content to be known by the provenance object. Otherwise, they do nothing
+    provenance.directory_set_date_in_isochrone_frontier(directory, date)
+    provenance.content_set_early_date(content1, date)
+    provenance.content_set_early_date(content2, date)
+    provenance.flush()
+
+    assert provenance.storage.directory_get([directory.id]) == {
+        directory.id: DirectoryData(date=date, flat=False)
+    }
+    assert provenance.storage.content_get([content1.id, content2.id]) == {
+        content1.id: date,
+        content2.id: date,
+    }
+
+    # this query forces the directory date to be retrieved from the storage and cached
+    # (otherwise, the flush below won't update the directory flatten flag)
+    flattenned = provenance.directory_already_flattenned(directory)
+    assert flattenned is not None and not flattenned
+
+    # flatten the directory and check the expected result
+    directory_flatten_range(provenance, archive, directory.id[:-1], directory.id)
+
+    assert provenance.storage.directory_get([directory.id]) == {
+        directory.id: DirectoryData(date=date, flat=True)
+    }
+    assert provenance.storage.relation_get_all(RelationType.CNT_IN_DIR) == {
+        content1.id: {
+            RelationData(dst=directory.id, path=b"a"),
+            RelationData(dst=directory.id, path=b"C/a"),
+        },
+        content2.id: {RelationData(dst=directory.id, path=b"C/b")},
+    }