diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -142,7 +142,9 @@
     "--policy",
     default="auto",
     show_default=True,
-    type=click.Choice(["auto", "bfs", "greedybfs", "filepriority", "dirpriority"]),
+    type=click.Choice(
+        ["auto", "bfs", "greedybfs", "filepriority", "dirpriority", "randomdir"]
+    ),
     help="The scan policy.",
 )
 @click.option(
@@ -178,6 +180,8 @@
       dirpriority: scan all the source code directories and check only unknown
       directory contents.
 
+      randomdir: scan the source code using a random Merkle search on directories.
+
     Other information about software artifacts could be specified with the -e/
     --extra-info option:\n
     \b
diff --git a/swh/scanner/policy.py b/swh/scanner/policy.py
--- a/swh/scanner/policy.py
+++ b/swh/scanner/policy.py
@@ -4,16 +4,20 @@
 # See top-level LICENSE file for more information
 
 import abc
-from typing import no_type_check
+import itertools
+from typing import Iterable, List, no_type_check
 
 from swh.core.utils import grouper
-from swh.model.from_disk import Directory
+from swh.loader.core import discovery
+from swh.model import from_disk
+from swh.model.from_disk import model
+from swh.model.model import Sha1Git
 
 from .client import QUERY_LIMIT, Client
 from .data import MerkleNodeInfo
 
 
-def source_size(source_tree: Directory):
+def source_size(source_tree: from_disk.Directory) -> int:
     """return the size of a source tree as the number of nodes it contains"""
     return sum(1 for n in source_tree.iter_tree(dedup=False))
 
@@ -23,10 +27,10 @@
     data: MerkleNodeInfo
     """information about contents and directories of the merkle tree"""
 
-    source_tree: Directory
+    source_tree: from_disk.Directory
     """representation of a source code project directory in the merkle tree"""
 
-    def __init__(self, source_tree: Directory, data: MerkleNodeInfo):
+    def __init__(self, source_tree: from_disk.Directory, data: MerkleNodeInfo) -> None:
         self.source_tree = source_tree
         self.data = data
 
@@ -232,20 +236,86 @@
                 "known"
             ]
 
-    def has_contents(self, directory: Directory):
+    def has_contents(self, directory: from_disk.Directory) -> bool:
         """Check if the directory given in input has contents"""
         for entry in directory.entries:
             if entry["type"] == "file":
                 return True
         return False
 
-    def get_contents(self, dir_: Directory):
+    def get_contents(self, dir_: from_disk.Directory) -> Iterable:
         """Get all the contents of a given directory"""
         for _, node in list(dir_.items()):
             if node.object_type == "content":
                 yield node
 
 
+class WebAPIConnection(discovery.ArchiveDiscoveryInterface):
+    """Archive discovery backend that queries the web API via ``client.known``."""
+
+    def __init__(
+        self,
+        contents: List[model.Content],
+        skipped_contents: List[model.SkippedContent],
+        directories: List[model.Directory],
+        client: Client,
+    ) -> None:
+        self.contents = contents
+        self.skipped_contents = skipped_contents
+        self.directories = directories
+        self.client = client
+        self.sha_to_swhid = {}  # sha1_git / directory id -> SWHID string
+        self.swhid_to_sha = {}  # SWHID string -> sha1_git / directory id
+        for content in contents:  # skipped contents are not indexed
+            self.sha_to_swhid[content.sha1_git] = str(content.swhid())
+            self.swhid_to_sha[str(content.swhid())] = content.sha1_git
+
+        for directory in directories:
+            self.sha_to_swhid[directory.id] = str(directory.swhid())
+            self.swhid_to_sha[str(directory.swhid())] = directory.id
+
+    async def content_missing(self, contents: List[Sha1Git]) -> List[Sha1Git]:
+        """Return the given contents (by sha1_git) that the archive does not know."""
+        return await self._missing(contents)
+
+    async def skipped_content_missing(
+        self, skipped_contents: List[Sha1Git]
+    ) -> Iterable[Sha1Git]:
+        """Report every skipped content as missing, without querying."""
+        # TODO: decide how skipped contents should be checked against the archive
+        return skipped_contents
+
+    async def directory_missing(self, directories: List[Sha1Git]) -> Iterable[Sha1Git]:
+        """Return the given directories (by id) that the archive does not know."""
+        return await self._missing(directories)
+
+    async def _missing(self, shas: List[Sha1Git]) -> List[Sha1Git]:
+        res = await self.client.known([self.sha_to_swhid[o] for o in shas])
+        return [self.swhid_to_sha[k] for k, v in res.items() if not v["known"]]
+
+
+class RandomDirSamplingPriority(Policy):
+    """Check the Merkle tree by querying random directories. Set all ancestors to
+    unknown for unknown directories, otherwise set all descendants to known.
+    Finally check all the remaining file contents.
+    """
+
+    @no_type_check
+    async def run(self, client: Client):
+        contents, skipped_contents, directories = from_disk.iter_directory(
+            self.source_tree
+        )
+
+        get_unknowns = discovery.filter_known_objects(  # awaited below
+            WebAPIConnection(contents, skipped_contents, directories, client),
+        )
+
+        unknowns = set(itertools.chain(*await get_unknowns))  # flatten result groups
+
+        for obj in itertools.chain(contents, skipped_contents, directories):
+            self.data[obj.swhid()]["known"] = obj not in unknowns
+
+
 class QueryAll(Policy):
     """Check the status of every node in the Merkle tree."""
 
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
--- a/swh/scanner/scanner.py
+++ b/swh/scanner/scanner.py
@@ -21,6 +21,7 @@
     GreedyBFS,
     LazyBFS,
     QueryAll,
+    RandomDirSamplingPriority,
     source_size,
 )
 
@@ -66,6 +67,8 @@
         return FilePriority(source_tree, nodes_data)
     elif policy == "dirpriority":
         return DirectoryPriority(source_tree, nodes_data)
+    elif policy == "randomdir":
+        return RandomDirSamplingPriority(source_tree, nodes_data)
     else:
         raise Exception(f"policy '{policy}' not found")