diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -139,7 +139,7 @@
     "--policy",
     default="auto",
     show_default=True,
-    type=click.Choice(["auto", "bfs", "filepriority", "dirpriority"]),
+    type=click.Choice(["auto", "bfs", "greedybfs", "filepriority", "dirpriority"]),
     help="The scan policy.",
 )
 @click.pass_context
@@ -155,6 +155,9 @@
 
     bfs: scan the source code in the BFS order, checking unknown directories only.
 
+    greedybfs: same as the "bfs" policy, but looks up the status of source code
+    artifacts in chunks, to minimize the number of Web API round-trips with the archive.
+
     filepriority: scan all the source code file contents, checking only unset
     directories. (useful if the codebase contains a lot of source files)
 
diff --git a/swh/scanner/policy.py b/swh/scanner/policy.py
--- a/swh/scanner/policy.py
+++ b/swh/scanner/policy.py
@@ -10,6 +10,7 @@
 
 import aiohttp
 
+from swh.core.utils import grouper
 from swh.model.from_disk import Directory
 from swh.model.identifiers import CONTENT, DIRECTORY, CoreSWHID
 
@@ -67,6 +68,12 @@
     return await make_request(swhids)
 
 
+def source_size(source_tree: Directory):
+    """Return the size of a source tree as the number of nodes it contains.
+    """
+    return sum(1 for n in source_tree.iter_tree(dedup=False))
+
+
 class Policy(metaclass=abc.ABCMeta):
 
     data: MerkleNodeInfo
@@ -120,6 +127,48 @@
                             self.data[sub_node.swhid()]["known"] = True  # type: ignore
 
 
+class GreedyBFS(Policy):
+    """Query graph nodes in chunks (to make the most of the Web API rate limit) and
+    set the downstream contents of known directories to known.
+    """
+
+    async def run(
+        self, session: aiohttp.ClientSession, api_url: str,
+    ):
+        ssize = source_size(self.source_tree)
+        seen = []
+
+        async for nodes_chunk in self.get_nodes_chunks(session, api_url, ssize):
+            for node in nodes_chunk:
+                seen.append(node)
+                if len(seen) == ssize:
+                    return
+                if node.object_type == DIRECTORY and self.data[node.swhid()]["known"]:
+                    sub_nodes = [n for n in node.iter_tree(dedup=False)]
+                    sub_nodes.remove(node)  # remove root node
+                    for sub_node in sub_nodes:
+                        seen.append(sub_node)
+                        self.data[sub_node.swhid()]["known"] = True
+
+    @no_type_check
+    async def get_nodes_chunks(
+        self, session: aiohttp.ClientSession, api_url: str, ssize: int
+    ):
+        """Query nodes in chunks of QUERY_LIMIT at once, in order to make the most of
+        the Web API rate limit. All the nodes are queried in a single chunk if the
+        source code contains fewer than QUERY_LIMIT nodes.
+        """
+        nodes = self.source_tree.iter_tree(dedup=False)
+        for nodes_chunk in grouper(nodes, QUERY_LIMIT):
+            nodes_chunk = [n for n in nodes_chunk]
+            swhids = [node.swhid() for node in nodes_chunk]
+            swhids_res = await swhids_discovery(swhids, session, api_url)
+            for node in nodes_chunk:
+                swhid = node.swhid()
+                self.data[swhid]["known"] = swhids_res[str(swhid)]["known"]
+            yield nodes_chunk
+
+
 class FilePriority(Policy):
     """Check the Merkle tree querying all the file contents and set all the upstream
     directories to unknown in the case a file content is unknown.
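Note (illustrative, not part of the patch): GreedyBFS.get_nodes_chunks above batches the node iterator into groups of at most QUERY_LIMIT before each call to the /known/ endpoint, so each chunk costs a single Web API round-trip. A minimal, self-contained sketch of that batching idea, assuming QUERY_LIMIT is 1000 and using a hypothetical lookup_known() stand-in for swhids_discovery():

from itertools import islice
from typing import Dict, Iterable, Iterator, List

QUERY_LIMIT = 1000  # assumed value of swh.scanner.policy.QUERY_LIMIT


def chunked(items: Iterable[str], size: int) -> Iterator[List[str]]:
    """Yield successive lists of at most `size` items from `items`."""
    it = iter(items)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk


def lookup_known(swhids: List[str]) -> Dict[str, bool]:
    """Hypothetical stand-in for swhids_discovery(): one round-trip per call."""
    return {swhid: False for swhid in swhids}


def scan_in_chunks(swhids: Iterable[str]) -> Dict[str, bool]:
    """Resolve the known/unknown status of all SWHIDs, QUERY_LIMIT at a time."""
    known: Dict[str, bool] = {}
    for chunk in chunked(swhids, QUERY_LIMIT):
        known.update(lookup_known(chunk))  # one Web API round-trip per chunk
    return known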
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
--- a/swh/scanner/scanner.py
+++ b/swh/scanner/scanner.py
@@ -13,7 +13,15 @@
 
 from .data import MerkleNodeInfo
 from .output import Output
-from .policy import QUERY_LIMIT, DirectoryPriority, FilePriority, LazyBFS, QueryAll
+from .policy import (
+    QUERY_LIMIT,
+    DirectoryPriority,
+    FilePriority,
+    GreedyBFS,
+    LazyBFS,
+    QueryAll,
+    source_size,
+)
 
 
 async def run(config: Dict[str, Any], policy) -> None:
@@ -35,10 +43,6 @@
         await policy.run(session, api_url)
 
 
-def source_size(source_tree: Directory):
-    return len([n for n in source_tree.iter_tree(dedup=False)])
-
-
 def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str):
     if policy == "auto":
         return (
@@ -48,6 +52,8 @@
         )
     elif policy == "bfs":
         return LazyBFS(source_tree, nodes_data)
+    elif policy == "greedybfs":
+        return GreedyBFS(source_tree, nodes_data)
     elif policy == "filepriority":
         return FilePriority(source_tree, nodes_data)
     elif policy == "dirpriority":
diff --git a/swh/scanner/tests/conftest.py b/swh/scanner/tests/conftest.py
--- a/swh/scanner/tests/conftest.py
+++ b/swh/scanner/tests/conftest.py
@@ -7,6 +7,7 @@
 import os
 from pathlib import Path
 import shutil
+import sys
 
 import aiohttp
 from aioresponses import aioresponses  # type: ignore
@@ -78,13 +79,17 @@
     """Generate a model.from_disk.Directory from a "big" temporary directory
    (more than 1000 nodes)
     """
+    # workaround to avoid a RecursionError that could be generated while creating
+    # a large number of directories
+    sys.setrecursionlimit(1100)
     dir_ = tmp_path / "big-directory"
-    dir_.mkdir()
+    sub_dirs = dir_
     for i in range(0, QUERY_LIMIT + 1):
-        file_ = dir_ / f"file_{i}.org"
-        file_.touch()
+        sub_dirs = sub_dirs / "dir"
+        sub_dirs.mkdir(parents=True, exist_ok=True)
+        file_ = sub_dirs / "file.org"
+        file_.touch()
     dir_obj = model_of_dir(str(dir_).encode())
-    assert len(dir_obj) > QUERY_LIMIT
     return dir_obj
 
 
diff --git a/swh/scanner/tests/flask_api.py b/swh/scanner/tests/flask_api.py
--- a/swh/scanner/tests/flask_api.py
+++ b/swh/scanner/tests/flask_api.py
@@ -6,6 +6,7 @@
 from flask import Flask, request
 
 from swh.scanner.exceptions import LargePayloadExc
+from swh.scanner.policy import QUERY_LIMIT
 
 from .data import unknown_swhids
 
@@ -24,9 +25,10 @@
         for swhid in swhids:
             f.write(swhid + "\n")
 
-    if len(swhids) > 900:
+    if len(swhids) > QUERY_LIMIT:
         raise LargePayloadExc(
-            "The maximum number of SWHIDs this endpoint can receive is 900"
+            f"The maximum number of SWHIDs this endpoint can receive is "
+            f"{QUERY_LIMIT}"
         )
 
     res = {swhid: {"known": False} for swhid in swhids}
diff --git a/swh/scanner/tests/test_policy.py b/swh/scanner/tests/test_policy.py
--- a/swh/scanner/tests/test_policy.py
+++ b/swh/scanner/tests/test_policy.py
@@ -8,13 +8,15 @@
 from flask import url_for
 import pytest
 
-from swh.model.identifiers import CoreSWHID, ObjectType
+from swh.model.identifiers import CONTENT, CoreSWHID, ObjectType
 from swh.scanner.data import MerkleNodeInfo
 from swh.scanner.exceptions import APIError
 from swh.scanner.policy import (
     DirectoryPriority,
     FilePriority,
+    GreedyBFS,
     LazyBFS,
+    source_size,
     swhids_discovery,
 )
 
@@ -47,17 +49,6 @@
     )
 
 
-def test_scanner_raise_apierror_input_size_limit(event_loop, aiosession, live_server):
-
-    api_url = url_for("index", _external=True)
-    request = [
-        "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a" for i in range(901)
-    ]  # /known/ is limited at 900
-
-    with pytest.raises(APIError):
-        event_loop.run_until_complete(swhids_discovery(request, aiosession, api_url))
-
-
 def test_scanner_directory_priority_has_contents(source_tree):
     nodes_data = MerkleNodeInfo()
     policy = DirectoryPriority(source_tree, nodes_data)
@@ -143,3 +134,35 @@
 
     for swhid in backend_swhids_requests[5:]:
         assert CoreSWHID.from_string(swhid).object_type == ObjectType.DIRECTORY
+
+
+def test_greedy_bfs_policy(
+    live_server, event_loop, aiosession, big_source_tree, tmp_requests
+):
+    open(tmp_requests, "w").close()
+    api_url = url_for("index", _external=True)
+
+    nodes_data = MerkleNodeInfo()
+    policy = GreedyBFS(big_source_tree, nodes_data)
+    event_loop.run_until_complete(policy.run(aiosession, api_url))
+
+    backend_swhids_requests = get_backend_swhids_order(tmp_requests)
+
+    last_swhid = backend_swhids_requests[-1]
+    assert CoreSWHID.from_string(last_swhid).object_type == ObjectType.CONTENT
+
+
+@pytest.mark.asyncio
+async def test_greedy_bfs_get_nodes_chunks(live_server, aiosession, big_source_tree):
+    api_url = url_for("index", _external=True)
+
+    nodes_data = MerkleNodeInfo()
+    policy = GreedyBFS(big_source_tree, nodes_data)
+    chunks = [
+        n_chunk
+        async for n_chunk in policy.get_nodes_chunks(
+            aiosession, api_url, source_size(big_source_tree)
+        )
+    ]
+    assert len(chunks) == 2
+    assert chunks[1][-1].object_type == CONTENT
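Note (illustrative, not part of the patch): a hedged usage sketch for the new policy, mirroring the pattern of test_greedy_bfs_policy above. The shell invocation and the archive URL are assumptions; only the --policy choice, the GreedyBFS constructor, and its run() coroutine come from the diff.

# From the shell, the new policy would presumably be selected with something like:
#   swh scanner scan --policy greedybfs /path/to/project
# Programmatically (the path and endpoint below are placeholders):
import asyncio

import aiohttp

from swh.model.from_disk import Directory
from swh.scanner.data import MerkleNodeInfo
from swh.scanner.policy import GreedyBFS


async def scan_with_greedy_bfs(root: bytes, api_url: str) -> MerkleNodeInfo:
    source_tree = Directory.from_disk(path=root)  # build the Merkle tree of the project
    nodes_data = MerkleNodeInfo()
    policy = GreedyBFS(source_tree, nodes_data)
    async with aiohttp.ClientSession() as session:
        await policy.run(session, api_url)  # chunked /known/ lookups, as in the patch
    return nodes_data


if __name__ == "__main__":
    # placeholder values; adjust before running
    asyncio.run(
        scan_with_greedy_bfs(b"/path/to/project", "https://archive.softwareheritage.org/api/1/")
    )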