diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,4 +7,5 @@
 plotly
 pandas
 numpy
+ndjson
 dulwich
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -76,7 +76,7 @@
 @click.option(
     "-f",
     "--format",
-    type=click.Choice(["text", "json", "sunburst"], case_sensitive=False),
+    type=click.Choice(["text", "json", "ndjson", "sunburst"], case_sensitive=False),
     default="text",
     help="select the output format",
 )
@@ -95,7 +95,7 @@
     loop = asyncio.get_event_loop()
     loop.run_until_complete(run(root_path, api_url, source_tree, sre_patterns))
 
-    source_tree.show(format)
+    source_tree.output(format)
 
 
 if __name__ == "__main__":
diff --git a/swh/scanner/model.py b/swh/scanner/model.py
--- a/swh/scanner/model.py
+++ b/swh/scanner/model.py
@@ -7,9 +7,11 @@
 import sys
 import json
 from pathlib import PosixPath
-from typing import Any, Dict, Tuple
+from typing import Any, Dict, Tuple, Iterable
 from enum import Enum
 
+import ndjson  # type: ignore
+
 from .plot import sunburst
 from .exceptions import InvalidObjectType
 
@@ -36,28 +38,32 @@
         self.path = path
         self.otype = DIRECTORY if path.is_dir() else CONTENT
         self.pid = ""
+        self.known = False
         self.children: Dict[PosixPath, Tree] = {}
 
-    def addNode(self, path: PosixPath, pid: str = None) -> None:
+    def addNode(self, path: PosixPath, pid: str, known: bool) -> None:
         """Recursively add a new path.
         """
         relative_path = path.relative_to(self.path)
 
         if relative_path == PosixPath("."):
-            if pid is not None:
-                self.pid = pid
+            self.pid = pid
+            self.known = known
             return
 
         new_path = self.path.joinpath(relative_path.parts[0])
         if new_path not in self.children:
             self.children[new_path] = Tree(new_path, self)
 
-        self.children[new_path].addNode(path, pid)
+        self.children[new_path].addNode(path, pid, known)
 
-    def show(self, format) -> None:
-        """Show tree in different formats"""
+    def output(self, format) -> None:
+        """Display the model with the specified format"""
         if format == "json":
-            print(json.dumps(self.getTree(), indent=4, sort_keys=True))
+            print(json.dumps(self.toDict(), indent=4, sort_keys=True))
+
+        elif format == "ndjson":
+            print(ndjson.dumps(dict_path for dict_path in self.iterate()))
 
         elif format == "text":
             isatty = sys.stdout.isatty()
@@ -82,7 +88,7 @@
         end = "/" if node.otype == DIRECTORY else ""
 
         if isatty:
-            if not node.pid:
+            if not node.known:
                 rel_path = colorize(rel_path, Color.red)
             elif node.otype == DIRECTORY:
                 rel_path = colorize(rel_path, Color.blue)
@@ -91,26 +97,42 @@
 
         print(f"{begin}{rel_path}{end}")
 
-    def getTree(self):
-        """Walk through the tree to discover content or directory that have
-        a persistent identifier. If a persistent identifier is found it saves
-        the path with the relative PID.
+    @property
+    def info(self):
+        """
+        Get information about the current path
 
         Returns:
-            child_tree: the tree with the content/directory found
+            a dictionary containing a path with its known/unknown status and the
+            Software Heritage persistent identifier
 
         """
-        child_tree = {}
-        for path, child_node in self.children.items():
-            rel_path = str(child_node.path.relative_to(self.path))
-            if child_node.pid:
-                child_tree[rel_path] = child_node.pid
-            else:
-                next_tree = child_node.getTree()
-                if next_tree:
-                    child_tree[rel_path] = next_tree
+        node_info = {}
+        node_info["swhid"] = self.pid
+        node_info["known"] = self.known
+        return {str(self.path): node_info}
+
+    def toDict(self, dict_nodes=None) -> Dict[str, Dict[str, Dict]]:
+        """Merge every dict yielded by iterate() into dict_nodes and return it."""
+        # Create the dict per call: a `{}` default is shared between calls.
+        dict_nodes = {} if dict_nodes is None else dict_nodes
+        for node_dict in self.iterate():
+            dict_nodes.update(node_dict)
+        return dict_nodes
 
-        return child_tree
+    def iterate(self) -> Iterable[Dict[str, Dict]]:
+        """
+        Recursively iterate through the descendants of the current node
+        (the node itself is not yielded); a child comes before its children.
+
+        Yields:
+            a dictionary mapping a path to its known/unknown status and its
+            Software Heritage persistent identifier
+        """
+        for child_node in self.children.values():
+            yield child_node.info
+            if child_node.otype == DIRECTORY:
+                yield from child_node.iterate()
 
     def __getSubDirsInfo(self, root, directories):
         """Fills the directories given in input with the contents information
@@ -158,14 +180,14 @@
                 "Can't calculate contents of the " "object type: %s" % self.otype
             )
 
-        if self.pid:
+        if self.known:
             # to identify a directory with all files/directories present
             return (1, 1)
         else:
             for _, child_node in self.children.items():
                 if child_node.otype == CONTENT:
                     contents += 1
-                    if child_node.pid:
+                    if child_node.known:
                         discovered += 1
 
         return (contents, discovered)
diff --git a/swh/scanner/scanner.py b/swh/scanner/scanner.py
--- a/swh/scanner/scanner.py
+++ b/swh/scanner/scanner.py
@@ -164,18 +164,16 @@
     """
 
     async def _scan(root, session, api_url, source_tree, exclude_patterns):
-        for path, pid, found in await parse_path(
+        for path, pid, known in await parse_path(
             root, session, api_url, exclude_patterns
         ):
             obj_type = parse_persistent_identifier(pid).object_type
 
             if obj_type == CONTENT:
-                source_tree.addNode(path, pid if found else None)
+                source_tree.addNode(path, pid, known)
             elif obj_type == DIRECTORY and directory_filter(path, exclude_patterns):
-                if found:
-                    source_tree.addNode(path, pid)
-                else:
-                    source_tree.addNode(path)
+                source_tree.addNode(path, pid, known)
+                if not known:
                     await _scan(path, session, api_url, source_tree, exclude_patterns)
 
     async with aiohttp.ClientSession() as session:
diff --git a/swh/scanner/tests/conftest.py b/swh/scanner/tests/conftest.py
--- a/swh/scanner/tests/conftest.py
+++ b/swh/scanner/tests/conftest.py
@@ -89,7 +89,7 @@
 @pytest.fixture(scope="function")
 def example_tree(temp_folder):
     """Fixture that generate a Tree with the root present in the
-    session fixture "temp_folder".
+       session fixture "temp_folder".
     """
     example_tree = Tree(temp_folder["root"])
     assert example_tree.path == temp_folder["root"]
@@ -113,9 +113,9 @@
 
     for path, pid in temp_folder["paths"].items():
         if path in known_paths:
-            example_tree.addNode(path, pid)
+            example_tree.addNode(path, pid, True)
         else:
-            example_tree.addNode(path)
+            example_tree.addNode(path, pid, False)
 
     return example_tree.getDirectoriesInfo(root)
 
diff --git a/swh/scanner/tests/data.py b/swh/scanner/tests/data.py
--- a/swh/scanner/tests/data.py
+++ b/swh/scanner/tests/data.py
@@ -10,9 +10,12 @@
 }
 
 # present pids inside /data/sample-folder
-present_pids = [
+present_swhids = [
     "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a",  # quotes.md
     "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb",  # some-binary
     "swh:1:dir:9619a28687b2462efbb5be816bc1185b95753d93",  # barfoo2/
     "swh:1:dir:07d4d9ec5c406632d203dbd4631e7863612a0326",  # toexclude/
 ]
+
+
+to_exclude_swhid = "swh:1:dir:07d4d9ec5c406632d203dbd4631e7863612a0326"
diff --git a/swh/scanner/tests/data/sample-folder-result-no-toexclude.json b/swh/scanner/tests/data/sample-folder-result-no-toexclude.json
deleted file mode 100644
--- a/swh/scanner/tests/data/sample-folder-result-no-toexclude.json
+++ /dev/null
@@ -1,12 +0,0 @@
-{
-    "foo": {
-        "quotes.md": "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"
-    },
-    "bar": {
-        "barfoo2": "swh:1:dir:9619a28687b2462efbb5be816bc1185b95753d93"
-    },
-    "link-to-foo": {
-        "quotes.md": "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"
-    },
-    "some-binary": "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb"
-}
diff --git a/swh/scanner/tests/data/sample-folder-result.json b/swh/scanner/tests/data/sample-folder-result.json
deleted file mode 100644
--- a/swh/scanner/tests/data/sample-folder-result.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "foo": {
-        "quotes.md": "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"
-    },
-    "bar": {
-        "barfoo2": "swh:1:dir:9619a28687b2462efbb5be816bc1185b95753d93"
-    },
-    "link-to-foo": {
-        "quotes.md": "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a"
-    },
-    "toexclude": "swh:1:dir:07d4d9ec5c406632d203dbd4631e7863612a0326",
-    "some-binary": "swh:1:cnt:68769579c3eaadbe555379b9c3538e6628bae1eb"
-}
diff --git a/swh/scanner/tests/flask_api.py b/swh/scanner/tests/flask_api.py
--- a/swh/scanner/tests/flask_api.py
+++ b/swh/scanner/tests/flask_api.py
@@ -5,7 +5,7 @@
 
 from flask import Flask, request
 
-from .data import present_pids
+from .data import present_swhids
 
 from swh.web.common.exc import LargePayloadExc
 
@@ -15,17 +15,20 @@
 
     @app.route("/known/", methods=["POST"])
     def known():
-        pids = request.get_json()
+        """Mock /known/ endpoint: map each posted SWHID to its known status."""
+        swhids = request.get_json()
+        max_requests = 100
 
-        if len(pids) > 900:
+        if len(swhids) > max_requests:
             raise LargePayloadExc(
-                "The maximum number of PIDs this endpoint " "can receive is 900"
+                "The maximum number of PIDs this endpoint can receive is "
+                f"{max_requests}"
             )
 
-        res = {pid: {"known": False} for pid in pids}
-        for pid in pids:
-            if pid in present_pids:
-                res[pid]["known"] = True
+        res = {swhid: {"known": False} for swhid in swhids}
+        for swhid in swhids:
+            if swhid in present_swhids:
+                res[swhid]["known"] = True
 
         return res
 
diff --git a/swh/scanner/tests/test_model.py b/swh/scanner/tests/test_model.py
--- a/swh/scanner/tests/test_model.py
+++ b/swh/scanner/tests/test_model.py
@@ -8,7 +8,7 @@
     avail_paths = temp_folder["paths"].keys()
 
     for path, pid in temp_folder["paths"].items():
-        example_tree.addNode(path, pid)
+        example_tree.addNode(path, pid, False)
 
     for path, node in example_tree.children.items():
         assert path in avail_paths
@@ -17,39 +17,41 @@
                 assert subpath in avail_paths
 
 
-def test_get_json_tree_all_not_present(example_tree, temp_folder):
+def test_to_json_no_one_present(example_tree, temp_folder):
     for path, pid in temp_folder["paths"].items():
-        example_tree.addNode(path)
+        example_tree.addNode(path, pid, False)
 
-    json_tree = example_tree.getTree()
+    result = example_tree.toDict()
 
-    assert len(json_tree) == 0
+    assert len(result) == 6
+
+    for _, node_info in result.items():
+        assert node_info["known"] is False
 
 
 def test_get_json_tree_all_present(example_tree, temp_folder):
     for path, pid in temp_folder["paths"].items():
-        example_tree.addNode(path, pid)
+        example_tree.addNode(path, pid, True)
+
+    result = example_tree.toDict()
 
-    tree_dict = example_tree.getTree()
+    assert len(result) == 6
 
-    assert len(tree_dict) == 3
-    # since subdir have a pid, it can't have a children path
-    assert tree_dict["subdir0"] is not dict
+    for _, node_info in result.items():
+        assert node_info["known"] is True
 
 
 def test_get_json_tree_only_one_present(example_tree, temp_folder):
+    root = temp_folder["root"]
     filesample_path = temp_folder["filesample"]
 
     for path, pid in temp_folder["paths"].items():
-        if path == filesample_path:
-            example_tree.addNode(path, pid)
-        else:
-            example_tree.addNode(path)
+        example_tree.addNode(path, pid, path == filesample_path)
 
-    tree_dict = example_tree.getTree()
+    result = example_tree.toDict()
 
-    assert len(tree_dict) == 1
-    assert tree_dict["subdir0"]["filesample.txt"]
+    assert len(result) == 6
+    assert result[str(root / "subdir0" / "filesample.txt")]["known"] is True
 
 
 def test_get_directories_info(example_tree, temp_folder):
@@ -61,9 +63,9 @@
 
     for path, pid in temp_folder["paths"].items():
         if path == filesample_path or path == filesample2_path:
-            example_tree.addNode(path, pid)
+            example_tree.addNode(path, pid, True)
         else:
-            example_tree.addNode(path)
+            example_tree.addNode(path, pid, False)
 
     directories = example_tree.getDirectoriesInfo(example_tree.path)
 
diff --git a/swh/scanner/tests/test_scanner.py b/swh/scanner/tests/test_scanner.py
--- a/swh/scanner/tests/test_scanner.py
+++ b/swh/scanner/tests/test_scanner.py
@@ -7,7 +7,7 @@
 import json
 from pathlib import PosixPath
 
-from .data import correct_api_response
+from .data import correct_api_response, present_swhids, to_exclude_swhid
 
 from swh.scanner.scanner import pids_discovery, get_subpaths, run
 from swh.scanner.model import Tree
@@ -45,8 +45,8 @@
 
     api_url = live_server.url() + "/"
     request = [
-        "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a" for i in range(901)
-    ]  # /known/ is limited at 900
+        "swh:1:cnt:7c4c57ba9ff496ad179b8f65b1d286edbda34c9a" for i in range(101)
+    ]  # /known/ maximum accepted requests 100
 
     with pytest.raises(APIError):
         event_loop.run_until_complete(pids_discovery(request, aiosession, api_url))
@@ -73,30 +73,24 @@
 def test_scanner_result(live_server, event_loop, test_folder):
     api_url = live_server.url() + "/"
 
-    result_path = test_folder.joinpath(PosixPath("sample-folder-result.json"))
-    with open(result_path, "r") as json_file:
-        expected_result = json.loads(json_file.read())
-
     sample_folder = test_folder.joinpath(PosixPath("sample-folder"))
 
     source_tree = Tree(sample_folder)
-    event_loop.run_until_complete(run(sample_folder, api_url, source_tree, tuple()))
-
-    actual_result = source_tree.getTree()
+    event_loop.run_until_complete(run(sample_folder, api_url, source_tree, set()))
 
-    assert actual_result == expected_result
+    for node_dict in source_tree.iterate():
+        node_info = list(node_dict.values())[0]
+        if node_info["swhid"] in present_swhids:
+            assert node_info["known"] is True
+        else:
+            assert node_info["known"] is False
 
 
 def test_scanner_result_with_exclude_patterns(live_server, event_loop, test_folder):
     api_url = live_server.url() + "/"
 
-    result_path = test_folder.joinpath(
-        PosixPath("sample-folder-result-no-toexclude.json")
-    )
-    with open(result_path, "r") as json_file:
-        expected_result = json.loads(json_file.read())
-
     sample_folder = test_folder.joinpath(PosixPath("sample-folder"))
+
     patterns = (str(sample_folder) + "/toexclude",)
     exclude_pattern = {
         reg_obj for reg_obj in extract_regex_objs(sample_folder, patterns)
@@ -107,6 +101,6 @@
         run(sample_folder, api_url, source_tree, exclude_pattern)
     )
 
-    actual_result = source_tree.getTree()
-
-    assert actual_result == expected_result
+    for node_dict in source_tree.iterate():
+        node_info = list(node_dict.values())[0]
+        assert node_info["swhid"] != to_exclude_swhid