diff --git a/benchmark.py b/benchmark.py
new file mode 100755
--- /dev/null
+++ b/benchmark.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import logging
+import os
+from pathlib import Path
+import subprocess
+import sys
+from tempfile import TemporaryDirectory
+from typing import Set
+
+import click
+
+# seed options for the "random" algorithm; each entry is a list of argv tokens
+SEED_OPTIONS = [["-s", "10"]]
+
+
+def get_scenario_cmd(algo, kb_url, kb_label, origin_info, extracted_repo_path):
+    return [
+        "swh",
+        "scanner",
+        "benchmark",
+        "--algo",
+        algo,
+        "--api-url",
+        kb_url,
+        "--backend-name",
+        kb_label,
+        "--origin",
+        origin_info["origin"],
+        "--commit",
+        origin_info["commit"],
+        "--exclude",
+        str(extracted_repo_path) + "/.git",
+        str(extracted_repo_path),
+    ]
+
+
+def run_experiments(
+    repo_path: str, temp_path: str, kb_state_file: str, algos: Set[str]
+):
+    """Spawn one process per experiment; an experiment is composed of: the
+    repository to scan, an algorithm to test and one of the known-backends
+    mapped in the "kb-state" file (given as input)
+    """
+    # the unpacked repository is expected to be the only directory in temp_path
+    dirpath, dnames, _ = next(os.walk(temp_path))
+    extracted_repo_path = Path(dirpath).joinpath(dnames[0])
+
+    # get all the backend identifiers and api URLs
+    backends = {}
+    with open(kb_state_file, "r") as kb_state_f:
+        for kb in kb_state_f.readlines():
+            if kb.startswith("#"):
+                continue
+            elems = kb.split()
+            backends[elems[0]] = elems[1]
+
+    # get the repository origin info: "<name>.tar.zst" is expected to live
+    # next to a "<name>.info.json" file
+    info_path = repo_path[:-7] + "info.json"
+    with open(info_path, "r") as json_file:
+        origin_info = json.load(json_file)
+
+    scenario_cmds = []
+
+    for algo in algos:
+        for kb_label, kb_url in backends.items():
+            if algo == "random":
+                for seed_opt in SEED_OPTIONS:
+                    random_cmd = get_scenario_cmd(
+                        algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
+                    )
+                    scenario_cmds.append(random_cmd + seed_opt)
+            else:
+                scenario_cmds.append(
+                    get_scenario_cmd(
+                        algo, kb_url, kb_label, origin_info, str(extracted_repo_path)
+                    )
+                )
+
+    processes = [
+        subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
+        for cmd in scenario_cmds
+    ]
+
+    for proc in processes:
+        proc.wait()
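+
+
+# The "<name>.info.json" file is expected to provide at least the "origin"
+# and "commit" keys consumed by get_scenario_cmd; an illustrative example
+# (the values are hypothetical):
+#
+#   {"origin": "https://github.com/user/repo", "commit": "<40-hex sha1>"}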
+
+
+@click.command(
+    help="""Run multiple benchmarks from an input repository. The repository
+    will be unpacked in the provided temporary path and tested with
+    the input algorithms."""
+)
+@click.argument("repo_path", type=click.Path(exists=True), required=True)
+@click.argument("temp_path", type=click.Path(exists=True), required=True)
+@click.argument("kb_state", type=click.Path(exists=True), required=True)
+@click.option(
+    "-a",
+    "--algo",
+    "algos",
+    multiple=True,
+    required=True,
+    type=click.Choice(
+        ["stopngo", "file_priority", "directory_priority", "random", "algo_min"],
+        case_sensitive=False,
+    ),
+    metavar="ALGORITHM_NAME",
+    help="The algorithm name for the benchmark.",
+)
+def main(repo_path, temp_path, kb_state, algos):
+    logging.basicConfig(
+        filename="experiments.log",
+        format="%(asctime)s %(message)s",
+        datefmt="%m/%d/%Y %I:%M:%S %p",
+    )
+    try:
+        repo_id = Path(repo_path).parts[-1].split(".")[0]
+        with TemporaryDirectory(prefix=repo_id + "_", dir=temp_path) as tmp_dir:
+            subprocess.run(
+                ["tar", "xf", repo_path, "-C", tmp_dir, "--strip-components=1"],
+                check=True,
+                stdout=subprocess.DEVNULL,
+                stderr=sys.stderr,
+            )
+            run_experiments(repo_path, temp_path, kb_state, set(algos))
+    except IOError as ioerror:
+        logging.exception(ioerror)
+    except Exception as e:
+        logging.exception(e)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/run_backend.sh b/run_backend.sh
new file mode 100755
--- /dev/null
+++ b/run_backend.sh
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+# This script runs multiple scanner backends using the backend information
+# provided on stdin.
+
+while IFS= read -r line;
+do
+    kb_info=($line)
+    if [[ ${kb_info[0]} = "#" ]]
+    then
+        continue
+    else
+        # ${kb_info[1]:7:14} strips the leading "http://" and keeps host:port
+        # (this assumes a 14-character host:port, e.g. 127.0.0.1:5001)
+        gunicorn "-b" ${kb_info[1]:7:14} 'swh.scanner.backend:create_app("'${kb_info[2]}'")' &
+    fi
+
+done
diff --git a/run_benchmark.sh b/run_benchmark.sh
new file mode 100755
--- /dev/null
+++ b/run_benchmark.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# This script runs the benchmarks for the repositories read from stdin.
+# You should provide:
+# - a temporary directory (where the repositories will be unpacked)
+# - a backend mapping file (kb_state) containing: the backend name, the
+#   backend api URL and the sqlite db file
+# - the algorithms to be executed
+#
+# USAGE EXAMPLE:
+# find /repositories/dir -name '*.tar.zst' | ./run_benchmark.sh /temporary/dir kb_state.txt stopngo file_priority
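+#
+# The kb_state file is expected to map one backend per line. The format below
+# is inferred from how benchmark.py and run_backend.sh parse it (backend
+# name, API URL, sqlite db path; lines starting with "#" are skipped and the
+# values are illustrative):
+#
+#   kb1 http://127.0.0.1:5001/api/1/ /path/to/kb1.sqlite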
-d "$temp_dir" ]; then + echo "You should provide a valid temporary directory path" + exit 1 +fi + +if [ "$kb_state" == '' ]; then + echo "You should provide the file with mapped knowledge bases" + exit 1 +fi + +for i in "${@:3}"; do + algos="$algos -a $i" +done + +# print headers +echo "repo_id,origin,commit_id,kb_state,repo_size,algorithm_name,kb_queries,swhids_queried" + +while IFS= read -r repo; +do + ./benchmark.py $repo $temp_dir $kb_state $algos +done diff --git a/swh/scanner/backend.py b/swh/scanner/backend.py --- a/swh/scanner/backend.py +++ b/swh/scanner/backend.py @@ -3,6 +3,8 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from pathlib import Path + from flask import Flask, request from .db import Db @@ -11,10 +13,11 @@ LIMIT = 1000 -def create_app(db: Db): +def create_app(db_file: str): """Backend for swh-scanner, implementing the /known endpoint of the Software Heritage Web API""" app = Flask(__name__) + db = Db(Path(db_file)) @app.route("/api/1/known/", methods=["POST"]) def known(): @@ -34,8 +37,13 @@ return app -def run(host: str, port: int, db: Db): +def run(host: str, port: int, db_file: str): """Serve the local database """ - app = create_app(db) - app.run(host, port, debug=True) + # from .db import Db + + # db = Db(db_file) + # app = create_app(db) + # app.run(host, port, debug=False) + # db.close() + pass diff --git a/swh/scanner/benchmark_algos.py b/swh/scanner/benchmark_algos.py new file mode 100644 --- /dev/null +++ b/swh/scanner/benchmark_algos.py @@ -0,0 +1,396 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import collections +import itertools +import json +import os +from pathlib import Path +import random +from typing import Dict, Iterable, List, Optional + +import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry + +from swh.model.from_disk import Content, Directory, accept_all_directories +from swh.model.identifiers import CONTENT, DIRECTORY, swhid + +from .exceptions import APIError +from .model import Status, Tree +from .scanner import directory_filter, extract_regex_objs + +session = requests.Session() +retries_rule = Retry(total=5, backoff_factor=1) +session.mount("http://", HTTPAdapter(max_retries=retries_rule)) + + +def query_swhids( + swhids: List[Tree], api_url: str, counter: Optional[collections.Counter] = None +) -> Dict[str, Dict[str, bool]]: + """ + Returns: + A dictionary with: + key(str): persistent identifier + value(dict): + value['known'] = True if pid is found + value['known'] = False if pid is not found + """ + endpoint = api_url + "known/" + chunk_size = 1000 + + if counter: + counter["queries"] += len(swhids) + + def make_request(swhids): + swhids = [swhid.swhid for swhid in swhids] + req = session.post(endpoint, json=swhids) + if req.status_code != 200: + error_message = "%s with given values %s" % (req.text, str(swhids)) + raise APIError(error_message) + if counter: + counter["api_calls"] += 1 + resp = req.text + return json.loads(resp) + + def get_chunk(swhids): + for i in range(0, len(swhids), chunk_size): + yield swhids[i : i + chunk_size] + + if len(swhids) > chunk_size: + return dict( + itertools.chain.from_iterable( + make_request(swhids_chunk).items() for swhids_chunk in get_chunk(swhids) + ) + 
+
+
+def stopngo(source_tree: Tree, api_url: str, counter: collections.Counter):
+    nodes = []
+    nodes.append(source_tree)
+
+    while len(nodes) > 0:
+        parsed_nodes = query_swhids(nodes, api_url, counter)
+        for node in nodes.copy():
+            nodes.remove(node)
+            node.known = parsed_nodes[node.swhid]["known"]
+            node.status = Status.queried
+            if node.otype == DIRECTORY:
+                if not node.known:
+                    nodes.extend(list(node.children.values()))
+                else:
+                    set_children_status(node, [CONTENT, DIRECTORY], True)
+
+
+def set_father_status(node, known):
+    """
+    Recursively set the known status of the unset ancestors of a given node
+    """
+    parent = node.father
+
+    if parent is None:
+        return
+    if parent.status != Status.unset:
+        return
+
+    parent.known = known
+    set_father_status(parent, known)
+
+
+def set_children_status(
+    node: Tree, node_types: Iterable[str], known: bool, status: Status = Status.unset
+):
+    """
+    Recursively set the status of the children of the provided node
+    """
+    for child_node in node.iterate():
+        if child_node.otype in node_types and child_node.status == status:
+            child_node.known = known
+
+
+def file_priority(source_tree: Tree, api_url: str, counter: collections.Counter):
+    # get all the files
+    all_contents = list(
+        filter(lambda node: node.otype == CONTENT, source_tree.iterate_bfs())
+    )
+    all_contents.reverse()  # we check nodes from the deepest
+
+    # query the backend to get the status of all file contents
+    parsed_contents = query_swhids(all_contents, api_url, counter)
+    # set the status of all file contents
+    for cnt in all_contents:
+        cnt.known = parsed_contents[cnt.swhid]["known"]
+        cnt.status = Status.queried
+        # set all the upstream directories of unknown file contents to unknown
+        if not cnt.known:
+            set_father_status(cnt, False)
+
+    # get all unset directories and check their status
+    # (update children directories accordingly)
+    unset_dirs = list(
+        filter(
+            lambda node: node.otype == DIRECTORY and node.status == Status.unset,
+            source_tree.iterate(),
+        )
+    )
+
+    if source_tree.status == Status.unset:
+        unset_dirs.append(source_tree)
+
+    # check the unset directories
+    for dir_ in unset_dirs:
+        if dir_.status == Status.unset:
+            # update the directory status
+            dir_.known = query_swhids([dir_], api_url, counter)[dir_.swhid]["known"]
+            dir_.status = Status.queried
+            if dir_.known:
+                set_children_status(dir_, [DIRECTORY], True)
+
+
+def directory_priority(source_tree: Tree, api_url: str, counter: collections.Counter):
+    # get all the directories that have at least one file content
+    unset_dirs = list(
+        filter(
+            lambda dir_: dir_.otype == DIRECTORY and dir_.has_contents,
+            source_tree.iterate_bfs(),
+        )
+    )
+    unset_dirs.reverse()
+
+    for dir_ in unset_dirs:
+        # if the directory is known, set all the downstream file contents to known
+        if dir_.status == Status.unset:
+            dir_.known = query_swhids([dir_], api_url, counter)[dir_.swhid]["known"]
+            dir_.status = Status.queried
+            if dir_.known:
+                set_children_status(dir_, [CONTENT], True)
+            else:
+                set_father_status(dir_, False)
+
+    # get the remaining directories that have no file contents
+    unset_dirs_no_cnts = list(
+        filter(
+            lambda node: node.otype == DIRECTORY and not node.has_contents,
+            source_tree.iterate_bfs(),
+        )
+    )
+    parsed_dirs_no_cnts = query_swhids(unset_dirs_no_cnts, api_url, counter)
+
+    # update the status of the directories that have no file contents
+    for dir_ in unset_dirs_no_cnts:
+        dir_.known = parsed_dirs_no_cnts[dir_.swhid]["known"]
+        dir_.status = Status.queried
+
+    # check the remaining unset file contents
+    unset_files = list(
+        filter(
+            lambda node: node.otype == CONTENT and node.status == Status.unset,
+            source_tree.iterate(),
+        )
+    )
+    parsed_unset_files = query_swhids(unset_files, api_url, counter)
+
+    for file_ in unset_files:
+        file_.known = parsed_unset_files[file_.swhid]["known"]
+        file_.status = Status.queried
+
+
+def random_(
+    source_tree: Tree,
+    api_url: str,
+    counter: collections.Counter,
+    seed: Optional[int] = None,
+):
+    if seed is not None:
+        random.seed(seed)
+    # get all directory/file contents
+    all_nodes = [node for node in source_tree.iterate()] + [source_tree]
+    # shuffle the contents
+    random.shuffle(all_nodes)
+
+    while len(all_nodes):
+        node = all_nodes.pop()
+
+        if node.status != Status.unset:
+            continue
+
+        node.known = query_swhids([node], api_url, counter)[node.swhid]["known"]
+        node.status = Status.queried
+        if node.otype == DIRECTORY and node.known:
+            for child_node in node.iterate():
+                child_node.known = True
+        elif node.otype == CONTENT and not node.known:
+            set_father_status(node, False)
+
+
+def algo_min(source_tree: Tree, api_url: str):
+    """
+    Compute the minimal number of queries, assuming the known/unknown status
+    of every node is already available
+    """
+
+    def remove_parents(node, nodes):
+        parent = node.father
+        if parent is None or parent not in nodes:
+            return
+        else:
+            nodes.remove(parent)
+            remove_parents(parent, nodes)
+
+    def remove_children(node, nodes):
+        for child_node in node.iterate():
+            if child_node in nodes:
+                nodes.remove(child_node)
+
+    all_nodes = [node for node in source_tree.iterate_bfs()]
+
+    parsed_nodes = query_swhids(all_nodes, api_url)
+    for node in all_nodes:
+        node.known = parsed_nodes[node.swhid]["known"]
+
+    all_nodes_copy = all_nodes.copy()
+
+    for node in all_nodes:
+        if node.otype == CONTENT and not node.known:
+            remove_parents(node, all_nodes_copy)
+
+    all_nodes.reverse()
+    for node in all_nodes:
+        if node.otype == DIRECTORY and not node.known:
+            remove_parents(node, all_nodes_copy)
+
+    # iterate over a snapshot, since remove_children mutates all_nodes_copy
+    for node in all_nodes_copy.copy():
+        if node.otype == DIRECTORY and node.known:
+            remove_children(node, all_nodes_copy)
+
+    return len(all_nodes_copy)
+
+
+def get_swhids(paths: Iterable[Path], exclude_patterns):
+    def swhid_of(path):
+        if path.is_dir():
+            if exclude_patterns:
+
+                def dir_filter(dirpath, *args):
+                    return directory_filter(dirpath, exclude_patterns)
+
+            else:
+                dir_filter = accept_all_directories
+
+            obj = Directory.from_disk(
+                path=bytes(path), dir_filter=dir_filter
+            ).get_data()
+
+            return swhid(DIRECTORY, obj)
+        else:
+            obj = Content.from_file(path=bytes(path)).get_data()
+            return swhid(CONTENT, obj)
+
+    for path in paths:
+        yield str(path), swhid_of(path)
+
+
+def load_source(root, sre_patterns):
+    """
+    Load the source code inside the Tree data structure
+    """
+
+    def _scan(root_path, source_tree, sre_patterns):
+        files = []
+        dirs = []
+        for elem in os.listdir(root_path):
+            cnt = Path(root_path).joinpath(elem)
+            if not os.path.islink(cnt):
+                if os.path.isfile(cnt):
+                    files.append(cnt)
+                elif os.path.isdir(cnt):
+                    dirs.append(cnt)
+
+        if files:
+            parsed_file_swhids = dict(get_swhids(files, sre_patterns))
+
+            for path, swhid_ in parsed_file_swhids.items():
+                source_tree.add_node(Path(path), swhid_)
+
+        if dirs:
+            parsed_dirs_swhids = dict(get_swhids(dirs, sre_patterns))
+
+            for path, swhid_ in parsed_dirs_swhids.items():
+                if not directory_filter(path, sre_patterns):
+                    continue
+                source_tree.add_node(Path(path), swhid_)
+                _scan(path, source_tree, sre_patterns)
+
+    source_tree = Tree(root)
+    root_swhid = dict(get_swhids([root], sre_patterns))
+    source_tree.swhid = root_swhid[str(root)]
+    _scan(root, source_tree, sre_patterns)
+    return source_tree
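+
+
+# Each call to run() prints a single CSV row matching the header echoed by
+# run_benchmark.sh (the example values are illustrative):
+#
+#   repo_id,origin,commit_id,kb_state,repo_size,algorithm_name,kb_queries,swhids_queried
+#   repo1,https://github.com/user/repo,<sha1>,kb1,1543,stopngo,12,876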
+
+
+def run(
+    root: str,
+    api_url: str,
+    backend_name: str,
+    exclude_patterns: Iterable[str],
+    algo: str,
+    origin: str,
+    commit: str,
+    seed: Optional[int] = None,
+):
+    sre_patterns = set()
+    if exclude_patterns:
+        sre_patterns = {
+            reg_obj for reg_obj in extract_regex_objs(Path(root), exclude_patterns)
+        }
+
+    # strip the temporary-directory suffix to recover the repository name
+    repo_id = Path(root).parts[-1].split("_")[0]
+    counter: collections.Counter = collections.Counter()
+    counter["api_calls"] = 0
+    counter["queries"] = 0
+    source_tree = load_source(Path(root), sre_patterns)
+
+    if algo == "random":
+        random_(source_tree, api_url, counter, seed)
+    elif algo == "algo_min":
+        min_queries = algo_min(source_tree, api_url)
+        min_result = (
+            repo_id,
+            origin,
+            commit,
+            backend_name,
+            len(source_tree),
+            algo,
+            -1,
+            min_queries,
+        )
+        print(*min_result, sep=",")
+        return
+    elif algo == "stopngo":
+        stopngo(source_tree, api_url, counter)
+    elif algo == "file_priority":
+        file_priority(source_tree, api_url, counter)
+    elif algo == "directory_priority":
+        directory_priority(source_tree, api_url, counter)
+    else:
+        raise Exception(f'Algorithm "{algo}" not found')
+
+    result = (
+        repo_id,
+        origin,
+        commit,
+        backend_name,
+        len(source_tree),
+        algo,
+        counter["api_calls"],
+        counter["queries"],
+    )
+
+    print(*result, sep=",")
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -234,6 +234,79 @@
     db.close()
 
 
+@scanner.command()
+@click.argument("root_path", required=True, type=click.Path(exists=True))
+@click.option(
+    "-u",
+    "--api-url",
+    default=None,
+    metavar="API_URL",
+    show_default=True,
+    required=True,
+    help="URL for the api request.",
+)
+@click.option(
+    "-n",
+    "--backend-name",
+    default=None,
+    metavar="BACKEND_NAME",
+    show_default=True,
+    required=True,
+    help="The backend name.",
+)
+@click.option(
+    "--origin",
+    "-o",
+    "origin_url",
+    metavar="ORIGIN_URL",
+    required=True,
+    help="Repository origin url.",
+)
+@click.option(
+    "--commit", "-c", metavar="COMMIT", required=True, help="Commit identifier.",
+)
+@click.option(
+    "--exclude",
+    "-x",
+    "patterns",
+    metavar="PATTERN",
+    multiple=True,
+    show_default=True,
+    help="Exclude directories using glob patterns \
+    (e.g., '*.git' to exclude all .git directories).",
+)
+@click.option(
+    "--algo", "-a", metavar="ALGO_NAME", required=True, help="Algorithm name.",
+)
+@click.option(
+    "--seed", "-s", metavar="SEED", type=int, help="Seed for the random algorithm."
+)
+@click.pass_context
+def benchmark(
+    ctx, root_path, api_url, backend_name, origin_url, commit, patterns, algo, seed
+):
+    """Run a single scanner benchmark experiment."""
+    from importlib import reload
+    import logging
+
+    from swh.scanner.benchmark_algos import run
+
+    # reload the logging module to avoid conflicts with benchmark.py's logging
+    reload(logging)
+    logging.basicConfig(
+        filename="experiments.log",
+        format="%(asctime)s %(message)s",
+        datefmt="%m/%d/%Y %I:%M:%S %p",
+    )
+
+    try:
+        run(root_path, api_url, backend_name, patterns, algo, origin_url, commit, seed)
+    except Exception as e:
+        logging.exception(
+            f'Repository: "{root_path}" using "{algo}" '
+            f'algorithm on "{api_url}" FAILED: {e}'
+        )
+
+
 def main():
     return scanner(auto_envvar_prefix="SWH_SCANNER")
diff --git a/swh/scanner/model.py b/swh/scanner/model.py
--- a/swh/scanner/model.py
+++ b/swh/scanner/model.py
@@ -22,6 +22,8 @@
 class Color(Enum):
     blue = "\033[94m"
     green = "\033[92m"
+    yellow = "\033[93m"
+    magenta = "\033[95m"
     red = "\033[91m"
     end = "\033[0m"
 
@@ -30,6 +32,12 @@
     return color.value + text + Color.end.value
 
 
+class Status(Enum):
+    unset = 0
+    set = 1
+    queried = 2
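+
+# Status lifecycle (illustrative; inferred from the Tree.known setter added
+# further below rather than spelled out here):
+#   node.status == Status.unset      the node was never inspected
+#   node.known = <bool>              flips status to Status.set (inferred value)
+#   node.status = Status.queried     set explicitly after a backend query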
"\033[93m" + magenta = "\033[95m" red = "\033[91m" end = "\033[0m" @@ -30,6 +32,12 @@ return color.value + text + Color.end.value +class Status(Enum): + unset = 0 + set = 1 + queried = 2 + + class Tree: """Representation of a file system structure """ @@ -40,23 +48,26 @@ self.otype = DIRECTORY if path.is_dir() else CONTENT self.swhid = "" self.known = False + self.status = Status.unset self.children: Dict[Path, Tree] = {} - def add_node(self, path: Path, swhid: str, known: bool) -> None: + def __len__(self): + return sum(1 for node in self.iterate()) + 1 # the root node + + def add_node(self, path: Path, swhid: str) -> None: """Recursively add a new path. """ relative_path = path.relative_to(self.path) if relative_path == Path("."): self.swhid = swhid - self.known = known return new_path = self.path.joinpath(relative_path.parts[0]) if new_path not in self.children: self.children[new_path] = Tree(new_path, self) - self.children[new_path].add_node(path, swhid, known) + self.children[new_path].add_node(path, swhid) def show(self, fmt) -> None: """Show tree in different formats""" @@ -90,15 +101,28 @@ end = "/" if node.otype == DIRECTORY else "" if isatty: - if not node.known: - rel_path = colorize(rel_path, Color.red) - elif node.otype == DIRECTORY: + if node.status == Status.unset: + rel_path = colorize(rel_path, Color.magenta) + elif node.status == Status.set and not node.known: + rel_path = colorize(rel_path, Color.yellow) + elif node.status == Status.set and node.known: rel_path = colorize(rel_path, Color.blue) - elif node.otype == CONTENT: + elif node.status == Status.queried and not node.known: + rel_path = colorize(rel_path, Color.red) + elif node.status == Status.queried and node.known: rel_path = colorize(rel_path, Color.green) print(f"{begin}{rel_path}{end}") + @property + def known(self): + return self._known + + @known.setter + def known(self, value: bool): + self._known = value + self.status = Status.set + @property def attributes(self) -> Dict[str, Dict[str, Any]]: """ @@ -158,6 +182,19 @@ if child_node.otype == DIRECTORY: yield from child_node.iterate() + def iterate_bfs(self) -> Iterator[Tree]: + """Get nodes in BFS order + """ + nodes = [] + nodes.append(self) + + while len(nodes) > 0: + for node in nodes.copy(): + yield node + nodes.remove(node) + if node.otype == DIRECTORY: + nodes.extend(list(node.children.values())) + def get_files_from_dir(self, dir_path: Path) -> List: """ Retrieve files information about a specific directory path @@ -249,3 +286,9 @@ if child_node.otype == DIRECTORY: return True return False + + def has_contents(self) -> bool: + for _, child_node in self.children.items(): + if child_node.otype == CONTENT: + return True + return False