diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
index d9d8919..309f982 100644
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -1,275 +1,275 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from __future__ import annotations
 
 from datetime import datetime, timezone
 import os
 from typing import Any, Dict, Optional, Set
 
 from swh.core.statsd import statsd
 from swh.model.hashutil import hash_to_hex
 from swh.model.model import Sha1Git
 
 from .archive import ArchiveInterface
 from .interface import ProvenanceInterface
 from .model import DirectoryEntry, RevisionEntry
 
 GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
 GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
 
 UTCMIN = datetime.min.replace(tzinfo=timezone.utc)
 
 
 class HistoryNode:
     def __init__(
         self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False
     ) -> None:
         self.entry = entry
         # A revision is `is_head` if it is directly pointed by an origin (ie. a head
         # revision for some snapshot)
         self.is_head = is_head
         # A revision is `in_history` if it appears in the history graph of an already
         # processed revision in the provenance database
         self.in_history = in_history
         # XXX: the current simplified version of the origin-revision layer algorithm
         # does not use these two flags at all. They are kept for now but might
         # be removed in the future (hence, RevisionEntry might be used instead of
         # HistoryNode).
 
     def __str__(self) -> str:
         return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>"
 
     def as_dict(self) -> Dict[str, Any]:
         return {
             "rev": hash_to_hex(self.entry.id),
             "is_head": self.is_head,
             "in_history": self.in_history,
         }
 
 
 class HistoryGraph:
     @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
     def __init__(
         self,
-        archive: ArchiveInterface,
         provenance: ProvenanceInterface,
+        archive: ArchiveInterface,
         revision: RevisionEntry,
     ) -> None:
         self._head = HistoryNode(
             revision,
             is_head=provenance.revision_visited(revision),
             in_history=provenance.revision_in_history(revision),
         )
         self._graph: Dict[HistoryNode, Set[HistoryNode]] = {}
 
         stack = [self._head]
         while stack:
             current = stack.pop()
 
             if current not in self._graph:
                 self._graph[current] = set()
                 current.entry.retrieve_parents(archive)
                 for parent in current.entry.parents:
                     node = HistoryNode(
                         parent,
                         is_head=provenance.revision_visited(parent),
                         in_history=provenance.revision_in_history(parent),
                     )
                     self._graph[current].add(node)
                     stack.append(node)
 
     @property
     def head(self) -> HistoryNode:
         return self._head
 
     @property
     def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]:
         return self._graph
 
     def __str__(self) -> str:  # debug rendering, delimited like HistoryNode.__str__
         return f"<HistoryGraph: head={self._head}, graph={self._graph}>"
 
     def as_dict(self) -> Dict[str, Any]:
         return {
             "head": self.head.as_dict(),
             "graph": {
                 hash_to_hex(node.entry.id): sorted(
                     [parent.as_dict() for parent in parents],
                     key=lambda d: d["rev"],
                 )
                 for node, parents in self._graph.items()
             },
         }
 
 
 class IsochroneNode:
     def __init__(
         self,
         entry: DirectoryEntry,
         dbdate: Optional[datetime] = None,
         depth: int = 0,
         prefix: bytes = b"",
     ) -> None:
         self.entry = entry
         self.depth = depth
 
         # dbdate is the maxdate for this node that comes from the DB
         self._dbdate: Optional[datetime] = dbdate
 
         # maxdate is set by the maxdate computation algorithm
         self.maxdate: Optional[datetime] = None
 
         # known is True if this node is already known in the db; either because
         # the current directory actually exists in the database, or because all
         # the content of the current directory is known (subdirectories and files)
         self.known = self.dbdate is not None
         self.invalid = False
         self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name
         self.children: Set[IsochroneNode] = set()
 
     @property
     def dbdate(self) -> Optional[datetime]:
         # use a property to make this attribute (mostly) read-only
         return self._dbdate
 
     def invalidate(self) -> None:
         statsd.increment(
             metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"}
         )
         self._dbdate = None
         self.maxdate = None
         self.known = False
         self.invalid = True
 
     def add_directory(
         self, child: DirectoryEntry, date: Optional[datetime] = None
     ) -> IsochroneNode:
         # we should not be processing this node (ie add subdirectories or files) if it's
         # actually known by the provenance DB
         assert self.dbdate is None
         node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path)
         self.children.add(node)
         return node
 
     def __str__(self) -> str:
         return (
             f"<{self.entry}: depth={self.depth}, "
             f"dbdate={self.dbdate}, maxdate={self.maxdate}, "
             f"known={self.known}, invalid={self.invalid}, path={self.path!r}, "
             f"children=[{', '.join(str(child) for child in self.children)}]>"
         )
 
     def __eq__(self, other: Any) -> bool:
         return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__
 
     def __hash__(self) -> int:
         # only immutable attributes are considered to compute hash
         return hash((self.entry, self.depth, self.path))
 
 
 @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_isochrone_graph"})
 def build_isochrone_graph(
-    archive: ArchiveInterface,
     provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
     revision: RevisionEntry,
     directory: DirectoryEntry,
     minsize: int = 0,
 ) -> IsochroneNode:
     assert revision.date is not None
     assert revision.root == directory.id
 
     # this function processes a revision in 2 steps:
     #
     # 1. build the tree structure of IsochroneNode objects (one INode per
     #    directory under the root directory of the revision but not following
     #    known subdirectories), and gather the dates from the DB for already
     #    known objects; for files, just keep all the dates in a global 'fdates'
     #    dict; note that in this step, we will only recurse the directories
     #    that are not already known.
     #
     # 2. compute the maxdate for each node of the tree that was not found in the DB.
 
     # Build the nodes structure
     root_date = provenance.directory_get_date_in_isochrone_frontier(directory)
     root = IsochroneNode(directory, dbdate=root_date)
     stack = [root]
     fdates: Dict[Sha1Git, datetime] = {}  # map {file_id: date}
     while stack:
         current = stack.pop()
         if current.dbdate is None or current.dbdate > revision.date:
             # If current directory has an associated date in the isochrone frontier that
             # is strictly greater than the current revision's one, it should be ignored
             # as the revision is being processed out of order.
             if current.dbdate is not None and current.dbdate > revision.date:
                 current.invalidate()
 
             # Pre-query all known dates for directories in the current directory
             # for the provenance object to have them cached and (potentially) improve
             # performance.
             current.entry.retrieve_children(archive, minsize=minsize)
             ddates = provenance.directory_get_dates_in_isochrone_frontier(
                 current.entry.dirs
             )
             for dir in current.entry.dirs:
                 # Recursively analyse subdirectory nodes
                 node = current.add_directory(dir, date=ddates.get(dir.id, None))
                 stack.append(node)
 
             fdates.update(provenance.content_get_early_dates(current.entry.files))
 
     # Precalculate max known date for each node in the graph (only directory nodes are
     # pushed to the stack).
     stack = [root]
 
     while stack:
         current = stack.pop()
         # Current directory node is known if it already has an assigned date (ie. it was
         # already seen as an isochrone frontier).
         if current.known:
             assert current.maxdate is None
             current.maxdate = current.dbdate
         else:
             if any(x.maxdate is None for x in current.children):
                 # at least one child of current has no maxdate yet
                 # Current node needs to be analysed again after its children.
                 stack.append(current)
                 for child in current.children:
                     if child.maxdate is None:
                         # if child.maxdate is None, it must be processed
                         stack.append(child)
             else:
                 # all the files and directories under current have a maxdate,
                 # we can infer the maxdate for current directory
                 assert current.maxdate is None
                 # if all content is already known, update current directory info.
                 current.maxdate = max(
                     [UTCMIN]
                     + [
                         child.maxdate
                         for child in current.children
                         if child.maxdate is not None  # unnecessary, but needed for mypy
                     ]
                     + [
                         fdates.get(file.id, revision.date)
                         for file in current.entry.files
                     ]
                 )
                 if current.maxdate <= revision.date:
                     current.known = (
                         # true if all subdirectories are known
                         all(child.known for child in current.children)
                         # true if all files are in fdates, i.e. if all files were known
                         # *before building this isochrone graph node*
                         # Note: the 'all()' is lazy: will stop iterating as soon as
                         # possible
                         and all((file.id in fdates) for file in current.entry.files)
                     )
                 else:
                     # at least one content is being processed out-of-order, then current
                     # node should be treated as unknown
                     current.maxdate = revision.date
                     current.known = False
     return root
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
index c7e05e6..5d24568 100644
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -1,103 +1,103 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from itertools import islice
 from typing import Generator, Iterable, Iterator, List, Optional, Tuple
 
 from swh.core.statsd import statsd
 from swh.model.model import Sha1Git
 
 from .archive import ArchiveInterface
 from .graph import HistoryGraph
 from .interface import ProvenanceInterface
 from .model import OriginEntry, RevisionEntry
 
 ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
 
 
 class CSVOriginIterator:
     """Iterator over origin visit statuses typically present in the given CSV
     file.
 
     The input is an iterator that produces 2 elements per row:
 
       (url, snap)
 
     where:
     - url: is the origin url of the visit
     - snap: sha1_git of the snapshot pointed by the visit status
     """
 
     def __init__(
         self,
         statuses: Iterable[Tuple[str, Sha1Git]],
         limit: Optional[int] = None,
     ) -> None:
         self.statuses: Iterator[Tuple[str, Sha1Git]]
         if limit is not None:
             self.statuses = islice(statuses, limit)
         else:
             self.statuses = iter(statuses)
 
     def __iter__(self) -> Generator[OriginEntry, None, None]:
         return (OriginEntry(url, snapshot) for url, snapshot in self.statuses)
 
 
 @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "main"})
 def origin_add(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     origins: List[OriginEntry],
 ) -> None:
     for origin in origins:
         provenance.origin_add(origin)
         origin.retrieve_revisions(archive)
         for revision in origin.revisions:
-            graph = HistoryGraph(archive, provenance, revision)
+            graph = HistoryGraph(provenance, archive, revision)
             origin_add_revision(provenance, origin, graph)
     provenance.flush()
 
 
 @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
 def origin_add_revision(
     provenance: ProvenanceInterface,
     origin: OriginEntry,
     graph: HistoryGraph,
 ) -> None:
     # XXX: simplified version of the origin-revision algorithm. This is generating flat
     # models for the history of all head revisions. No previous result is reused now!
     # The previous implementation was missing some paths from origins to certain
     # revisions due to a wrong reuse logic.
 
     # head is treated separately since it should always be added to the given origin
     check_preferred_origin(provenance, origin, graph.head.entry)
     provenance.revision_add_to_origin(origin, graph.head.entry)
     visited = {graph.head}
 
     # head's history should be recursively iterated starting from its parents
     stack = list(graph.parents[graph.head])
     while stack:
         current = stack.pop()
         check_preferred_origin(provenance, origin, current.entry)
 
         # create a link between it and the head, and recursively walk its history
         provenance.revision_add_before_revision(graph.head.entry, current.entry)
         visited.add(current)
         for parent in graph.parents[current]:
             if parent not in visited:
                 stack.append(parent)
 
 
 @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "check_preferred_origin"})
 def check_preferred_origin(
     provenance: ProvenanceInterface,
     origin: OriginEntry,
     revision: RevisionEntry,
 ) -> None:
     # if the revision has no preferred origin just set the given origin as the
     # preferred one. TODO: this should be improved in the future!
     preferred = provenance.revision_get_preferred_origin(revision)
     if preferred is None:
         provenance.revision_set_preferred_origin(origin, revision)
diff --git a/swh/provenance/revision.py b/swh/provenance/revision.py
index 3f7527f..a6c81ec 100644
--- a/swh/provenance/revision.py
+++ b/swh/provenance/revision.py
@@ -1,246 +1,246 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime, timezone
 import os
 from typing import Generator, Iterable, Iterator, List, Optional, Tuple
 
 from swh.core.statsd import statsd
 from swh.model.model import Sha1Git
 
 from .archive import ArchiveInterface
 from .graph import IsochroneNode, build_isochrone_graph
 from .interface import ProvenanceInterface
 from .model import DirectoryEntry, RevisionEntry
 
 REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
 
 
 class CSVRevisionIterator:
     """Iterator over revisions typically present in the given CSV file.
 
     The input is an iterator that produces 3 elements per row:
 
       (id, date, root)
 
     where:
     - id: is the id (sha1_git) of the revision
     - date: is the author date
     - root: sha1 of the directory
     """
 
     def __init__(
         self,
         revisions: Iterable[Tuple[Sha1Git, datetime, Sha1Git]],
         limit: Optional[int] = None,
     ) -> None:
         self.revisions: Iterator[Tuple[Sha1Git, datetime, Sha1Git]]
         if limit is not None:
             from itertools import islice
 
             self.revisions = islice(revisions, limit)
         else:
             self.revisions = iter(revisions)
 
     def __iter__(self) -> Generator[RevisionEntry, None, None]:
         for id, date, root in self.revisions:
             if date.tzinfo is None:
                 date = date.replace(tzinfo=timezone.utc)
             yield RevisionEntry(id, date=date, root=root)
 
 
 @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
 def revision_add(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     revisions: List[RevisionEntry],
     trackall: bool = True,
     lower: bool = True,
     mindepth: int = 1,
     minsize: int = 0,
     commit: bool = True,
 ) -> None:
     for revision in revisions:
         assert revision.date is not None
         assert revision.root is not None
         # Process content starting from the revision's root directory.
         date = provenance.revision_get_date(revision)
         if date is None or revision.date < date:
             graph = build_isochrone_graph(
-                archive,
                 provenance,
+                archive,
                 revision,
                 DirectoryEntry(revision.root),
                 minsize=minsize,
             )
             revision_process_content(
-                archive,
                 provenance,
+                archive,
                 revision,
                 graph,
                 trackall=trackall,
                 lower=lower,
                 mindepth=mindepth,
                 minsize=minsize,
             )
     if commit:
         provenance.flush()
 
 
 @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"})
 def revision_process_content(
-    archive: ArchiveInterface,
     provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
     revision: RevisionEntry,
     graph: IsochroneNode,
     trackall: bool = True,
     lower: bool = True,
     mindepth: int = 1,
     minsize: int = 0,
 ) -> None:
     assert revision.date is not None
     provenance.revision_add(revision)
 
     stack = [graph]
     while stack:
         current = stack.pop()
         if current.dbdate is not None:
             assert current.dbdate <= revision.date
             if trackall:
                 # Current directory is an outer isochrone frontier for a previously
                 # processed revision. It should be reused as is.
                 provenance.directory_add_to_revision(
                     revision, current.entry, current.path
                 )
         else:
             assert current.maxdate is not None
             # Current directory is not an outer isochrone frontier for any previous
             # revision. It might be eligible for this one.
             if is_new_frontier(
                 current,
                 revision=revision,
                 trackall=trackall,
                 lower=lower,
                 mindepth=mindepth,
             ):
                 # Outer frontier should be moved to current position in the isochrone
                 # graph. This is the first time this directory is found in the isochrone
                 # frontier.
                 provenance.directory_set_date_in_isochrone_frontier(
                     current.entry, current.maxdate
                 )
                 if trackall:
                     provenance.directory_add_to_revision(
                         revision, current.entry, current.path
                     )
                     flatten_directory(
-                        archive, provenance, current.entry, minsize=minsize
+                        provenance, archive, current.entry, minsize=minsize
                     )
             else:
                 # If current node is an invalidated frontier, update its date for future
                 # revisions to get the proper value.
                 if current.invalid:
                     provenance.directory_set_date_in_isochrone_frontier(
                         current.entry, current.maxdate
                     )
                 # No point moving the frontier here. Either there are no files or they
                 # are being seen for the first time here. Add all blobs to current
                 # revision updating date if necessary, and recursively analyse
                 # subdirectories as candidates to the outer frontier.
                 for blob in current.entry.files:
                     date = provenance.content_get_early_date(blob)
                     if date is None or revision.date < date:
                         provenance.content_set_early_date(blob, revision.date)
                     provenance.content_add_to_revision(revision, blob, current.path)
                 for child in current.children:
                     stack.append(child)
 
 
 @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
 def flatten_directory(
-    archive: ArchiveInterface,
     provenance: ProvenanceInterface,
+    archive: ArchiveInterface,
     directory: DirectoryEntry,
     minsize: int = 0,
 ) -> None:
     """Recursively retrieve all the files of 'directory' and insert them in the
     'provenance' database in the 'content_to_directory' table.
     """
     stack = [(directory, b"")]
     while stack:
         current, prefix = stack.pop()
         current.retrieve_children(archive, minsize=minsize)
         for f_child in current.files:
             # Add content to the directory with the computed prefix.
             provenance.content_add_to_directory(directory, f_child, prefix)
         for d_child in current.dirs:
             # Recursively walk the child directory.
             stack.append((d_child, os.path.join(prefix, d_child.name)))
 
 
 def is_new_frontier(
     node: IsochroneNode,
     revision: RevisionEntry,
     trackall: bool = True,
     lower: bool = True,
     mindepth: int = 1,
 ) -> bool:
     assert node.maxdate is not None  # for mypy
     assert revision.date is not None  # idem
     if trackall:
         # The only real condition for a directory to be a frontier is that its content
         # is already known and its maxdate is less (or equal) than current revision's
         # date. Checking mindepth is meant to skip root directories (or any arbitrary
         # depth) to improve the result. The option lower tries to maximize the reuse
         # rate of previously defined frontiers by keeping them low in the directory
         # tree.
         return (
             node.known
             and node.maxdate <= revision.date  # all content is earlier than revision
             and node.depth
             >= mindepth  # current node is deeper than the min allowed depth
             and (has_blobs(node) if lower else True)  # there is at least one blob in it
         )
     else:
         # If we are only tracking first occurrences, we want to ensure that all first
         # occurrences end up in the content_early_in_rev relation. Thus, we force
         # every blob outside a frontier to have a strictly earlier date.
         return (
             node.maxdate < revision.date  # all content is earlier than revision
             and node.depth >= mindepth  # deeper than the min allowed depth
             and (has_blobs(node) if lower else True)  # there is at least one blob
         )
 
 
 def has_blobs(node: IsochroneNode) -> bool:
     # We may want to look for files in different ways to decide whether to define a
     # frontier or not:
     # 1. Only files in current node:
     return any(node.entry.files)
     # 2. Files anywhere in the isochrone graph
     # stack = [node]
     # while stack:
     #     current = stack.pop()
     #     if any(
     #         map(lambda child: isinstance(child.entry, FileEntry), current.children)):
     #         return True
     #     else:
     #         # All children are directory entries.
     #         stack.extend(current.children)
     # return False
     # 3. Files in the intermediate directories between current node and any previously
     #    defined frontier:
     # TODO: complete this case!
     # return any(
     #     map(lambda child: isinstance(child.entry, FileEntry), node.children)
     # ) or all(
     #     map(
     #         lambda child: (
     #             not (isinstance(child.entry, DirectoryEntry) and child.date is None)
     #         )
     #         or has_blobs(child),
     #         node.children,
     #     )
     # )
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
index 808c4f2..ca721e1 100644
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -1,55 +1,55 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import pytest
 import yaml
 
 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.graph import HistoryGraph
 from swh.provenance.interface import ProvenanceInterface
 from swh.provenance.model import OriginEntry, RevisionEntry
 from swh.provenance.origin import origin_add_revision
 from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
 
 
 @pytest.mark.parametrize(
     "repo, visit",
     (("with-merges", "visits-01"),),
 )
 @pytest.mark.parametrize("batch", (True, False))
 def test_history_graph(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     repo: str,
     visit: str,
     batch: bool,
 ) -> None:
     # read data/README.md for more details on how these datasets are generated
     data = load_repo_data(repo)
     fill_storage(archive.storage, data)
 
     filename = f"history_graphs_{repo}_{visit}.yaml"
 
     with open(get_datafile(filename)) as file:
         for expected in yaml.full_load(file):
             entry = OriginEntry(expected["origin"], hash_to_bytes(expected["snapshot"]))
             provenance.origin_add(entry)
 
             for expected_graph_as_dict in expected["graphs"]:
                 print("Expected graph:", expected_graph_as_dict)
 
                 computed_graph = HistoryGraph(
-                    archive,
                     provenance,
+                    archive,
                     RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"]["rev"])),
                 )
                 print("Computed graph:", computed_graph.as_dict())
                 assert computed_graph.as_dict() == expected_graph_as_dict
 
                 origin_add_revision(provenance, entry, computed_graph)
 
             if not batch:
                 provenance.flush()
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
index 79374d1..364a564 100644
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -1,114 +1,114 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from copy import deepcopy
 from datetime import datetime, timezone
 from typing import Any, Dict
 
 import pytest
 import yaml
 
 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.graph import IsochroneNode, build_isochrone_graph
 from swh.provenance.interface import ProvenanceInterface
 from swh.provenance.model import DirectoryEntry, RevisionEntry
 from swh.provenance.revision import revision_add
 from swh.provenance.tests.conftest import (
     fill_storage,
     get_datafile,
     load_repo_data,
     ts2dt,
 )
 
 
 def isochrone_graph_from_dict(d: Dict[str, Any], depth: int = 0) -> IsochroneNode:
     """Takes a dictionary representing a tree of IsochroneNode objects, and
     recursively builds the corresponding graph."""
     d = deepcopy(d)
 
     d["entry"]["id"] = hash_to_bytes(d["entry"]["id"])
     d["entry"]["name"] = bytes(d["entry"]["name"], encoding="utf-8")
 
     dbdate = d.get("dbdate", None)
     if dbdate is not None:
         dbdate = datetime.fromtimestamp(d["dbdate"], timezone.utc)
 
     children = d.get("children", [])
 
     node = IsochroneNode(
         entry=DirectoryEntry(**d["entry"]),
         dbdate=dbdate,
         depth=depth,
     )
     node.maxdate = datetime.fromtimestamp(d["maxdate"], timezone.utc)
     node.known = d.get("known", False)
     node.invalid = d.get("invalid", False)
     node.path = bytes(d["path"], encoding="utf-8")
     node.children = set(
         isochrone_graph_from_dict(child, depth=depth + 1) for child in children
     )
     return node
 
 
 @pytest.mark.parametrize(
     "repo, lower, mindepth",
     (
         ("cmdbts2", True, 1),
         ("cmdbts2", False, 1),
         ("cmdbts2", True, 2),
         ("cmdbts2", False, 2),
         ("out-of-order", True, 1),
     ),
 )
 @pytest.mark.parametrize("batch", (True, False))
 def test_isochrone_graph(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     repo: str,
     lower: bool,
     mindepth: int,
     batch: bool,
 ) -> None:
     # read data/README.md for more details on how these datasets are generated
     data = load_repo_data(repo)
     fill_storage(archive.storage, data)
 
     revisions = {rev["id"]: rev for rev in data["revision"]}
     filename = f"graphs_{repo}_{'lower' if lower else 'upper'}_{mindepth}.yaml"
 
     with open(get_datafile(filename)) as file:
         for expected in yaml.full_load(file):
             print("# Processing revision", expected["rev"])
             revision = revisions[hash_to_bytes(expected["rev"])]
             entry = RevisionEntry(
                 id=revision["id"],
                 date=ts2dt(revision["date"]),
                 root=revision["directory"],
             )
             expected_graph = isochrone_graph_from_dict(expected["graph"])
             print("Expected graph:", expected_graph)
 
             # Create graph for current revision and check it has the expected structure.
             assert entry.root is not None
             computed_graph = build_isochrone_graph(
-                archive,
                 provenance,
+                archive,
                 entry,
                 DirectoryEntry(entry.root),
             )
             print("Computed graph:", computed_graph)
             assert computed_graph == expected_graph
 
             # Add current revision so that provenance info is kept up to date for the
             # following ones.
             revision_add(
                 provenance,
                 archive,
                 [entry],
                 lower=lower,
                 mindepth=mindepth,
                 commit=not batch,
             )
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
index 9ab1304..9efca78 100644
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -1,468 +1,468 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime, timezone
 import inspect
 import os
 from typing import Any, Dict, Iterable, Optional, Set, Tuple
 
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Origin, Sha1Git
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.interface import (
     DirectoryData,
     EntityType,
     ProvenanceInterface,
     ProvenanceResult,
     ProvenanceStorageInterface,
     RelationData,
     RelationType,
     RevisionData,
 )
 from swh.provenance.model import OriginEntry, RevisionEntry
 from swh.provenance.mongo.backend import ProvenanceStorageMongoDb
 from swh.provenance.origin import origin_add
 from swh.provenance.provenance import Provenance
 from swh.provenance.revision import revision_add
 from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
 
 
 def test_provenance_storage_content(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks that contents stored through a `ProvenanceStorageInterface` backend
     can be read back unchanged."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     # Register every blob of the repo under its creation date, then verify that
     # both the per-id lookup and the full entity listing report what was stored.
     dates_by_blob = {blob["sha1_git"]: blob["ctime"] for blob in dataset["content"]}
     assert provenance_storage.content_add(dates_by_blob)
 
     fetched = provenance_storage.content_get(set(dates_by_blob))
     assert fetched == dates_by_blob
 
     listed = provenance_storage.entity_get_all(EntityType.CONTENT)
     assert listed == set(dates_by_blob)
 
 
 def test_provenance_storage_directory(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks that directories stored through a `ProvenanceStorageInterface`
     backend can be read back unchanged."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     def getmaxdate(
         directory: Dict[str, Any], contents: Iterable[Dict[str, Any]]
     ) -> Optional[datetime]:
         # Latest `ctime` among the blobs that `directory` lists as files, or None
         # when none of its file entries matches a known blob.
         dates = []
         for entry in directory["entries"]:
             for content in contents:
                 if entry["type"] != "file":
                     continue
                 if entry["target"] == content["sha1_git"]:
                     dates.append(content["ctime"])
         if not dates:
             return None
         return max(dates)
 
     # Only directories holding at least one blob get a date (the newest one among
     # their blobs); the `flat` flag alternates with the position in the dataset.
     dated_dirs = {}
     for position, directory in enumerate(dataset["directory"]):
         newest = getmaxdate(directory, dataset["content"])
         if newest is None:
             continue
         dated_dirs[directory["id"]] = DirectoryData(
             date=newest, flat=position % 2 == 1
         )
 
     # Whatever was inserted must come back identical from both query methods.
     assert provenance_storage.directory_add(dated_dirs)
 
     fetched = provenance_storage.directory_get(set(dated_dirs))
     assert fetched == dated_dirs
 
     listed = provenance_storage.entity_get_all(EntityType.DIRECTORY)
     assert listed == set(dated_dirs)
 
 
 def test_provenance_storage_location(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks that locations stored through a `ProvenanceStorageInterface` backend
     are listed back as expected."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     # Every entry name found in any directory of the repo is stored as a path.
     names = {
         entry["name"]
         for directory in dataset["directory"]
         for entry in directory["entries"]
     }
     assert provenance_storage.location_add(names)
 
     if isinstance(provenance_storage, ProvenanceStorageMongoDb):
         # TODO: remove this when `location_add` is properly implemented for MongoDb.
         return
 
     # Backends that do not keep paths are expected to report no location at all.
     expected = names if provenance_storage.with_path() else set()
     assert provenance_storage.location_get_all() == expected
 
 
 def test_provenance_storage_origin(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks that origins stored through a `ProvenanceStorageInterface` backend
     can be read back unchanged."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     # Map the id of every origin of the repo to its url; the dataset must provide
     # at least one origin for the checks below to be meaningful.
     url_by_id = {}
     for org in dataset["origin"]:
         url_by_id[Origin(url=org["url"]).id] = org["url"]
     assert url_by_id
 
     # Whatever was inserted must come back identical from both query methods.
     assert provenance_storage.origin_add(url_by_id)
 
     fetched = provenance_storage.origin_get(set(url_by_id))
     assert fetched == url_by_id
 
     listed = provenance_storage.entity_get_all(EntityType.ORIGIN)
     assert listed == set(url_by_id)
 
 
 def test_provenance_storage_revision(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks that revisions stored through a `ProvenanceStorageInterface` backend
     can be read back unchanged."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     # The first origin of the dataset is attached to some of the revisions below,
     # so it has to exist in the storage beforehand.
     first_origin = next(iter(dataset["origin"]))
     origin = Origin(url=first_origin["url"])
     assert provenance_storage.origin_add({origin.id: origin.url})
 
     # Split the revisions by position: every sixth one is added as a bare id, the
     # others carry data whose date is set on odd positions only and whose origin
     # is set on positions that are not a multiple of three.
     bare_revs: Set[Sha1Git] = set()
     detailed_revs: Dict[Sha1Git, RevisionData] = {}
     for position, rev in enumerate(dataset["revision"]):
         if position % 6 == 0:
             bare_revs.add(rev["id"])
             continue
         detailed_revs[rev["id"]] = RevisionData(
             date=ts2dt(rev["date"]) if position % 2 else None,
             origin=origin.id if position % 3 else None,
         )
 
     assert bare_revs
     assert provenance_storage.revision_add(bare_revs)
     assert provenance_storage.revision_add(detailed_revs)
 
     # Data must come back as inserted, and every revision must be listed.
     fetched = provenance_storage.revision_get(set(detailed_revs))
     assert fetched == detailed_revs
 
     listed = provenance_storage.entity_get_all(EntityType.REVISION)
     assert listed == bare_revs | set(detailed_revs)
 
 
 def dircontent(
     data: Dict[str, Any],
     ref: Sha1Git,
     dir: Dict[str, Any],
     prefix: bytes = b"",
 ) -> Iterable[Tuple[Sha1Git, RelationData]]:
     """Flattens `dir` into a set of `(blob id, relation)` pairs.
 
     Each file entry found in `dir`, or in any directory reachable from it, yields
     a pair whose relation points to `ref` and holds the entry path, built by
     joining `prefix` and the names of the traversed entries. Sub-directories are
     resolved by looking their target up in `data["directory"]`.
     """
     pairs: Set[Tuple[Sha1Git, RelationData]] = set()
 
     # First the blobs sitting directly in this directory...
     for entry in dir["entries"]:
         if entry["type"] != "file":
             continue
         location = os.path.join(prefix, entry["name"])
         pairs.add((entry["target"], RelationData(dst=ref, path=location)))
 
     # ...then, recursively, those of each sub-directory.
     for entry in dir["entries"]:
         if entry["type"] != "dir":
             continue
         subdir = next(
             candidate
             for candidate in data["directory"]
             if candidate["id"] == entry["target"]
         )
         nested_prefix = os.path.join(prefix, entry["name"])
         pairs.update(dircontent(data, ref, subdir, nested_prefix))
 
     return pairs
 
 
 def entity_add(
     storage: ProvenanceStorageInterface, entity: EntityType, ids: Set[Sha1Git]
 ) -> bool:
     """Adds `ids` to `storage` as entities of kind `entity`.
 
     Contents and directories are dated with the current UTC time (directories
     being marked as not flat); any other kind is handled as a revision, stored
     with neither date nor origin. Returns the result of the storage call.
     """
     timestamp = datetime.now(tz=timezone.utc)
 
     if entity == EntityType.CONTENT:
         content_dates = {sha1: timestamp for sha1 in ids}
         return storage.content_add(content_dates)
 
     if entity == EntityType.DIRECTORY:
         directories = {
             sha1: DirectoryData(date=timestamp, flat=False) for sha1 in ids
         }
         return storage.directory_add(directories)
 
     # Remaining case: entity == EntityType.REVISION
     revisions = {sha1: RevisionData(date=None, origin=None) for sha1 in ids}
     return storage.revision_add(revisions)
 
 
 def relation_add_and_compare_result(
     storage: ProvenanceStorageInterface,
     relation: RelationType,
     data: Dict[Sha1Git, Set[RelationData]],
 ) -> None:
     """Adds `data` to `storage` under `relation` and checks every way of reading
     it back: per source, per destination (reverse lookup) and as a whole."""
     # The first and last words of the relation name give the entity kinds of its
     # sources and destinations. Both sides, as well as the locations, must exist
     # before the relation itself; origins are left to the caller.
     src_kind, *_, dst_kind = relation.value.split("_")
 
     sources = {sha1 for sha1 in data}
     if src_kind != "origin":
         assert entity_add(storage, EntityType(src_kind), sources)
 
     targets = set()
     for rels in data.values():
         for rel in rels:
             targets.add(rel.dst)
     if dst_kind != "origin":
         assert entity_add(storage, EntityType(dst_kind), targets)
 
     if storage.with_path():
         known_paths = set()
         for rels in data.values():
             for rel in rels:
                 if rel.path is not None:
                     known_paths.add(rel.path)
         assert storage.location_add(known_paths)
 
     assert data
     assert storage.relation_add(relation, data)
 
     # Forward lookup: each source must give back exactly its own relations.
     for source in sources:
         forward = storage.relation_get(relation, [source])
         relation_compare_result(
             forward, {source: data[source]}, storage.with_path()
         )
 
     # Reverse lookup: each destination must give back, grouped by source, the
     # relations that point to it and nothing else.
     for target in targets:
         backward = storage.relation_get(relation, [target], reverse=True)
         pointing: Dict[Sha1Git, Set[RelationData]] = {}
         for source, rels in data.items():
             hits = {
                 RelationData(dst=target, path=rel.path)
                 for rel in rels
                 if target == rel.dst
             }
             if hits:
                 pointing[source] = hits
         relation_compare_result(backward, pointing, storage.with_path())
 
     # Finally, the full dump of the relation must match everything inserted.
     everything = storage.relation_get_all(relation)
     relation_compare_result(everything, data, storage.with_path())
 
 
 def relation_compare_result(
     computed: Dict[Sha1Git, Set[RelationData]],
     expected: Dict[Sha1Git, Set[RelationData]],
     with_path: bool,
 ) -> None:
     """Asserts that `computed` equals `expected`, the paths of the latter being
     replaced by `None` first when `with_path` is false."""
     normalized = {}
     for src_sha1, rels in expected.items():
         normalized[src_sha1] = {
             RelationData(dst=rel.dst, path=rel.path if with_path else None)
             for rel in rels
         }
     assert normalized == computed
 
 
 def test_provenance_storage_relation(
     provenance_storage: ProvenanceStorageInterface,
 ) -> None:
     """Checks every relation type of a `ProvenanceStorageInterface` backend by
     inserting relations derived from the dataset and reading them back."""
 
     # The datasets are described in data/README.md.
     dataset = load_repo_data("cmdbts2")
 
     # Content-in-revision relation.
     # Flatten the root directory of each revision of the dataset.
     cnt_in_rev: Dict[Sha1Git, Set[RelationData]] = {}
     for rev in dataset["revision"]:
         root = next(
             candidate
             for candidate in dataset["directory"]
             if candidate["id"] == rev["directory"]
         )
         for blob, occurrence in dircontent(dataset, rev["id"], root):
             if blob not in cnt_in_rev:
                 cnt_in_rev[blob] = set()
             cnt_in_rev[blob].add(occurrence)
     relation_add_and_compare_result(
         provenance_storage, RelationType.CNT_EARLY_IN_REV, cnt_in_rev
     )
 
     # Content-in-directory relation.
     # Flatten each directory of the dataset.
     cnt_in_dir: Dict[Sha1Git, Set[RelationData]] = {}
     for directory in dataset["directory"]:
         for blob, occurrence in dircontent(dataset, directory["id"], directory):
             if blob not in cnt_in_dir:
                 cnt_in_dir[blob] = set()
             cnt_in_dir[blob].add(occurrence)
     relation_add_and_compare_result(
         provenance_storage, RelationType.CNT_IN_DIR, cnt_in_dir
     )
 
     # Directory-in-revision relation.
     # Attach each root directory to the revision it belongs to.
     dir_in_rev: Dict[Sha1Git, Set[RelationData]] = {}
     for rev in dataset["revision"]:
         if rev["directory"] not in dir_in_rev:
             dir_in_rev[rev["directory"]] = set()
         dir_in_rev[rev["directory"]].add(RelationData(dst=rev["id"], path=b"."))
     relation_add_and_compare_result(
         provenance_storage, RelationType.DIR_IN_REV, dir_in_rev
     )
 
     # Revision-in-origin relation.
     # `entity_add` (used by `relation_add_and_compare_result`) cannot insert
     # origins, so they are stored here first.
     url_by_id = {Origin(url=org["url"]).id: org["url"] for org in dataset["origin"]}
     assert provenance_storage.origin_add(url_by_id)
     # Each revision heading a branch of a visited snapshot is attached to the
     # origin of the corresponding visit.
     rev_in_org: Dict[Sha1Git, Set[RelationData]] = {}
     for status in dataset["origin_visit_status"]:
         if status["snapshot"] is None:
             continue
         for snapshot in dataset["snapshot"]:
             if not snapshot["id"] == status["snapshot"]:
                 continue
             for branch in snapshot["branches"].values():
                 if not branch["target_type"] == "revision":
                     continue
                 heads = rev_in_org.setdefault(branch["target"], set())
                 heads.add(
                     RelationData(dst=Origin(url=status["origin"]).id, path=None)
                 )
     relation_add_and_compare_result(
         provenance_storage, RelationType.REV_IN_ORG, rev_in_org
     )
 
     # Revision-before-revision relation.
     # Each parent of each revision of the dataset becomes a source pointing to it.
     rev_before_rev: Dict[Sha1Git, Set[RelationData]] = {}
     for rev in dataset["revision"]:
         child = rev["id"]
         for parent in rev["parents"]:
             if parent not in rev_before_rev:
                 rev_before_rev[parent] = set()
             rev_before_rev[parent].add(RelationData(dst=child, path=None))
     relation_add_and_compare_result(
         provenance_storage, RelationType.REV_BEFORE_REV, rev_before_rev
     )
 
 
 def test_provenance_storage_find(
-    archive: ArchiveInterface,
     provenance: ProvenanceInterface,
     provenance_storage: ProvenanceStorageInterface,
+    archive: ArchiveInterface,
 ) -> None:
     """Tests `content_find_first` and `content_find_all` methods for every
     `ProvenanceStorageInterface` implementation.
 
     The revision-content algorithm, then the origin-revision one, are run both on
     `provenance` and on a new `Provenance` wrapping `provenance_storage`. After
     each step the answers of `provenance_storage` are compared with hard-coded
     values for one content and with the answers of `provenance.storage` for every
     content of the dataset.
     """
 
     # Read data/README.md for more details on how these datasets are generated.
     data = load_repo_data("cmdbts2")
     fill_storage(archive.storage, data)
 
     # Test content_find_first and content_find_all, first only executing the
     # revision-content algorithm, then adding the origin-revision layer.
     def adapt_result(
         result: Optional[ProvenanceResult], with_path: bool
     ) -> Optional[ProvenanceResult]:
         # Rebuild `result` with an empty path when `with_path` is false, so that
         # it can be compared with answers of a storage that keeps no paths.
         # `None` is passed through untouched.
         if result is not None:
             return ProvenanceResult(
                 result.content,
                 result.revision,
                 result.date,
                 result.origin,
                 result.path if with_path else b"",
             )
         return result
 
     # Execute the revision-content algorithm on both storages.
     revisions = [
         RevisionEntry(id=rev["id"], date=ts2dt(rev["date"]), root=rev["directory"])
         for rev in data["revision"]
     ]
     revision_add(provenance, archive, revisions)
     revision_add(Provenance(provenance_storage), archive, revisions)
 
     # Spot-check one content against hard-coded values; no origin is expected
     # yet since the origin-revision layer has not been processed at this point.
     assert adapt_result(
         ProvenanceResult(
             content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"),
             revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"),
             date=datetime.fromtimestamp(1000000000.0, timezone.utc),
             origin=None,
             path=b"A/B/C/a",
         ),
         provenance_storage.with_path(),
     ) == provenance_storage.content_find_first(
         hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494")
     )
 
     # Both storages must agree on the first and on all occurrences of every
     # content of the dataset (paths being dropped when needed).
     for cnt in {cnt["sha1_git"] for cnt in data["content"]}:
         assert adapt_result(
             provenance.storage.content_find_first(cnt), provenance_storage.with_path()
         ) == provenance_storage.content_find_first(cnt)
         assert {
             adapt_result(occur, provenance_storage.with_path())
             for occur in provenance.storage.content_find_all(cnt)
         } == set(provenance_storage.content_find_all(cnt))
 
     # Execute the origin-revision algorithm on both storages.
     # Visit statuses without a snapshot are left out.
     origins = [
         OriginEntry(url=sta["origin"], snapshot=sta["snapshot"])
         for sta in data["origin_visit_status"]
         if sta["snapshot"] is not None
     ]
     origin_add(provenance, archive, origins)
     origin_add(Provenance(provenance_storage), archive, origins)
 
     # Same spot-check as above, the origin url being now expected in the answer.
     assert adapt_result(
         ProvenanceResult(
             content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"),
             revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"),
             date=datetime.fromtimestamp(1000000000.0, timezone.utc),
             origin="https://cmdbts2",
             path=b"A/B/C/a",
         ),
         provenance_storage.with_path(),
     ) == provenance_storage.content_find_first(
         hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494")
     )
 
     # Both storages must still agree once origins have been processed.
     for cnt in {cnt["sha1_git"] for cnt in data["content"]}:
         assert adapt_result(
             provenance.storage.content_find_first(cnt), provenance_storage.with_path()
         ) == provenance_storage.content_find_first(cnt)
         assert {
             adapt_result(occur, provenance_storage.with_path())
             for occur in provenance.storage.content_find_all(cnt)
         } == set(provenance_storage.content_find_all(cnt))
 
 
 def test_types(provenance_storage: ProvenanceStorageInterface) -> None:
     """Checks all methods of ProvenanceStorageInterface are implemented by this
     backend, and that they have the same signature.
 
     `provenance_storage` is the storage backend under test; it is annotated as
     a `ProvenanceStorageInterface`, which is what the final assertion verifies.
     """
     # Create an instance of the protocol (which cannot be instantiated
     # directly, so this creates a subclass, then instantiates it)
     interface = type("_", (ProvenanceStorageInterface,), {})()
 
     # Sanity check: the introspection below does see the protocol methods.
     assert "content_find_first" in dir(interface)
 
     missing_methods = []
 
     for meth_name in dir(interface):
         # Private and dunder attributes are not part of the public API.
         if meth_name.startswith("_"):
             continue
         interface_meth = getattr(interface, meth_name)
         try:
             concrete_meth = getattr(provenance_storage, meth_name)
         except AttributeError:
             if not getattr(interface_meth, "deprecated_endpoint", False):
                 # The backend is missing a (non-deprecated) endpoint
                 missing_methods.append(meth_name)
             continue
 
         expected_signature = inspect.signature(interface_meth)
         actual_signature = inspect.signature(concrete_meth)
 
         # The method name is used as message to tell which signature differs.
         assert expected_signature == actual_signature, meth_name
 
     assert missing_methods == []
 
     # If all the assertions above succeed, then this one should too.
     # But there's no harm in double-checking.
     # And we could replace the assertions above by this one, but unlike
     # the assertions above, it doesn't explain what is missing.
     assert isinstance(provenance_storage, ProvenanceStorageInterface)