diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py
index 53bf275..6336483 100644
--- a/swh/provenance/tests/test_cli.py
+++ b/swh/provenance/tests/test_cli.py
@@ -1,164 +1,165 @@
 # Copyright (C) 2021-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from typing import Dict, List, Set
 
 from _pytest.monkeypatch import MonkeyPatch
 from click.testing import CliRunner
 import psycopg2.extensions
 import pytest
 
 from swh.core.cli import swh as swhmain
 import swh.core.cli.db  # noqa ; ensure cli is loaded
 from swh.core.db import BaseDb
 from swh.core.db.db_utils import init_admin_extensions
 from swh.model.hashutil import MultiHash
 import swh.provenance.cli  # noqa ; ensure cli is loaded
 from swh.provenance.tests.conftest import fill_storage, load_repo_data
 from swh.storage.interface import StorageInterface
 
 from .conftest import get_datafile
 from .test_utils import invoke, write_configuration_path
 
 
 def test_cli_swh_db_help() -> None:
     # swhmain.add_command(provenance_cli)
     result = CliRunner().invoke(swhmain, ["provenance", "-h"])
     assert result.exit_code == 0
     assert "Commands:" in result.output
     commands = result.output.split("Commands:")[1]
     for command in (
         "find-all",
         "find-first",
         "iter-frontiers",
         "iter-origins",
         "iter-revisions",
     ):
         assert f" {command} " in commands
 
 
 TABLES = {
     "dbflavor",
     "dbmodule",
     "dbversion",
     "content",
     "content_in_revision",
     "content_in_directory",
     "directory",
     "directory_in_revision",
     "location",
     "origin",
     "revision",
     "revision_before_revision",
     "revision_in_origin",
 }
 
 
 @pytest.mark.parametrize(
     "flavor, dbtables", (("with-path", TABLES), ("without-path", TABLES))
 )
 def test_cli_db_create_and_init_db_with_flavor(
     monkeypatch: MonkeyPatch,
     postgresql: psycopg2.extensions.connection,
     flavor: str,
     dbtables: Set[str],
 ) -> None:
     """Test that 'swh db init provenance' works with both the with-path and
     without-path flavors"""
     dbname = f"{flavor}-db"
 
     # DB creation using 'swh db create'
     db_params = postgresql.get_dsn_parameters()
     monkeypatch.setenv("PGHOST", db_params["host"])
     monkeypatch.setenv("PGUSER", db_params["user"])
     monkeypatch.setenv("PGPORT", db_params["port"])
     result = CliRunner().invoke(swhmain, ["db", "create", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output
 
     # DB init using 'swh db init'
     result = CliRunner().invoke(
         swhmain, ["db", "init", "-d", dbname, "--flavor", flavor, "provenance"]
     )
     assert result.exit_code == 0, result.output
     assert f"(flavor {flavor})" in result.output
 
     db_params["dbname"] = dbname
     cnx = BaseDb.connect(**db_params).conn
     # check the DB looks OK (check for db_flavor and expected tables)
     with cnx.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == (flavor,)
 
         cur.execute(
             "select table_name from information_schema.tables "
             "where table_schema = 'public' "
             f"and table_catalog = '{dbname}'"
         )
         tables = set(x for (x,) in cur.fetchall())
         assert tables == dbtables
 
 
 def test_cli_init_db_default_flavor(postgresql: psycopg2.extensions.connection) -> None:
     "Test that 'swh db init provenance' defaults to a with-path flavored DB"
     dbname = postgresql.dsn
     init_admin_extensions("swh.provenance", dbname)
     result = CliRunner().invoke(swhmain, ["db", "init", "-d", dbname, "provenance"])
     assert result.exit_code == 0, result.output
     with postgresql.cursor() as cur:
         cur.execute("select swh_get_dbflavor()")
         assert cur.fetchone() == ("with-path",)
 
 
+@pytest.mark.origin_layer
 @pytest.mark.parametrize(
     "subcommand",
     (["origin", "from-csv"], ["iter-origins"]),
 )
 def test_cli_origin_from_csv(
     swh_storage: StorageInterface,
     subcommand: List[str],
     swh_storage_backend_config: Dict,
     provenance,
     tmp_path,
 ):
     repo = "cmdbts2"
     origin_url = f"https://{repo}"
     data = load_repo_data(repo)
     fill_storage(swh_storage, data)
 
     assert len(data["origin"]) >= 1
     assert origin_url in [o["url"] for o in data["origin"]]
 
     cfg = {
         "provenance": {
             "archive": {
                 "cls": "api",
                 "storage": swh_storage_backend_config,
             },
             "storage": {
                 "cls": "postgresql",
                 # "db": provenance.storage.conn.dsn,
                 "db": provenance.storage.conn.get_dsn_parameters(),
             },
         },
     }
 
     config_path = write_configuration_path(cfg, tmp_path)
 
     csv_filepath = get_datafile("origins.csv")
     subcommand = subcommand + [csv_filepath]
 
     result = invoke(subcommand, config_path)
     assert result.exit_code == 0, f"Unexpected result: {result.output}"
 
     origin_sha1 = MultiHash.from_data(
         origin_url.encode(), hash_names=["sha1"]
     ).digest()["sha1"]
     actual_result = provenance.storage.origin_get([origin_sha1])
     assert actual_result == {origin_sha1: origin_url}
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
index 8aaf489..92fbedc 100644
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -1,54 +1,55 @@
 # Copyright (C) 2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import pytest
 import yaml
 
 from swh.model.hashutil import hash_to_bytes
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.graph import HistoryGraph
 from swh.provenance.interface import ProvenanceInterface
 from swh.provenance.model import OriginEntry, RevisionEntry
 from swh.provenance.origin import origin_add_revision
 from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
 
 
+@pytest.mark.origin_layer
 @pytest.mark.parametrize(
     "repo, visit",
     (("with-merges", "visits-01"),),
 )
 @pytest.mark.parametrize("batch", (True, False))
 def test_history_graph(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     repo: str,
     visit: str,
     batch: bool,
 ) -> None:
     # read data/README.md for more details on how these datasets are generated
     data = load_repo_data(repo)
     fill_storage(archive.storage, data)
 
     filename = f"history_graphs_{repo}_{visit}.yaml"
 
     with open(get_datafile(filename)) as file:
         for expected in yaml.full_load(file):
             entry = OriginEntry(expected["origin"], hash_to_bytes(expected["snapshot"]))
             provenance.origin_add(entry)
 
             for expected_graph_as_dict in expected["graphs"]:
                 print("Expected graph:", expected_graph_as_dict)
 
                 computed_graph = HistoryGraph(
                     archive,
                     RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"])),
                 )
                 print("Computed graph:", computed_graph.as_dict())
                 assert computed_graph.as_dict() == expected_graph_as_dict
 
                 origin_add_revision(provenance, entry, computed_graph)
 
             if not batch:
                 provenance.flush()
diff --git a/swh/provenance/tests/test_journal_client.py b/swh/provenance/tests/test_journal_client.py
index 4fd6854..27bb7af 100644
--- a/swh/provenance/tests/test_journal_client.py
+++ b/swh/provenance/tests/test_journal_client.py
@@ -1,135 +1,136 @@
 # Copyright (C) 2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from typing import Dict
 
 from confluent_kafka import Consumer
 import pytest
 
 from swh.model.hashutil import MultiHash
 from swh.provenance.tests.conftest import fill_storage, load_repo_data
 from swh.storage.interface import StorageInterface
 
 from .test_utils import invoke, write_configuration_path
 
 
 @pytest.fixture
 def swh_storage_backend_config(swh_storage_backend_config, kafka_server, kafka_prefix):
     writer_config = {
         "cls": "kafka",
         "brokers": [kafka_server],
         "client_id": "kafka_writer",
         "prefix": kafka_prefix,
         "anonymize": False,
     }
     yield {**swh_storage_backend_config, "journal_writer": writer_config}
 
 
+@pytest.mark.origin_layer
 def test_cli_origin_from_journal_client(
     swh_storage: StorageInterface,
     swh_storage_backend_config: Dict,
     kafka_prefix: str,
     kafka_server: str,
     consumer: Consumer,
     tmp_path: str,
     provenance,
     postgres_provenance,
 ) -> None:
     """Test origin journal client cli"""
 
     # Prepare storage data
     data = load_repo_data("cmdbts2")
     assert len(data["origin"]) >= 1
     origin_url = data["origin"][0]["url"]
     fill_storage(swh_storage, data)
 
     # Prepare configuration for cli call
     swh_storage_backend_config.pop("journal_writer", None)  # no need for that config
     storage_config_dict = swh_storage_backend_config
     cfg = {
         "journal_client": {
             "cls": "kafka",
             "brokers": [kafka_server],
             "group_id": "toto",
             "prefix": kafka_prefix,
             "stop_on_eof": True,
         },
         "provenance": {
             "archive": {
                 "cls": "api",
                 "storage": storage_config_dict,
             },
             "storage": {
                 "cls": "postgresql",
                 "db": postgres_provenance.get_dsn_parameters(),
             },
         },
     }
     config_path = write_configuration_path(cfg, tmp_path)
 
     # call the cli 'swh provenance origin from-journal'
     result = invoke(["origin", "from-journal"], config_path)
     assert result.exit_code == 0, f"Unexpected result: {result.output}"
 
     origin_sha1 = MultiHash.from_data(
         origin_url.encode(), hash_names=["sha1"]
     ).digest()["sha1"]
     actual_result = provenance.storage.origin_get([origin_sha1])
     assert actual_result == {origin_sha1: origin_url}
 
 
 def test_cli_revision_from_journal_client(
     swh_storage: StorageInterface,
     swh_storage_backend_config: Dict,
     kafka_prefix: str,
     kafka_server: str,
     consumer: Consumer,
     tmp_path: str,
     provenance,
     postgres_provenance,
 ) -> None:
     """Test revision journal client cli"""
 
     # Prepare storage data
     data = load_repo_data("cmdbts2")
     assert len(data["origin"]) >= 1
     fill_storage(swh_storage, data)
 
     # Prepare configuration for cli call
     swh_storage_backend_config.pop("journal_writer", None)  # no need for that config
     storage_config_dict = swh_storage_backend_config
     cfg = {
         "journal_client": {
             "cls": "kafka",
             "brokers": [kafka_server],
             "group_id": "toto",
             "prefix": kafka_prefix,
             "stop_on_eof": True,
         },
         "provenance": {
             "archive": {
                 "cls": "api",
                 "storage": storage_config_dict,
             },
             "storage": {
                 "cls": "postgresql",
                 "db": postgres_provenance.get_dsn_parameters(),
             },
         },
     }
     config_path = write_configuration_path(cfg, tmp_path)
 
     revisions = [rev["id"] for rev in data["revision"]]
     result = provenance.storage.revision_get(revisions)
     assert not result
 
     # call the cli 'swh provenance revision from-journal'
     cli_result = invoke(["revision", "from-journal"], config_path)
     assert cli_result.exit_code == 0, f"Unexpected result: {cli_result.output}"
 
     result = provenance.storage.revision_get(revisions)
     assert set(result.keys()) == set(revisions)
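
Both journal-client tests above verify results by hashing the origin URL by hand.
The same identifier is produced by Origin.id from swh.model.model, which the storage
tests later in this series use interchangeably against the same storage; a minimal
sketch of the equivalence, using an origin URL from the cmdbts2 dataset:

    from swh.model.hashutil import MultiHash
    from swh.model.model import Origin

    url = "https://cmdbts2"
    sha1 = MultiHash.from_data(url.encode(), hash_names=["sha1"]).digest()["sha1"]
    assert sha1 == Origin(url=url).id  # both are the SHA1 of the URL bytes
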
diff --git a/swh/provenance/tests/test_origin_iterator.py b/swh/provenance/tests/test_origin_iterator.py
index 020a07d..581aaed 100644
--- a/swh/provenance/tests/test_origin_iterator.py
+++ b/swh/provenance/tests/test_origin_iterator.py
@@ -1,46 +1,47 @@
 # Copyright (C) 2021-2022 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import pytest
 
 from swh.provenance.origin import CSVOriginIterator
 from swh.provenance.tests.conftest import fill_storage, load_repo_data
 from swh.storage.algos.origin import (
     iter_origin_visit_statuses,
     iter_origin_visits,
     iter_origins,
 )
 from swh.storage.interface import StorageInterface
 
 
+@pytest.mark.origin_layer
 @pytest.mark.parametrize(
     "repo",
     (
         "cmdbts2",
         "out-of-order",
     ),
 )
 def test_origin_iterator(swh_storage: StorageInterface, repo: str) -> None:
     """Test CSVOriginIterator"""
     data = load_repo_data(repo)
     fill_storage(swh_storage, data)
 
     origins_csv = []
     for origin in iter_origins(swh_storage):
         for visit in iter_origin_visits(swh_storage, origin.url):
             if visit.visit is not None:
                 for status in iter_origin_visit_statuses(
                     swh_storage, origin.url, visit.visit
                 ):
                     if status.snapshot is not None:
                         origins_csv.append((status.origin, status.snapshot))
     origins = list(CSVOriginIterator(origins_csv))
     assert origins
     # there can be more origins, depending on the additional extra visits.yaml
     # file used during dataset generation (see data/generate_storage_from_git)
     assert len(origins) >= len(data["origin"])
     # but we can check it's a subset
     assert set(o.url for o in origins) <= set(o["url"] for o in data["origin"])
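
For reference, CSVOriginIterator (exercised above) consumes (url, snapshot-id) pairs
and yields entries exposing at least a url attribute, as the assertions above rely on.
A minimal standalone sketch, assuming that interface; the pair below is hypothetical
and the snapshot id is a placeholder, not a hash from the test datasets:

    from swh.model.hashutil import hash_to_bytes
    from swh.provenance.origin import CSVOriginIterator

    pairs = [("https://cmdbts2", hash_to_bytes("0" * 40))]  # placeholder snapshot id
    for entry in CSVOriginIterator(pairs):
        print(entry.url)
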
diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py
index 1bbdf41..38c4bff 100644
--- a/swh/provenance/tests/test_origin_revision_layer.py
+++ b/swh/provenance/tests/test_origin_revision_layer.py
@@ -1,194 +1,195 @@
 # Copyright (C) 2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import re
 from typing import Any, Dict, Iterable, Iterator, List, Set
 
 import pytest
 from typing_extensions import TypedDict
 
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Sha1Git
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType
 from swh.provenance.model import OriginEntry
 from swh.provenance.origin import origin_add
 from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
 
 
 class SynthRelation(TypedDict):
     src: Sha1Git
     dst: Sha1Git
     name: str
 
 
 class SynthOrigin(TypedDict):
     sha1: Sha1Git
     url: str
     snap: Sha1Git
     O_R: List[SynthRelation]
     R_R: List[SynthRelation]
 
 
 def synthetic_origin_revision_result(filename: str) -> Iterator[SynthOrigin]:
     """Generates dict representations of synthetic origin visits found in the
     synthetic file (from the data/ directory) given as argument of the generator.
 
     Each generated SynthOrigin (typed dict) contains the following elements:
       "sha1": (Sha1Git) sha1 of the origin,
       "url": (str) url of the origin,
       "snap": (Sha1Git) sha1 of the visit's snapshot,
       "O_R": (list) new O-R relations added by this origin visit
       "R_R": (list) new R-R relations added by this origin visit
 
     Each relation above is a SynthRelation typed dict with:
       "src": (Sha1Git) sha1 of the source of the relation
       "dst": (Sha1Git) sha1 of the destination of the relation
     """
     with open(get_datafile(filename), "r") as fobj:
         yield from _parse_synthetic_origin_revision_file(fobj)
 
 
 def _parse_synthetic_origin_revision_file(fobj: Iterable[str]) -> Iterator[SynthOrigin]:
     """Read a 'synthetic' file and generate a dict representation of the synthetic
     origin visit for each snapshot listed in the synthetic file.
     """
     regs = [
         "(?P<url>[^ ]+)?",
         "(?P<reltype>[^| ]*)",
         "(?P<revname>R[0-9]{2,4})?",
         "(?P<type>[ORS]) (?P<sha1>[0-9a-f]{40})",
     ]
     regex = re.compile("^ *" + r" *[|] *".join(regs) + r" *(#.*)?$")
     current_org: List[dict] = []
     for m in (regex.match(line) for line in fobj):
         if m:
             d = m.groupdict()
             if d["url"]:
                 if current_org:
                     yield _mk_synth_org(current_org)
                 current_org.clear()
             current_org.append(d)
     if current_org:
         yield _mk_synth_org(current_org)
 
 
 def _mk_synth_org(synth_org: List[Dict[str, str]]) -> SynthOrigin:
     assert synth_org[0]["type"] == "O"
     assert synth_org[1]["type"] == "S"
     org = SynthOrigin(
         sha1=hash_to_bytes(synth_org[0]["sha1"]),
         url=synth_org[0]["url"],
         snap=hash_to_bytes(synth_org[1]["sha1"]),
         O_R=[],
         R_R=[],
     )
 
     for row in synth_org[2:]:
         if row["reltype"] == "O-R":
             assert row["type"] == "R"
             org["O_R"].append(
                 SynthRelation(
                     src=org["sha1"],
                     dst=hash_to_bytes(row["sha1"]),
                     name=row["revname"],
                 )
             )
         elif row["reltype"] == "R-R":
             assert row["type"] == "R"
             org["R_R"].append(
                 SynthRelation(
                     src=org["O_R"][-1]["dst"],
                     dst=hash_to_bytes(row["sha1"]),
                     name=row["revname"],
                 )
             )
     return org
 
 
+@pytest.mark.origin_layer
 @pytest.mark.parametrize(
     "repo, visit",
     (("with-merges", "visits-01"),),
 )
 def test_origin_revision_layer(
     provenance: ProvenanceInterface,
     archive: ArchiveInterface,
     repo: str,
     visit: str,
 ) -> None:
     # read data/README.md for more details on how these datasets are generated
     data = load_repo_data(repo)
     fill_storage(archive.storage, data)
     syntheticfile = get_datafile(f"origin-revision_{repo}_{visit}.txt")
 
     origins = [
         {"url": status["origin"], "snap": status["snapshot"]}
         for status in data["origin_visit_status"]
         if status["snapshot"] is not None
     ]
 
     rows: Dict[str, Set[Any]] = {
         "origin": set(),
         "revision_in_origin": set(),
         "revision_before_revision": set(),
         "revision": set(),
     }
 
     for synth_org in synthetic_origin_revision_result(syntheticfile):
         for origin in (
             org
             for org in origins
             if org["url"] == synth_org["url"] and org["snap"] == synth_org["snap"]
         ):
             entry = OriginEntry(url=origin["url"], snapshot=origin["snap"])
             origin_add(provenance, archive, [entry])
 
             # each "entry" in the synth file is one new origin visit
             rows["origin"].add(synth_org["sha1"])
             assert rows["origin"] == provenance.storage.entity_get_all(
                 EntityType.ORIGIN
             ), synth_org["url"]
 
             # check the url of the origin
             assert (
                 provenance.storage.origin_get([synth_org["sha1"]])[synth_org["sha1"]]
                 == synth_org["url"]
             ), synth_org["snap"]
 
             # this origin visit might have added new revision objects
             rows["revision"] |= set(x["dst"] for x in synth_org["O_R"])
             rows["revision"] |= set(x["dst"] for x in synth_org["R_R"])
             assert rows["revision"] == provenance.storage.entity_get_all(
                 EntityType.REVISION
             ), synth_org["snap"]
 
             # check for O-R (head) entries
             # these are added in the revision_in_origin relation
             rows["revision_in_origin"] |= set(
                 (x["dst"], x["src"], None) for x in synth_org["O_R"]
             )
             assert rows["revision_in_origin"] == {
                 (src, rel.dst, rel.path)
                 for src, rels in provenance.storage.relation_get_all(
                     RelationType.REV_IN_ORG
                 ).items()
                 for rel in rels
             }, synth_org["snap"]
 
             # check for R-R entries
             # these are added in the revision_before_revision relation
             rows["revision_before_revision"] |= set(
                 (x["dst"], x["src"], None) for x in synth_org["R_R"]
             )
             assert rows["revision_before_revision"] == {
                 (src, rel.dst, rel.path)
                 for src, rels in provenance.storage.relation_get_all(
                     RelationType.REV_BEFORE_REV
                 ).items()
                 for rel in rels
             }, synth_org["snap"]
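
A line of the synthetic origin-revision files parsed above has the shape
"url | reltype | revname | type sha1", with the leading fields optional. A minimal
sketch of the reconstructed regex against a hypothetical line (the sha1 is a
placeholder, not a hash from the datasets):

    import re

    regs = [
        "(?P<url>[^ ]+)?",
        "(?P<reltype>[^| ]*)",
        "(?P<revname>R[0-9]{2,4})?",
        "(?P<type>[ORS]) (?P<sha1>[0-9a-f]{40})",
    ]
    regex = re.compile("^ *" + r" *[|] *".join(regs) + r" *(#.*)?$")

    m = regex.match("https://cmdbts2 |  |  | O " + "0" * 40)
    assert m is not None
    assert m.group("url") == "https://cmdbts2" and m.group("type") == "O"
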
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
index fee0a88..d4ec7a4 100644
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -1,470 +1,521 @@
 # Copyright (C) 2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime, timezone
 import inspect
 import os
 from typing import Any, Dict, Iterable, Optional, Set, Tuple
 
+import pytest
+
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Origin, Sha1Git
 from swh.provenance.archive import ArchiveInterface
 from swh.provenance.interface import (
     DirectoryData,
     EntityType,
     ProvenanceInterface,
     ProvenanceResult,
     ProvenanceStorageInterface,
     RelationData,
     RelationType,
     RevisionData,
 )
 from swh.provenance.model import OriginEntry, RevisionEntry
 from swh.provenance.origin import origin_add
 from swh.provenance.provenance import Provenance
 from swh.provenance.revision import revision_add
 from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
 
 
 class TestProvenanceStorage:
     def test_provenance_storage_content(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests content methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Add all content present in the current repo to the storage, just assigning
         # their creation dates. Then check that the returned results when querying are
         # the same.
         cnt_dates = {
             cnt["sha1_git"]: cnt["ctime"] for idx, cnt in enumerate(data["content"])
         }
         assert provenance_storage.content_add(cnt_dates)
         assert provenance_storage.content_get(set(cnt_dates.keys())) == cnt_dates
         assert provenance_storage.entity_get_all(EntityType.CONTENT) == set(
             cnt_dates.keys()
         )
 
     def test_provenance_storage_directory(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests directory methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Of all directories present in the current repo, only assign a date to those
         # containing blobs (picking the max date among the available ones). Then check
         # that the returned results when querying are the same.
         def getmaxdate(
             directory: Dict[str, Any], contents: Iterable[Dict[str, Any]]
         ) -> Optional[datetime]:
             dates = [
                 content["ctime"]
                 for entry in directory["entries"]
                 for content in contents
                 if entry["type"] == "file" and entry["target"] == content["sha1_git"]
             ]
             return max(dates) if dates else None
 
         flat_values = (False, True)
         dir_dates = {}
         for idx, dir in enumerate(data["directory"]):
             date = getmaxdate(dir, data["content"])
             if date is not None:
                 dir_dates[dir["id"]] = DirectoryData(
                     date=date, flat=flat_values[idx % 2]
                 )
         assert provenance_storage.directory_add(dir_dates)
         assert provenance_storage.directory_get(set(dir_dates.keys())) == dir_dates
         assert provenance_storage.entity_get_all(EntityType.DIRECTORY) == set(
             dir_dates.keys()
         )
 
     def test_provenance_storage_location(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests location methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Add all names of entries present in the directories of the current repo as
         # paths to the storage. Then check that the returned results when querying are
         # the same.
         paths = {entry["name"] for dir in data["directory"] for entry in dir["entries"]}
         assert provenance_storage.location_add(paths)
 
         if provenance_storage.with_path():
             assert provenance_storage.location_get_all() == paths
         else:
             assert provenance_storage.location_get_all() == set()
 
+    @pytest.mark.origin_layer
     def test_provenance_storage_origin(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests origin methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Test origin methods.
         # Add all origins present in the current repo to the storage. Then check that
         # the returned results when querying are the same.
         orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]}
         assert orgs
         assert provenance_storage.origin_add(orgs)
         assert provenance_storage.origin_get(set(orgs.keys())) == orgs
         assert provenance_storage.entity_get_all(EntityType.ORIGIN) == set(orgs.keys())
 
     def test_provenance_storage_revision(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests revision methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Test revision methods.
         # Add all revisions present in the current repo to the storage, assigning their
         # dates and an arbitrary origin to each one. Then check that the returned
         # results when querying are the same.
         origin = Origin(url=next(iter(data["origin"]))["url"])
         # Origin must be inserted in advance.
         assert provenance_storage.origin_add({origin.id: origin.url})
 
         revs = {rev["id"] for idx, rev in enumerate(data["revision"]) if idx % 6 == 0}
         rev_data = {
             rev["id"]: RevisionData(
                 date=ts2dt(rev["date"]) if idx % 2 != 0 else None,
                 origin=origin.id if idx % 3 != 0 else None,
             )
             for idx, rev in enumerate(data["revision"])
             if idx % 6 != 0
         }
         assert revs
         assert provenance_storage.revision_add(revs)
         assert provenance_storage.revision_add(rev_data)
         assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data
         assert provenance_storage.entity_get_all(EntityType.REVISION) == revs | set(
             rev_data.keys()
         )
 
-    def test_provenance_storage_relation(
+    def test_provenance_storage_relation_revision_layer(
         self,
         provenance_storage: ProvenanceStorageInterface,
     ) -> None:
         """Tests relation methods for every `ProvenanceStorageInterface`
         implementation."""
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
 
         # Test content-in-revision relation.
         # Create flat models of every root directory for the revisions in the dataset.
         cnt_in_rev: Dict[Sha1Git, Set[RelationData]] = {}
         for rev in data["revision"]:
             root = next(
                 subdir
                 for subdir in data["directory"]
                 if subdir["id"] == rev["directory"]
             )
             for cnt, rel in dircontent(data, rev["id"], root):
                 cnt_in_rev.setdefault(cnt, set()).add(rel)
         relation_add_and_compare_result(
             provenance_storage, RelationType.CNT_EARLY_IN_REV, cnt_in_rev
         )
 
         # Test content-in-directory relation.
         # Create flat models for every directory in the dataset.
         cnt_in_dir: Dict[Sha1Git, Set[RelationData]] = {}
         for dir in data["directory"]:
             for cnt, rel in dircontent(data, dir["id"], dir):
                 cnt_in_dir.setdefault(cnt, set()).add(rel)
         relation_add_and_compare_result(
             provenance_storage, RelationType.CNT_IN_DIR, cnt_in_dir
         )
 
         # Test directory-in-revision relation.
         # Add root directories to their corresponding revision in the dataset.
         dir_in_rev: Dict[Sha1Git, Set[RelationData]] = {}
         for rev in data["revision"]:
             dir_in_rev.setdefault(rev["directory"], set()).add(
                 RelationData(dst=rev["id"], path=b".")
             )
         relation_add_and_compare_result(
             provenance_storage, RelationType.DIR_IN_REV, dir_in_rev
         )
 
+    @pytest.mark.origin_layer
+    def test_provenance_storage_relation_origin_layer(
+        self,
+        provenance_storage: ProvenanceStorageInterface,
+    ) -> None:
+        """Tests relation methods for every `ProvenanceStorageInterface`
+        implementation."""
+
+        # Read data/README.md for more details on how these datasets are generated.
+        data = load_repo_data("cmdbts2")
+
         # Test revision-in-origin relation.
         # Origins must be inserted in advance (cannot be done by `entity_add` inside
         # `relation_add_and_compare_result`).
         orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]}
         assert provenance_storage.origin_add(orgs)
         # Add all revisions that are head of some snapshot branch to the corresponding
         # origin.
         rev_in_org: Dict[Sha1Git, Set[RelationData]] = {}
         for status in data["origin_visit_status"]:
             if status["snapshot"] is not None:
                 for snapshot in data["snapshot"]:
                     if snapshot["id"] == status["snapshot"]:
                         for branch in snapshot["branches"].values():
                             if branch["target_type"] == "revision":
                                 rev_in_org.setdefault(branch["target"], set()).add(
                                     RelationData(
                                         dst=Origin(url=status["origin"]).id,
                                         path=None,
                                     )
                                 )
         relation_add_and_compare_result(
             provenance_storage, RelationType.REV_IN_ORG, rev_in_org
         )
 
         # Test revision-before-revision relation.
         # For each revision in the data set add an entry for each parent to the
         # relation.
         rev_before_rev: Dict[Sha1Git, Set[RelationData]] = {}
         for rev in data["revision"]:
             for parent in rev["parents"]:
                 rev_before_rev.setdefault(parent, set()).add(
                     RelationData(dst=rev["id"], path=None)
                 )
         relation_add_and_compare_result(
             provenance_storage, RelationType.REV_BEFORE_REV, rev_before_rev
         )
 
-    def test_provenance_storage_find(
+    def test_provenance_storage_find_revision_layer(
         self,
         provenance: ProvenanceInterface,
         provenance_storage: ProvenanceStorageInterface,
         archive: ArchiveInterface,
     ) -> None:
         """Tests `content_find_first` and `content_find_all` methods for every
         `ProvenanceStorageInterface` implementation.
         """
 
         # Read data/README.md for more details on how these datasets are generated.
         data = load_repo_data("cmdbts2")
         fill_storage(archive.storage, data)
 
         # Test content_find_first and content_find_all, first only executing the
         # revision-content algorithm, then adding the origin-revision layer.
         def adapt_result(
             result: Optional[ProvenanceResult], with_path: bool
         ) -> Optional[ProvenanceResult]:
             if result is not None:
                 return ProvenanceResult(
                     result.content,
                     result.revision,
                     result.date,
                     result.origin,
                     result.path if with_path else b"",
                 )
             return result
 
         # Execute the revision-content algorithm on both storages.
         revisions = [
             RevisionEntry(id=rev["id"], date=ts2dt(rev["date"]), root=rev["directory"])
             for rev in data["revision"]
         ]
         revision_add(provenance, archive, revisions)
         revision_add(Provenance(provenance_storage), archive, revisions)
 
         assert adapt_result(
             ProvenanceResult(
                 content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"),
                 revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"),
                 date=datetime.fromtimestamp(1000000000.0, timezone.utc),
                 origin=None,
                 path=b"A/B/C/a",
             ),
             provenance_storage.with_path(),
         ) == provenance_storage.content_find_first(
             hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494")
         )
 
         for cnt in {cnt["sha1_git"] for cnt in data["content"]}:
             assert adapt_result(
                 provenance.storage.content_find_first(cnt),
                 provenance_storage.with_path(),
             ) == provenance_storage.content_find_first(cnt)
             assert {
                 adapt_result(occur, provenance_storage.with_path())
                 for occur in provenance.storage.content_find_all(cnt)
             } == set(provenance_storage.content_find_all(cnt))
 
+    @pytest.mark.origin_layer
+    def test_provenance_storage_find_origin_layer(
+        self,
+        provenance: ProvenanceInterface,
+        provenance_storage: ProvenanceStorageInterface,
+        archive: ArchiveInterface,
+    ) -> None:
+        """Tests `content_find_first` and `content_find_all` methods for every
+        `ProvenanceStorageInterface` implementation.
+        """
+
+        # Read data/README.md for more details on how these datasets are generated.
+        data = load_repo_data("cmdbts2")
+        fill_storage(archive.storage, data)
+
+        # Execute the revision-content algorithm on both storages.
+        revisions = [
+            RevisionEntry(id=rev["id"], date=ts2dt(rev["date"]), root=rev["directory"])
+            for rev in data["revision"]
+        ]
+        revision_add(provenance, archive, revisions)
+        revision_add(Provenance(provenance_storage), archive, revisions)
+
+        # Test content_find_first and content_find_all, first only executing the
+        # revision-content algorithm, then adding the origin-revision layer.
+        def adapt_result(
+            result: Optional[ProvenanceResult], with_path: bool
+        ) -> Optional[ProvenanceResult]:
+            if result is not None:
+                return ProvenanceResult(
+                    result.content,
+                    result.revision,
+                    result.date,
+                    result.origin,
+                    result.path if with_path else b"",
+                )
+            return result
+
         # Execute the origin-revision algorithm on both storages.
         origins = [
             OriginEntry(url=sta["origin"], snapshot=sta["snapshot"])
             for sta in data["origin_visit_status"]
             if sta["snapshot"] is not None
         ]
         origin_add(provenance, archive, origins)
         origin_add(Provenance(provenance_storage), archive, origins)
 
         assert adapt_result(
             ProvenanceResult(
                 content=hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494"),
                 revision=hash_to_bytes("c0d8929936631ecbcf9147be6b8aa13b13b014e4"),
                 date=datetime.fromtimestamp(1000000000.0, timezone.utc),
                 origin="https://cmdbts2",
                 path=b"A/B/C/a",
             ),
             provenance_storage.with_path(),
         ) == provenance_storage.content_find_first(
             hash_to_bytes("20329687bb9c1231a7e05afe86160343ad49b494")
         )
 
         for cnt in {cnt["sha1_git"] for cnt in data["content"]}:
             assert adapt_result(
                 provenance.storage.content_find_first(cnt),
                 provenance_storage.with_path(),
             ) == provenance_storage.content_find_first(cnt)
             assert {
                 adapt_result(occur, provenance_storage.with_path())
                 for occur in provenance.storage.content_find_all(cnt)
             } == set(provenance_storage.content_find_all(cnt))
 
     def test_types(self, provenance_storage: ProvenanceStorageInterface) -> None:
         """Checks all methods of ProvenanceStorageInterface are implemented by
         this backend, and that they have the same signature."""
         # Create an instance of the protocol (which cannot be instantiated
         # directly, so this creates a subclass, then instantiates it)
         interface = type("_", (ProvenanceStorageInterface,), {})()
 
         assert "content_find_first" in dir(interface)
 
         missing_methods = []
         for meth_name in dir(interface):
             if meth_name.startswith("_"):
                 continue
             interface_meth = getattr(interface, meth_name)
             try:
                 concrete_meth = getattr(provenance_storage, meth_name)
             except AttributeError:
                 if not getattr(interface_meth, "deprecated_endpoint", False):
                     # The backend is missing a (non-deprecated) endpoint
                     missing_methods.append(meth_name)
                 continue
 
             expected_signature = inspect.signature(interface_meth)
             actual_signature = inspect.signature(concrete_meth)
 
             assert expected_signature == actual_signature, meth_name
 
         assert missing_methods == []
 
         # If all the assertions above succeed, then this one should too.
         # But there's no harm in double-checking.
         # And we could replace the assertions above by this one, but unlike
         # the assertions above, it doesn't explain what is missing.
         assert isinstance(provenance_storage, ProvenanceStorageInterface)
 
 
 def dircontent(
     data: Dict[str, Any],
     ref: Sha1Git,
     dir: Dict[str, Any],
     prefix: bytes = b"",
 ) -> Iterable[Tuple[Sha1Git, RelationData]]:
     content = {
         (
             entry["target"],
             RelationData(dst=ref, path=os.path.join(prefix, entry["name"])),
         )
         for entry in dir["entries"]
         if entry["type"] == "file"
     }
     for entry in dir["entries"]:
         if entry["type"] == "dir":
             child = next(
                 subdir
                 for subdir in data["directory"]
                 if subdir["id"] == entry["target"]
             )
             content.update(
                 dircontent(data, ref, child, os.path.join(prefix, entry["name"]))
             )
     return content
 
 
 def entity_add(
     storage: ProvenanceStorageInterface, entity: EntityType, ids: Set[Sha1Git]
 ) -> bool:
     now = datetime.now(tz=timezone.utc)
     if entity == EntityType.CONTENT:
         return storage.content_add({sha1: now for sha1 in ids})
     elif entity == EntityType.DIRECTORY:
         return storage.directory_add(
             {sha1: DirectoryData(date=now, flat=False) for sha1 in ids}
         )
     else:  # entity == EntityType.REVISION:
         return storage.revision_add(
             {sha1: RevisionData(date=None, origin=None) for sha1 in ids}
         )
 
 
 def relation_add_and_compare_result(
     storage: ProvenanceStorageInterface,
     relation: RelationType,
     data: Dict[Sha1Git, Set[RelationData]],
 ) -> None:
     # Source, destinations and locations must be added in advance.
     src, *_, dst = relation.value.split("_")
     srcs = {sha1 for sha1 in data}
     if src != "origin":
         assert entity_add(storage, EntityType(src), srcs)
     dsts = {rel.dst for rels in data.values() for rel in rels}
     if dst != "origin":
         assert entity_add(storage, EntityType(dst), dsts)
     if storage.with_path():
         assert storage.location_add(
             {rel.path for rels in data.values() for rel in rels if rel.path is not None}
         )
 
     assert data
     assert storage.relation_add(relation, data)
 
     for src_sha1 in srcs:
         relation_compare_result(
             storage.relation_get(relation, [src_sha1]),
             {src_sha1: data[src_sha1]},
             storage.with_path(),
         )
     for dst_sha1 in dsts:
         relation_compare_result(
             storage.relation_get(relation, [dst_sha1], reverse=True),
             {
                 src_sha1: {
                     RelationData(dst=dst_sha1, path=rel.path)
                     for rel in rels
                     if dst_sha1 == rel.dst
                 }
                 for src_sha1, rels in data.items()
                 if dst_sha1 in {rel.dst for rel in rels}
             },
             storage.with_path(),
         )
     relation_compare_result(
         storage.relation_get_all(relation), data, storage.with_path()
     )
 
 
 def relation_compare_result(
     computed: Dict[Sha1Git, Set[RelationData]],
     expected: Dict[Sha1Git, Set[RelationData]],
     with_path: bool,
 ) -> None:
     assert {
         src_sha1: {
             RelationData(dst=rel.dst, path=rel.path if with_path else None)
             for rel in rels
         }
         for src_sha1, rels in expected.items()
     } == computed
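
The dircontent helper above flattens a directory tree into (content, RelationData)
pairs whose paths are relative to the given root. A toy walkthrough with hypothetical
ids standing in for real sha1s (POSIX path joining assumed):

    from swh.provenance.interface import RelationData
    from swh.provenance.tests.test_provenance_storage import dircontent

    # root dir D1 holds file "a" (content A) and subdir "B" (D2) holding file "b" (B)
    dataset = {
        "directory": [
            {"id": b"D2", "entries": [{"type": "file", "name": b"b", "target": b"B"}]},
        ]
    }
    root = {
        "id": b"D1",
        "entries": [
            {"type": "file", "name": b"a", "target": b"A"},
            {"type": "dir", "name": b"B", "target": b"D2"},
        ],
    }
    assert dircontent(dataset, b"R1", root) == {
        (b"A", RelationData(dst=b"R1", path=b"a")),
        (b"B", RelationData(dst=b"R1", path=b"B/b")),
    }
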