diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 /conf/graph-replayer.yml
 /conf/content-replayer.yml
+/env/tests.env
diff --git a/conf/web.yml b/conf/web.yml
--- a/conf/web.yml
+++ b/conf/web.yml
@@ -1,7 +1,7 @@
 storage:
   cls: remote
   url: http://storage:5002/
-  timeout: 5
+  timeout: 30
 
 objstorage:
   cls: remote
diff --git a/env/tests.env.template b/env/tests.env.template
new file mode 100644
--- /dev/null
+++ b/env/tests.env.template
@@ -0,0 +1,5 @@
+SWH_IMAGE_TAG=20220921-113342
+SWH_MIRROR_TEST_KAFKA_USERNAME=mirror-test-ro
+SWH_MIRROR_TEST_KAFKA_PASSWORD=SOME_INTERESTING_SECRET
+SWH_MIRROR_TEST_KAFKA_BROKER=broker1.journal.staging.swh.network:9093
+SWH_MIRROR_TEST_OBJSTORAGE_URL=https://swh-prod:SOME_INTERESTING_SECRET@objstorage.softwareheritage.org/
diff --git a/images/Dockerfile b/images/Dockerfile
--- a/images/Dockerfile
+++ b/images/Dockerfile
@@ -126,12 +126,3 @@
 COPY replayer/entrypoint.sh /
 
 ENTRYPOINT ["/entrypoint.sh"]
-
-
-###
-# Test image
-###
-FROM swh-base as swh-test
-
-COPY restore_kafka.py /
-#ENTRYPOINT ["python3", "-u", "/restore_kafka.py"]
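Aside: the variables declared in `env/tests.env.template` above are read from `os.environ` by `tests/conftest.py` (further down in this patch), after `pytest-dotenv` has loaded `env/tests.env` as configured in `pyproject.toml` below. A minimal sanity check along those lines could look like the following sketch — it is not part of the test suite, and only assumes the variable names shown in the template:

```python
# Sketch only: fail fast if the test settings from env/tests.env are missing.
from os import environ

REQUIRED = [
    "SWH_IMAGE_TAG",
    "SWH_MIRROR_TEST_KAFKA_USERNAME",
    "SWH_MIRROR_TEST_KAFKA_PASSWORD",
    "SWH_MIRROR_TEST_KAFKA_BROKER",
    "SWH_MIRROR_TEST_OBJSTORAGE_URL",
]
missing = [name for name in REQUIRED if name not in environ]
if missing:
    raise RuntimeError(f"missing test settings: {', '.join(missing)}")
```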
diff --git a/mirror.yml b/mirror.yml
--- a/mirror.yml
+++ b/mirror.yml
@@ -6,6 +6,8 @@
     image: memcached
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
 
   storage-db:
     # the main storage database
@@ -19,6 +21,8 @@
         max_replicas_per_node: 1
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.storage-db == true
+    networks:
+      - swhtest-mirror
     command: ['-c', 'shared_buffers=4GB', '-c', 'effective_cache_size=4GB', '-c', 'random_page_cost=1.5', '-c', 'max_wal_size=4GB']
     environment:
       POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
@@ -44,6 +48,8 @@
         max_replicas_per_node: 1
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.web-db == true
+    networks:
+      - swhtest-mirror
     command: ['-c', 'shared_buffers=4GB', '-c', 'effective_cache_size=4GB', '-c', 'random_page_cost=1.5', '-c', 'max_wal_size=4GB']
     environment:
       POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
@@ -60,6 +66,8 @@
   web:
     # the web app; serves both the web navigation interface and the public web API
     image: softwareheritage/web:${SWH_IMAGE_TAG:-latest}
+    networks:
+      - swhtest-mirror
     configs:
      - source: web
        target: /etc/softwareheritage/config.yml
@@ -94,6 +102,8 @@ # etc.)
     # you want to remove this placement constraint
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.objstorage == true
+    networks:
+      - swhtest-mirror
     volumes:
       - "objstorage:/srv/softwareheritage/objects:rw,Z"
     configs:
@@ -116,6 +126,8 @@
     # an upgrade of the base image that comes with a database migration script
     # is upgraded in a consistent way
       replicas: 1
+    networks:
+      - swhtest-mirror
     configs:
      - source: storage
        target: /etc/softwareheritage/config.yml
@@ -137,6 +149,8 @@
 
   nginx:
     image: nginx
+    networks:
+      - swhtest-mirror
     configs:
      - source: nginx
        target: /etc/nginx/nginx.conf
@@ -147,6 +161,8 @@
 
   prometheus:
     image: prom/prometheus
+    networks:
+      - swhtest-mirror
     depends_on:
       - prometheus-statsd-exporter
     command:
@@ -166,6 +182,8 @@
 
   prometheus-statsd-exporter:
     image: prom/statsd-exporter
+    networks:
+      - swhtest-mirror
     command:
       - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml"
     configs:
@@ -181,6 +199,8 @@
 
   grafana:
     image: grafana/grafana
+    networks:
+      - swhtest-mirror
     depends_on:
       - prometheus
     environment:
@@ -215,6 +235,8 @@
         max_replicas_per_node: 1
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.redis == true
+    networks:
+      - swhtest-mirror
     command:
       - redis-server
       - --save 60 1
@@ -234,6 +256,8 @@
       replicas: 0
       restart_policy:
         condition: "none"
+    networks:
+      - swhtest-mirror
     env_file:
       - ./env/common-python.env
     environment:
@@ -252,6 +276,8 @@
     deploy:
       # do not start replayers by default; see above
       replicas: 0
+    networks:
+      - swhtest-mirror
     env_file:
       - ./env/common-python.env
     environment:
@@ -269,6 +295,8 @@
 
   amqp:
     image: rabbitmq:3.6-management
+    networks:
+      - swhtest-mirror
     ports:
       - 5072:5672
 
@@ -285,6 +313,8 @@
         max_replicas_per_node: 1
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.web-db == true
+    networks:
+      - swhtest-mirror
     command: ['-c', 'shared_buffers=4GB', '-c', 'effective_cache_size=4GB', '-c', 'random_page_cost=1.5', '-c', 'max_wal_size=4GB']
     environment:
       POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
@@ -302,6 +332,8 @@
     image: softwareheritage/base:${SWH_IMAGE_TAG:-latest}
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
     env_file:
       - ./env/common-python.env
     configs:
@@ -324,6 +356,8 @@
     image: softwareheritage/base:${SWH_IMAGE_TAG:-latest}
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
     env_file:
       - ./env/common-python.env
       - ./env/celery-worker.env
@@ -339,6 +373,8 @@
   # vault do really need someone to talk to via SMTP
   mailhog:
     image: mailhog/mailhog
+    networks:
+      - swhtest-mirror
 
   ### scheduler services
 
@@ -353,6 +389,8 @@
         max_replicas_per_node: 1
         constraints:
           - node.labels.org.softwareheritage.mirror.volumes.web-db == true
+    networks:
+      - swhtest-mirror
     command: ['-c', 'shared_buffers=4GB', '-c', 'effective_cache_size=4GB', '-c', 'random_page_cost=1.5', '-c', 'max_wal_size=4GB']
     environment:
       POSTGRES_PASSWORD_FILE: /run/secrets/postgres-password
@@ -370,6 +408,8 @@
     image: softwareheritage/base:${SWH_IMAGE_TAG:-latest}
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
     configs:
      - source: scheduler
        target: /etc/softwareheritage/config.yml
@@ -391,6 +431,8 @@
     image: softwareheritage/base:${SWH_IMAGE_TAG:-latest}
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
     configs:
      - source: scheduler
        target: /etc/softwareheritage/config.yml
@@ -412,6 +454,8 @@
     image: softwareheritage/base:${SWH_IMAGE_TAG:-latest}
     deploy:
       replicas: 1
+    networks:
+      - swhtest-mirror
     configs:
      - source: scheduler
        target: /etc/softwareheritage/config.yml
@@ -482,3 +526,9 @@
     file: conf/grafana/dashboards/content-replayer.json
   grafana-dashboards-backend-stats:
     file: conf/grafana/dashboards/backend-stats.json
+
+
+networks:
+  swhtest-mirror:
+    driver: overlay
+    attachable: true
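Aside: every service now joins the `swhtest-mirror` overlay network declared at the bottom of `mirror.yml`; making it attachable is what lets the test harness and ad-hoc debug containers (see the `docker run --network=…` trick in `tests/README.md` below) reach the stack. A quick pytest-testinfra check along those lines might look like this — the stack name `swhtest_mirror0` is an assumption, and this test is not part of the suite:

```python
# Rough sanity check: the test network should be an attachable overlay network.
def test_mirror_network_is_attachable(host):
    out = host.check_output(
        "docker network inspect swhtest_mirror0_swhtest-mirror"
        " --format '{{.Driver}} {{.Attachable}}'"
    )
    assert out.strip() == "overlay true"
```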
diff --git a/pyproject.toml b/pyproject.toml
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,3 +10,9 @@
 line_length = 88
 force_sort_within_sections = true
 
+[tool.pytest.ini_options]
+env_files = 'env/tests.env'
+log_cli = true
+log_cli_level = "INFO"
+log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
+log_cli_date_format = "%Y-%m-%d %H:%M:%S"
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,5 @@
+pytest
+pytest-testinfra
+pytest-dotenv
+requests
+msgpack
diff --git a/tests/README.md b/tests/README.md
--- a/tests/README.md
+++ b/tests/README.md
@@ -76,6 +76,7 @@
   for the content replication; it would typically include access credentials,
   e.g. `https://login:password@objstorage.softwareheritage.org/`,
 - `SWH_IMAGE_TAG`: the docker image tag to be tested.
+  You can copy the template `env/tests.env.template` to `env/tests.env` to set them.
 
 - the `softwareheritage/base`, `softwareheritage/web`,
   `softwareheritage/replayer` and `softwareheritage/test` images must be built
@@ -130,3 +131,63 @@
 ```
 
 Note the test takes quite some time to execute, so be patient.
+
+
+Troubleshooting
+===============
+
+### Watch out for stale services
+
+If something goes wrong, you might want to check whether you have any remaining Docker services set up:
+
+    docker service ls
+
+If you want to shut them all down, you can use:
+
+    docker service rm $(docker service ls --format '{{.Name}}')
+
+### I want a shell!
+
+To run a shell in an image attached to the Swarm network, use the following:
+
+    docker run --network=swhtest_mirror0_swhtest-mirror -ti --env-file env/common-python.env --env STATSD_TAGS="role:content-replayer,hostname:${HOSTNAME}" -v /tmp/pytest-of-lunar/pytest-current/mirrorcurrent/conf/content-replayer.yml:/etc/softwareheritage/config.yml softwareheritage/replayer:20220915-163058 shell
+
+### Some containers are never started
+
+If you notice that some containers stay at 0 replicas in `docker service ls`, it probably means that their placement constraints, as described in `mirror.yml`, cannot be fulfilled by the nodes currently part of the swarm.
+
+Most likely, you are missing the node labels that locate the volumes needed by the containers. You might want to run:
+
+    docker node update $HOSTNAME \
+      --label-add org.softwareheritage.mirror.volumes.storage-db=true \
+      --label-add org.softwareheritage.mirror.volumes.web-db=true \
+      --label-add org.softwareheritage.mirror.volumes.objstorage=true \
+      --label-add org.softwareheritage.mirror.volumes.redis=true
+
+### SWH services keep restarting
+
+If SWH services keep restarting, look at the service logs, but don't forget to also look at the logs of the Docker daemon itself (using `journalctl -u docker.service`, for example).
+
+If you see:
+
+    error="task: non-zero exit (124)"
+
+it means that `wait-for-it` has reached its timeout. You should double-check the network configuration, including the firewall.
+
+### Failure while checking the Vault service
+
+If the test fails with the following exception:
+
+~~~
+> assert isinstance(tarfilecontent, bytes)
+E assert False
+E + where False = isinstance({'exception': 'NotFoundExc', 'reason': 'Cooked archive for swh:1:dir:c1695cab57e5bfe64ea4b0900c4575bf7240483d not found.', 'traceback': 'Traceback (most recent call last):\n File "/usr/lib/python3/dist-packages/rest_framework/views.py", line 492, in dispatch\n response = handler(request, *args, **kwargs)\n File "/usr/lib/python3/dist-packages/rest_framework/decorators.py", line 54, in handler\n return func(*args, **kwargs)\n File "/usr/lib/python3/dist-pac→
+
+…/swh-mirror/tests/test_graph_replayer.py:423: AssertionError
+~~~
+
+It is most likely because of a stale database. Remove the vault volume using:
+
+    docker volume rm swhtest_mirror0_vault-db
+
+In general, the test has been designed to be run on empty volumes.
\ No newline at end of file
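Aside: since the README states the test is designed to run on empty volumes, a pre-flight check could fail early when leftovers from a previous run are still around. This helper is hypothetical (name and `swhtest_` prefix are assumptions) and not part of the suite:

```python
# Hypothetical pre-flight helper: abort if volumes from a previous run remain.
def assert_no_stale_volumes(host, prefix="swhtest_"):
    leftovers = [
        name
        for name in host.check_output(
            "docker volume ls --format '{{.Name}}'"
        ).splitlines()
        if name.startswith(prefix)
    ]
    assert not leftovers, f"stale volumes found: {leftovers}"
```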
diff --git a/tests/conftest.py b/tests/conftest.py
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -3,6 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import logging
 from os import chdir, environ
 from pathlib import Path
 from shutil import copy, copytree
@@ -23,6 +24,8 @@
 OBJSTORAGE_URL = environ["SWH_MIRROR_TEST_OBJSTORAGE_URL"]
 WFI_TIMEOUT = 60
 
+LOGGER = logging.getLogger(__name__)
+
 
 def pytest_addoption(parser, pluginmanager):
     parser.addoption(
@@ -59,7 +62,7 @@
     # start the whole cluster
     stack_name = f"swhtest_{tmp_path.name}"
 
-    print("Create missing secrets")
+    LOGGER.info("Create missing secrets")
     existing_secrets = [
         line.strip()
         for line in docker_host.check_output(
@@ -69,11 +72,11 @@
     for srv in ("storage", "web", "vault", "scheduler"):
         secret = f"swh-mirror-{srv}-db-password"
         if secret not in existing_secrets:
-            print("Creating secret {secret}")
+            LOGGER.info(f"Creating secret {secret}")
             docker_host.check_output(
                 f"echo not-so-secret | docker secret create {secret} -"
             )
-    print("Remove config objects (if any)")
+    LOGGER.info("Remove config objects (if any)")
     existing_configs = [
         line.strip()
         for line in docker_host.check_output(
@@ -84,17 +87,17 @@
         if cfg.startswith(f"{stack_name}_"):
             docker_host.check_output(f"docker config rm {cfg}")
 
-    print(f"Deploy docker stack {stack_name}")
+    LOGGER.info(f"Deploy docker stack {stack_name}")
     docker_host.check_output(f"docker stack deploy -c mirror.yml {stack_name}")
 
     yield stack_name
 
     # breakpoint()
    if not request.config.getoption("keep_stack"):
-        print(f"Remove stack {stack_name}")
+        LOGGER.info(f"Remove stack {stack_name}")
         docker_host.check_output(f"docker stack rm {stack_name}")
         # wait for services to be down
-        print(f"Wait for all services of {stack_name} to be down")
+        LOGGER.info(f"Wait for all services of {stack_name} to be down")
         while docker_host.check_output(
             "docker service ls --format {{.Name}} "
             f"--filter label=com.docker.stack.namespace={stack_name}"
@@ -103,8 +106,8 @@
 
         # give a bit of time to docker to sync the state of service<->volumes
         # relations so the next step runs ok
-        time.sleep(1)
-        print(f"Remove volumes of stack {stack_name}")
+        time.sleep(20)
+        LOGGER.info(f"Remove volumes of stack {stack_name}")
         for volume in docker_host.check_output(
             "docker volume ls --format {{.Name}} "
             f"--filter label=com.docker.stack.namespace={stack_name}"
@@ -113,6 +116,6 @@
             try:
                 docker_host.check_output(f"docker volume rm {volume}")
             except AssertionError:
-                print(f"Failed to remove volume {volume}")
+                LOGGER.error(f"Failed to remove volume {volume}")
 
     chdir(cwd)
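Aside: the patch bumps the fixed pause before volume removal from 1 s to 20 s. A possible refinement (not what the patch does) would be to retry the removal instead of sleeping a fixed amount, since Docker may take a variable time to release volumes after `docker stack rm`:

```python
# Alternative sketch, not part of the patch: retry volume removal with backoff.
import time

def remove_volume_with_retry(docker_host, volume, attempts=10, delay=2):
    for _ in range(attempts):
        try:
            # testinfra's check_output raises AssertionError on non-zero exit
            docker_host.check_output(f"docker volume rm {volume}")
            return True
        except AssertionError:
            time.sleep(delay)
    return False
```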
diff --git a/tests/test_graph_replayer.py b/tests/test_graph_replayer.py
--- a/tests/test_graph_replayer.py
+++ b/tests/test_graph_replayer.py
@@ -14,7 +14,7 @@
 import msgpack
 import requests
 
-from .conftest import KAFKA_GROUPID, KAFKA_PASSWORD, KAFKA_USERNAME
+from .conftest import KAFKA_GROUPID, KAFKA_PASSWORD, KAFKA_USERNAME, LOGGER
 
 SERVICES = {
     "{}_content-replayer": "0/0",
@@ -37,8 +37,8 @@
     "{}_scheduler-listener": "1/1",
     "{}_scheduler-runner": "1/1",
 }
-ATTEMPTS = 200
-DELAY = 0.5
+ATTEMPTS = 600
+DELAY = 1
 SCALE = 2
 
 API_URL = "http://127.0.0.1:5081/api/1"
@@ -55,16 +55,16 @@
 
 def check_running_services(host, stack, services):
-    print("Waiting for service", services)
+    LOGGER.info("Waiting for services %s", services)
     mirror_services_ = {}
     for i in range(ATTEMPTS):
         mirror_services = running_services(host, stack)
         mirror_services = {k: v for k, v in mirror_services.items() if k in services}
         if mirror_services == services:
-            print("Got them all!")
+            LOGGER.info("Got them all!")
             break
         if mirror_services != mirror_services_:
-            print("Not yet there", mirror_services)
+            LOGGER.info("Not yet there %s", mirror_services)
             mirror_services_ = mirror_services
         time.sleep(0.5)
     return mirror_services == services
@@ -98,8 +98,7 @@
     try:
         data = get(content["data_url"])
     except Exception as exc:
-        print("Failed loading", content["data_url"])
-        print(exc)
+        LOGGER.error("Failed loading %s", content["data_url"], exc_info=exc)
         raise
     assert len(data) == content["length"]
     assert sha1(data).hexdigest() == content["checksums"]["sha1"]
@@ -278,7 +277,7 @@
     partitions = set()
 
     def on_assign(cons, parts):
-        print("assignment", parts)
+        LOGGER.info("assignment %s", parts)
         for p in parts:
             partitions.add(p.partition)
 
@@ -303,9 +302,12 @@
                 # Proper message
                 k = msgpack.unpackb(msg.key())
                 v = msgpack.unpackb(msg.value())
-                print(
-                    "%% %s [%d] at offset %d with key %s:\n"
-                    % (msg.topic(), msg.partition(), msg.offset(), k)
-                )
+                LOGGER.info(
+                    "%% %s [%d] at offset %d with key %s:\n",
+                    msg.topic(),
+                    msg.partition(),
+                    msg.offset(),
+                    k,
+                )
                 assert k == v["origin"]
                 stats[k] = v
@@ -321,7 +323,7 @@
     # run replayer services
     for service_type in ("content", "graph"):
         service = f"{mirror_stack}_{service_type}-replayer"
-        print(f"Scale {service} to 1")
+        LOGGER.info(f"Scale {service} to 1")
         host.check_output(f"docker service scale -d {service}=1")
         if not check_running_services(host, mirror_stack, {service: "1/1"}):
             breakpoint()
@@ -330,7 +332,7 @@
         )
         assert len(logs) == 1
 
-        print(f"Scale {service} to {SCALE}")
+        LOGGER.info(f"Scale {service} to {SCALE}")
         host.check_output(f"docker service scale -d {service}={SCALE}")
         check_running_services(host, mirror_stack, {service: f"{SCALE}/{SCALE}"})
         logs = wait_for_log_entry(
@@ -339,14 +341,14 @@
         assert len(logs) == SCALE
 
         # wait for the replaying to be done (stop_on_oef is true)
-        print(f"Wait for {service} to be done")
+        LOGGER.info(f"Wait for {service} to be done")
         logs = wait_for_log_entry(host, service, "Done.", SCALE)
         # >= SCALE below because replayer services may have been restarted
         # (once done) before we scale them to 0
         if not (len(logs) >= SCALE):
             breakpoint()
         assert len(logs) >= SCALE
-        print(f"Scale {service} to 0")
+        LOGGER.info(f"Scale {service} to 0")
         check_running_services(host, mirror_stack, {service: f"0/{SCALE}"})
 
     # TODO: check there are no error reported in redis after the replayers are done
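Aside: `wait_for_log_entry` is called in the hunks above but its body is outside this diff. A plausible stand-in, purely for illustration and not the project's actual implementation, would poll the service logs until the expected message has appeared the requested number of times:

```python
# Hypothetical sketch of a log-polling helper (defaults mirror ATTEMPTS/DELAY).
import time

def wait_for_log_entry(host, service, message, occurrences=1, attempts=600, delay=1):
    for _ in range(attempts):
        matches = [
            line
            for line in host.check_output(f"docker service logs {service}").splitlines()
            if message in line
        ]
        if len(matches) >= occurrences:
            return matches
        time.sleep(delay)
    return []
```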
@@ -355,7 +357,7 @@
     if False:
         # check replicated archive is in good shape
         expected_stats = get_expected_stats()
-        print("Check replicated archive")
+        LOGGER.info("Check replicated archive")
         # seems the graph replayer is OK, let's check the archive can tell something
         expected_origins = sorted(expected_stats)
         assert len(origins) == len(expected_origins)
@@ -365,16 +367,16 @@
             timing_stats.clear()
             assert origin == expected["origin"]
             origin_stats, swhids = get_stats(origin)
-            print(origin_stats)
-            print(f"{len(timing_stats)} REQS took {sum(timing_stats)}s")
+            LOGGER.info("%s", origin_stats)
+            LOGGER.info(f"{len(timing_stats)} REQS took {sum(timing_stats)}s")
             assert origin_stats == expected
-            print(f"{origin} is OK")
+            LOGGER.info(f"{origin} is OK")
 
     # test the vault service
     cooks = []
     # first start all the cookings
     for origin in origins:
-        print(f"Cook HEAD for {origin['url']}")
+        LOGGER.info(f"Cook HEAD for {origin['url']}")
         visit = get(
             f"{API_URL}/origin/{origin['url']}/visit/latest/?require_snapshot=true"
         )
@@ -400,7 +402,7 @@
         else:
             breakpoint()
 
-        print(f"Directory is {swhid}")
+        LOGGER.info(f"Directory is {swhid}")
         cook = post(f"{API_URL}/vault/flat/{swhid}/")
         assert cook
         assert cook["status"] in ("new", "pending")
@@ -421,9 +423,9 @@
         assert all(fname.startswith(swhid) for fname in filelist)
         for path in filelist[1:]:
             tarinfo = tarfileobj.getmember(path)
-            expected = get(
-                f"{API_URL}/directory/{quote(path[10:])}"
-            )  # remove the 'swh:1:dir:' part
+            url = f"{API_URL}/directory/{quote(path[10:])}"
+            expected = get(url)  # remove the 'swh:1:dir:' part
+            LOGGER.info(f"Retrieved from storage: {url} → {expected}")
             if expected["type"] == "dir":
                 assert tarinfo.isdir()
             elif expected["type"] == "file":
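Aside: the vault part of the test starts a flat cooking with `post(f"{API_URL}/vault/flat/{swhid}/")` and later fetches the resulting tarball. A hedged sketch of the intermediate polling step, assuming the usual `"done"` terminal status of the vault API (the helper name and parameters are made up for illustration):

```python
# Poll the cooking status of a flat bundle until it is done, or give up.
import time
import requests

API_URL = "http://127.0.0.1:5081/api/1"

def wait_for_cooking(swhid, attempts=600, delay=1):
    for _ in range(attempts):
        status = requests.get(f"{API_URL}/vault/flat/{swhid}/").json()
        if status.get("status") == "done":
            return status
        time.sleep(delay)
    raise TimeoutError(f"cooking of {swhid} did not finish in time")
```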