diff --git a/swh/provenance/tests/data/generate_repo.py b/swh/provenance/tests/data/generate_repo.py
--- a/swh/provenance/tests/data/generate_repo.py
+++ b/swh/provenance/tests/data/generate_repo.py
@@ -61,6 +61,19 @@
     )
 
     for rev_d in repo_desc:
+        parents = rev_d.get("parents")
+        if parents:
+            # move to the proper (first) parent position, if any
+            check_call(["git", "checkout", parents[0]], stdout=PIPE)
+
+        # give a branch name (the msg) to each commit to make it easier to
+        # navigate the history
+        check_call(["git", "checkout", "-b", rev_d["msg"]], stdout=PIPE)
+
+        if parents and len(parents) > 1:
+            # it's a merge
+            check_call(["git", "merge", "--no-commit", *parents[1:]], stdout=PIPE)
+
         clean_wd()
         for path, content in rev_d["content"].items():
             p = pathlib.Path(path)
@@ -78,6 +91,7 @@
                 "git",
                 "commit",
                 "--all",
+                "--allow-empty",
                 "-m",
                 rev_d["msg"],
             ],
@@ -92,7 +106,7 @@
 @click.option("-C", "--clean-output/--no-clean-output", default=False)
 def main(input_file, output_dir, clean_output):
     repo_desc = yaml.load(open(input_file))
-    if clean_output:
+    if clean_output and os.path.exists(output_dir):
         shutil.rmtree(output_dir)
     generate_repo(repo_desc, output_dir)
 
diff --git a/swh/provenance/tests/data/repo_with_merges.yaml b/swh/provenance/tests/data/repo_with_merges.yaml
new file mode 100644
--- /dev/null
+++ b/swh/provenance/tests/data/repo_with_merges.yaml
@@ -0,0 +1,73 @@
+# generate a git history with a multi-merge revision
+# *-.   R08
+# |\ \
+# | * | R07
+# | | |
+# | | * R06
+# | | |
+# * | | R05
+# | | |
+# * | | R04
+# |/  |
+# *   | R03
+# |  /
+# * /   R02
+# |/
+# *     R01
+# |
+# *     R00
+- msg: R00
+  date: 1000000000
+  content:
+    A/B/C/a: "content a"
+- msg: R01
+  date: 1000000010
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+- msg: R02
+  date: 1000000020
+  content:
+    A/C/a: "content a"
+    A/C/b: "content b"
+- msg: R03
+  date: 1000000030
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+- msg: R04
+  date: 1000000040
+  content:
+    A/C/a: "content a"
+    A/C/b: "content b"
+- msg: R05
+  date: 1000000050
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+    A/B/c: "content c"
+- msg: R06
+  parents:
+    - R01
+  date: 1000000005  # /!\ we add an earlier version of the 'b' file
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+- msg: R07
+  parents:
+    - R03
+  date: 1000000035  # /!\ we add an earlier version of the 'c' file
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+    A/B/c: "content c"
+- msg: R08
+  parents:
+    - R05
+    - R06
+    - R07
+  date: 1000000060
+  content:
+    A/B/C/a: "content a"
+    A/B/C/b: "content b"
+    A/B/c: "content c"