diff --git a/ardumont/sentry/.gitignore b/ardumont/sentry/.gitignore
new file mode 100644
index 0000000..a6c57f5
--- /dev/null
+++ b/ardumont/sentry/.gitignore
@@ -0,0 +1 @@
+*.json
diff --git a/ardumont/sentry/analyse_hash_collision.py b/ardumont/sentry/analyse_hash_collision.py
new file mode 100644
index 0000000..7482855
--- /dev/null
+++ b/ardumont/sentry/analyse_hash_collision.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import ast
+import json
+
+from collections import defaultdict
+from typing import Any, Dict, List, Tuple
+
+import click
+
+from swh.model.hashutil import hash_to_hex, DEFAULT_ALGORITHMS
+
+
+def import_data(f):
+    return json.loads(open(f).read())
+
+
+def content_get_metadata(
+        content_ids: List[bytes]) -> Dict[bytes, Dict[str, Any]]:
+    """Retrieve contents from the storage
+
+    """
+    from swh.storage import get_storage
+    storage = get_storage(
+        cls='remote', url='http://uffizi.internal.softwareheritage.org:5002')
+    contents = storage.content_get_metadata(content_ids)
+    result = {}
+    for hash_id, all_contents in contents.items():
+        count = len(all_contents)
+        if count > 1:
+            click.echo(f'hash_id {hash_id} has multiple entries: {count}')
+        # to ease comparison:
+        # - take only 1 of the contents (most cases i guess)
+        # - drop the length
+        hashes = all_contents[0]
+        hashes.pop('length', None)
+        result[hash_id] = hashes
+
+    return result
+
+
+def content_hex_hashes(content: Dict[str, bytes]) -> Dict[str, str]:
+    return {
+        algo: hash_to_hex(content[algo]) for algo in DEFAULT_ALGORITHMS
+    }
+
+
+@click.command()
+@click.option('--data-file', default='hash-collision-all-sentry-id-1438.json')
+def main(data_file):
+    data = import_data(data_file)
+
+    # how many collisions skipped due to incomplete message
+    summary_skipped = 0
+    # how many collisions
+    summary_count = defaultdict(int)
+    # one hash ends up with multiple collisions
+    detailed_collisions = defaultdict(list)
+    count = 0
+    for entry_id, entry in data.items():
+        message = entry['message']
+        count += 1
+        if message.endswith('...'):
+            # TOOD: Find a way to retrieve the full message
+            # because it can't be parsed for now
+            summary_skipped += 1
+            # incomplete message, skipping for now
+            continue
+
+        msg: Tuple[str, bytes, Dict[str, bytes]] = ast.literal_eval(message)
+        algo, hash_id, colliding_contents = msg
+        assert algo == 'sha1'
+
+        summary_count[hash_id] += 1
+
+        # take only 1 content, on previous iteration, the list was multiple
+        # occurences of the same hash
+        # TODO: ensure it remains true
+        detailed_collisions[hash_id] = colliding_contents[0]
+
+    # Retrieve the contents from storage to compare
+    full_contents = content_get_metadata(list(summary_count.keys()))
+
+    count_collisions = 0
+    collisions = {}
+    for hash_id, stored_content in full_contents.items():
+        collision_content = content_hex_hashes(detailed_collisions[hash_id])
+        stored_content = content_hex_hashes(stored_content)
+
+        if collision_content != stored_content:
+            count_collisions += 1
+            hex_hash_id = hash_to_hex(hash_id)
+            collisions[hex_hash_id] = [stored_content, collision_content]
+
+    summary = {
+        'total-collisions-raises-in-sentry': count,
+        'total-real-collisions-on-sha1': count_collisions,
+        'detailed-collisions': collisions,
+    }
+
+    click.echo(json.dumps(summary))
+
+
+if __name__ == '__main__':
+    main()