diff --git a/ardumont/nixguix/analyze-result.py b/ardumont/nixguix/analyze-result.py
new file mode 100644
index 0000000..29f6a42
--- /dev/null
+++ b/ardumont/nixguix/analyze-result.py
@@ -0,0 +1,59 @@
+# Copyright (C) 2022 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List
+
+import click
+
+DATASET_DIR = "/var/tmp/nixguix/dataset"
+
+
+def read_dataset(date_dir: str, obj_type: str, dataset_name: str) -> List[str]:
+    """Return the right-stripped lines of list-<obj_type>-<dataset_name>.csv."""
+    kind = obj_type or "contents"  # obj_type was ignored before; unset keeps old path
+    filepath = f"{DATASET_DIR}/{date_dir}/list-{kind}-{dataset_name}.csv"
+    with open(filepath, "r") as f:
+        return [line.rstrip() for line in f]
+
+
+def group_by_extensions(data: List[str]) -> Dict[str, int]:
+    """Count entries per extension; .patch/.git/.cgi win over the last suffix."""
+    counts: Dict[str, int] = defaultdict(int)
+    for entry in data:
+        exts = Path(entry).suffixes
+        if not exts:
+            continue  # entries without any suffix are left out of the tally
+        key = exts[-1]
+        # Checked in priority order; the substring test on the last suffix
+        # also catches trailing junk such as ".patch?id=1".
+        for special in (".patch", ".git", ".cgi"):
+            if special in exts or special in exts[-1]:
+                key = special
+                break
+        counts[key] += 1
+    return dict(counts)
+
+
+@click.command()
+@click.option("--dataset-date", help="The date of the extracted dataset e.g. 20221025")
+@click.option(
+    "--dataset", "datasets", multiple=True, type=click.Choice(["guix", "nixpkgs"])
+)
+@click.option("--obj-type", type=click.Choice(["contents", "directories"]))
+def main(dataset_date, datasets, obj_type):
+    """For each dataset required, read and group by extensions the dataset."""
+    from pprint import pprint  # imported once, ahead of the loop
+
+    for name in datasets:
+        entries = read_dataset(dataset_date, obj_type, name)
+        print(f"dataset: {name}\n")
+        # Dump the {extension: count} mapping, then a separating blank line.
+        pprint(group_by_extensions(entries))
+        print()
+
+
+if __name__ == "__main__":
+    main()