diff --git a/ardumont/nixguix/analyze-result.py b/ardumont/nixguix/analyze-result.py
index e446642..680d5d8 100644
--- a/ardumont/nixguix/analyze-result.py
+++ b/ardumont/nixguix/analyze-result.py
@@ -1,59 +1,73 @@
 # Copyright (C) 2022 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import re
 from collections import defaultdict
 from pathlib import Path
 from typing import Dict, Iterable
+from urllib.parse import urlparse
 
 import click
 
 DATASET_DIR = "/var/tmp/nixguix/dataset"
 
+PATTERN_ONLY_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
+
+PATTERN_ENDING_VERSION = re.compile(r"(.*)([0-9]+[\.]*)+$")
+
+
 def read_dataset(filepath: str) -> Iterable[str]:
     """Read the dataset file."""
     with open(filepath, "r") as f:
         for line in f:
             yield line.rstrip()
 
 
 def group_by_extensions(data: Iterable[str]) -> Dict[str, int]:
     """Group the data read by extensions."""
     extensions: Dict[str, int] = defaultdict(int)
     for url in data:
-        suffixes = Path(url).suffixes
+        urlparsed = urlparse(url)
+        suffixes = Path(urlparsed.path).suffixes
        if suffixes:
             if ".patch" in suffixes or ".patch" in suffixes[-1]:
                 key = ".patch"
             elif ".git" in suffixes or ".git" in suffixes[-1]:
                 key = ".git"
             elif ".cgi" in suffixes or ".cgi" in suffixes[-1]:
                 key = ".cgi"
             else:
-                key = suffixes[-1]
+                name = Path(urlparsed.path).name
+                if PATTERN_ONLY_VERSION.match(name):
+                    key = "only-version-should-be-tarball"
+                elif PATTERN_ENDING_VERSION.match(name):
+                    key = "ending-version-ok"
+                else:
+                    key = suffixes[-1]
             extensions[key] += 1
 
     return dict(extensions)
 
 
 @click.command()
 @click.option("--dataset-date", help="The date of the extracted dataset e.g. 20221025")
 @click.option(
     "--dataset", "datasets", multiple=True, type=click.Choice(["guix", "nixpkgs"])
 )
 @click.option("--obj-type", type=click.Choice(["contents", "directories"]))
 def main(dataset_date, datasets, obj_type):
     """For each dataset required, read and group by extensions the dataset."""
     for dataset_name in datasets:
         filepath = f"{DATASET_DIR}/{dataset_date}/list-{obj_type}-{dataset_name}.csv"
         data = read_dataset(filepath)
         print(f"dataset <{dataset_name}> with type {obj_type}: {filepath}\n")
         extensions = group_by_extensions(data)
         from pprint import pprint
 
         pprint(extensions)
         print()
 
 
 if __name__ == "__main__":
     main()
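
Quick illustration (not part of the patch above): a minimal sketch of how the two new regexes are intended to classify file names extracted from the URL path, i.e. what ends up in the "only-version-should-be-tarball" and "ending-version-ok" buckets. The sample names below are hypothetical, chosen only to exercise the patterns.

    # Sketch only: exercises the regexes introduced by the patch on made-up names.
    import re

    PATTERN_ONLY_VERSION = re.compile(r"(v*[0-9]+[.])([0-9]+[.]*)+")
    PATTERN_ENDING_VERSION = re.compile(r"(.*)([0-9]+[\.]*)+$")

    # Expected: "1.2.3" and "v2.0.1" are bare versions (likely tarballs without an
    # extension); "hello-2.10" merely ends with a version; "hello.txt" matches neither.
    for name in ["1.2.3", "v2.0.1", "hello-2.10", "hello.txt"]:
        if PATTERN_ONLY_VERSION.match(name):
            print(name, "-> only-version-should-be-tarball")
        elif PATTERN_ENDING_VERSION.match(name):
            print(name, "-> ending-version-ok")
        else:
            print(name, "-> other")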