diff --git a/swh/lister/gnu/lister.py b/swh/lister/gnu/lister.py
--- a/swh/lister/gnu/lister.py
+++ b/swh/lister/gnu/lister.py
@@ -33,22 +33,23 @@
 
         .. code-block:: python
 
-            args: ['https://ftp.gnu.org/gnu/3dldf/']
+            args: []
             kwargs: {
-                'tarballs': [{
-                    'archive': 'https://...',
+                'url': 'https://ftp.gnu.org/gnu/3dldf/',
+                'artifacts': [{
+                    'url': 'https://...',
                     'time': 1071002600,
                     'length': 128},
                 ...
             ]}
 
         """
-        tarballs = self.gnu_tree.artifacts[origin_url]
+        artifacts = self.gnu_tree.artifacts[origin_url]
         return utils.create_task_dict(
             'load-%s' % origin_type,
             kwargs.get('policy', 'oneshot'),
-            origin_url,
-            tarballs=tarballs)
+            url=origin_url,
+            artifacts=artifacts)
 
     def safely_issue_request(self, identifier):
         """Bypass the implementation. It's now the GNUTree which deals with
diff --git a/swh/lister/gnu/tests/test_lister.py b/swh/lister/gnu/tests/test_lister.py
--- a/swh/lister/gnu/tests/test_lister.py
+++ b/swh/lister/gnu/tests/test_lister.py
@@ -9,7 +9,7 @@
 logger = logging.getLogger(__name__)
 
 
-def test_lister_no_page_check_results(swh_listers, requests_mock_datadir):
+def test_gnu_lister(swh_listers, requests_mock_datadir):
     lister = swh_listers['gnu']
 
     lister.run()
@@ -21,21 +21,23 @@
         assert row['type'] == 'load-tar'
         # arguments check
         args = row['arguments']['args']
-        assert len(args) == 1
+        assert len(args) == 0
 
-        url = args[0]
+        # kwargs
+        kwargs = row['arguments']['kwargs']
+        assert set(kwargs.keys()) == {'url', 'artifacts'}
+
+        url = kwargs['url']
         assert url.startswith('https://ftp.gnu.org')
 
         url_suffix = url.split('https://ftp.gnu.org')[1]
         assert 'gnu' in url_suffix or 'old-gnu' in url_suffix
 
-        # kwargs
-        kwargs = row['arguments']['kwargs']
-        assert list(kwargs.keys()) == ['tarballs']
-
-        tarballs = kwargs['tarballs']
-        # check the tarball's structure
-        tarball = tarballs[0]
-        assert set(tarball.keys()) == set(['archive', 'length', 'time'])
+        artifacts = kwargs['artifacts']
+        # check the artifact's structure
+        artifact = artifacts[0]
+        assert set(artifact.keys()) == {
+            'url', 'length', 'time', 'filename', 'version'
+        }
 
         assert row['policy'] == 'oneshot'
diff --git a/swh/lister/gnu/tests/test_tree.py b/swh/lister/gnu/tests/test_tree.py
--- a/swh/lister/gnu/tests/test_tree.py
+++ b/swh/lister/gnu/tests/test_tree.py
@@ -9,7 +9,8 @@
 
 from os import path
 from swh.lister.gnu.tree import (
-    GNUTree, find_artifacts, check_filename_is_archive, load_raw_data
+    GNUTree, find_artifacts, check_filename_is_archive, load_raw_data,
+    get_version
 )
 
 
@@ -69,14 +70,18 @@
 
     assert tree_json.artifacts['https://ftp.gnu.org/old-gnu/zlibc/'] == [
         {
-            'archive': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz',  # noqa
+            'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9b.tar.gz',  # noqa
             'length': 90106,
-            'time': 857980800
+            'time': 857980800,
+            'filename': 'zlibc-0.9b.tar.gz',
+            'version': '0.9b',
         },
         {
-            'archive': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz',  # noqa
+            'url': 'https://ftp.gnu.org/old-gnu/zlibc/zlibc-0.9e.tar.gz',  # noqa
             'length': 89625,
-            'time': 860396400
+            'time': 860396400,
+            'filename': 'zlibc-0.9e.tar.gz',
+            'version': '0.9e',
         }
     ]
 
@@ -93,38 +98,46 @@
 
 
 def test_find_artifacts_small_sample(datadir):
-    expected_tarballs = [
+    expected_artifacts = [
         {
-            'archive': '/root/artanis/artanis-0.2.1.tar.bz2',
+            'url': '/root/artanis/artanis-0.2.1.tar.bz2',
             'time': 1495205979,
             'length': 424081,
+            'version': '0.2.1',
+            'filename': 'artanis-0.2.1.tar.bz2',
         },
         {
-            'archive': '/root/xboard/winboard/winboard-4_0_0-src.zip',  # noqa
+            'url': '/root/xboard/winboard/winboard-4_0_0-src.zip',  # noqa
             'time': 898422900,
-            'length': 1514448
+            'length': 1514448,
+            'version': '4_0_0-src',
+            'filename': 'winboard-4_0_0-src.zip',
         },
         {
-            'archive': '/root/xboard/xboard-3.6.2.tar.gz',  # noqa
+            'url': '/root/xboard/xboard-3.6.2.tar.gz',  # noqa
             'time': 869814000,
             'length': 450164,
+            'version': '3.6.2',
+            'filename': 'xboard-3.6.2.tar.gz',
         },
         {
-            'archive': '/root/xboard/xboard-4.0.0.tar.gz',  # noqa
+            'url': '/root/xboard/xboard-4.0.0.tar.gz',  # noqa
             'time': 898422900,
             'length': 514951,
+            'version': '4.0.0',
+            'filename': 'xboard-4.0.0.tar.gz',
         },
     ]
 
     file_structure = json.load(open(path.join(datadir, 'tree.min.json')))
-    actual_tarballs = find_artifacts(file_structure, '/root/')
-    assert actual_tarballs == expected_tarballs
+    actual_artifacts = find_artifacts(file_structure, '/root/')
+    assert actual_artifacts == expected_artifacts
 
 
 def test_find_artifacts(datadir):
     file_structure = json.load(open(path.join(datadir, 'tree.json')))
-    actual_tarballs = find_artifacts(file_structure, '/root/')
-    assert len(actual_tarballs) == 42 + 3  # tar + zip
+    actual_artifacts = find_artifacts(file_structure, '/root/')
+    assert len(actual_artifacts) == 42 + 3  # tar + zip
 
 
 def test_check_filename_is_archive():
@@ -133,3 +146,61 @@
 
     for ext in ['abc.tar.gz.sig', 'abc', 'something.zip2', 'foo.tar.']:
         assert check_filename_is_archive(ext) is False
+
+
+def test_get_version():
+    """From url to branch name should yield something relevant
+
+    """
+    for url, expected_branchname in [
+            ('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'),
+            ('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'),
+            ('https://sthg.org/gnu/sthg.tar.gz', 'sthg'),
+            ('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'),
+            ('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'),
+            ('https://ftp.org/gnu/aris-w32.zip', 'w32'),
+            ('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'),
+            ('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'),
+            ('https://ftp.org/gnu/crypto-build-demo.tar.gz',
+             'crypto-build-demo'),
+            ('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz',
+             'clue+clio+xit.clisp'),
+            ('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz',
+             'clue+clio.for-pcl'),
+            ('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz',
+             'hppa2.0-hp-hpux10.20'),
+            ('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'),
+            ('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'),
+            ('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'),
+            ('clisp-powerpc-unknown-linuxlibc6.tar.gz',
+             'powerpc-unknown-linuxlibc6'),
+
+            ('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'),
+            ('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'),
+            ('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'),
+            ('clisp-sparc-sun-sunos4.1.3_U1.tar.gz',
+             'sparc-sun-sunos4.1.3_U1'),
+            ('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz',
+             '2.25.1-powerpc-apple-MacOSX'),
+            ('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz',
+             '2.27-PowerMacintosh-powerpc-Darwin-1.3.7'),
+            ('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz',
+             '2.27-i686-unknown-Linux-2.2.19'),
+            ('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz',
+             '2.28-i386-i386-freebsd-4.3-RELEASE'),
+            ('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz',
+             '2.28-i686-unknown-cygwin_me-4.90-1.3.10'),
+            ('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz',
+             '2.29-i386-i386-freebsd-4.6-STABLE'),
+            ('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz',
+             '2.29-i686-unknown-cygwin_nt-5.0-1.3.12'),
+            ('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip',
+             '2.5.3-ansi-japi-xdr.20030701_mingw32'),
+            ('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
+            ('sather-logo_images.tar.gz', 'sather-logo_images'),
+            ('sather-specification-000328.html.tar.gz', '000328.html')
+
+    ]:
+        actual_branchname = get_version(url)
+
+        assert actual_branchname == expected_branchname
diff --git a/swh/lister/gnu/tree.py b/swh/lister/gnu/tree.py
--- a/swh/lister/gnu/tree.py
+++ b/swh/lister/gnu/tree.py
@@ -7,15 +7,112 @@
 import json
 import logging
 import requests
+import re
 
+from os import path
 from pathlib import Path
-from typing import Dict, Tuple, List
+from typing import Any, Dict, List, Mapping, Tuple
 from urllib.parse import urlparse
 
 
 logger = logging.getLogger(__name__)
 
 
+# Archive extensions recognized (in any combination) at the end of filenames
+extensions = [
+    'zip',
+    'tar',
+    'gz', 'tgz',
+    'bz2', 'bzip2',
+    'lzma', 'lz',
+    'xz',
+    'Z',
+]
+
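+# Platform and release keywords that, like digits, may start or extend a
+# release number in the pattern below (e.g. 'aris-w32.zip' -> 'w32').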
+version_keywords = [
+    'cygwin_me',
+    'w32', 'win32', 'nt', 'cygwin', 'mingw',
+    'latest', 'alpha', 'beta',
+    'release', 'stable',
+    'hppa',
+    'solaris', 'sunos', 'sun4u', 'sparc', 'sun',
+    'aix', 'ibm', 'rs6000',
+    'i386', 'i686',
+    'linux', 'redhat', 'linuxlibc',
+    'mips',
+    'powerpc', 'macos', 'apple', 'darwin', 'macosx', 'powermacintosh',
+    'unknown',
+    'netbsd', 'freebsd',
+    'sgi', 'irix',
+]
+
+# Match a filename into components.
+#
+# We use Debian's release number heuristic: A release number starts
+# with a digit, and is followed by alphanumeric characters or any of
+# ., +, :, ~ and -
+#
+# We hardcode a list of possible extensions, as this release number
+# scheme would match them too... We match on any combination of those.
+#
+# Greedy matching is done right to left (we only match the extension
+# greedily with +, software_name and release_number are matched lazily
+# with +? and *?).
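+#
+# For example, 'clisp-2.25.1-powerpc-apple-MacOSX.tar.gz' decomposes into
+# software_name1='clisp-', release_number='2.25.1-powerpc-apple-MacOSX'
+# and extension='.tar.gz', while 'sather-logo_images.tar.gz' has no
+# recognizable release number and falls back to
+# software_name2='sather-logo_images' (cf. test_get_version).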
+
+pattern = r'''
+^
+(?:
+    # We have a software name and a release number, separated with a
+    # -, _ or dot.
+    (?P<software_name1>.+?[-_.])
+    (?P<release_number>(%(vkeywords)s|[0-9][0-9a-zA-Z_.+:~-]*?)+)
+|
+    # We couldn't match a release number, put everything in the
+    # software name.
+    (?P<software_name2>.+?)
+)
+(?P<extension>(?:\.(?:%(extensions)s))+)
+$
+''' % {
+    'extensions': '|'.join(extensions),
+    'vkeywords': '|'.join('%s[-]?' % k for k in version_keywords),
+}
+
+
+def get_version(uri: str) -> str:
+    """Extract branch name from tarball uri
+
+    Args:
+        uri (str): Tarball URI
+
+    Returns:
+        Version detected
+
+    Examples:
+        >>> get_version('https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz')
+        '0.2.0'
+
+        >>> get_version('8sync-0.3.0.tar.gz')
+        '0.3.0'
+
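+        When no release number can be recognized, the software name itself
+        is returned (cf. test_get_version in test_tree.py):
+
+        >>> get_version('https://sthg.org/gnu/sthg.tar.gz')
+        'sthg'
+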
+    """
+    filename = path.split(uri)[-1]
+    m = re.match(pattern, filename,
+                 flags=re.VERBOSE | re.IGNORECASE)
+    if m:
+        d = m.groupdict()
+        if d['software_name1'] and d['release_number']:
+            return d['release_number']
+        if d['software_name2']:
+            return d['software_name2']
+
+    return ''
+
+
 def load_raw_data(url: str) -> List[Dict]:
     """Load the raw json from the tree.json.gz
 
@@ -99,7 +196,8 @@
         return projects, artifacts
 
 
-def find_artifacts(filesystem: List[Dict], url: str) -> List[Dict]:
+def find_artifacts(
+        filesystem: List[Dict], url: str) -> List[Mapping[str, Any]]:
     """Recursively list artifacts present in the folder and subfolders for a
     particular package url.
 
@@ -111,21 +209,33 @@
         url: URL of the corresponding package
 
     Returns
-        List of tarball urls and their associated metadata (time, length).
-        For example:
+        List of tarball URLs and their associated metadata (time, length,
+        etc.). For example:
 
         .. code-block:: python
 
             [
-                {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
-                 'time': 1071002600,
-                 'length': 543},
-                {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
-                 'time': 1071078759,
-                 'length': 456},
-                {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.tar.gz',
-                 'time': 1074278633,
-                 'length': 251},
+                {
+                    'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz',
+                    'time': 1071002600,
+                    'filename': '3DLDF-1.1.3.tar.gz',
+                    'version': '1.1.3',
+                    'length': 543
+                },
+                {
+                    'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz',
+                    'time': 1071078759,
+                    'filename': '3DLDF-1.1.4.tar.gz',
+                    'version': '1.1.4',
+                    'length': 456
+                },
+                {
+                    'url': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.tar.gz',
+                    'time': 1074278633,
+                    'filename': '3DLDF-1.1.5.tar.gz',
+                    'version': '1.1.5',
+                    'length': 251
+                },
                 ...
             ]
 
@@ -136,10 +246,13 @@
         filename = info_file['name']
         if filetype == 'file':
             if check_filename_is_archive(filename):
+                uri = url + filename
                 artifacts.append({
-                    'archive': url + filename,
+                    'url': uri,
+                    'filename': filename,
                     'time': int(info_file['time']),
                     'length': int(info_file['size']),
+                    'version': get_version(filename),
                 })
         # It will recursively check for artifacts in all sub-folders
         elif filetype == 'directory':