diff --git a/swh/loader/tar/tests/test_utils.py b/swh/loader/tar/tests/test_utils.py
index 1aa16d5..80d4da7 100644
--- a/swh/loader/tar/tests/test_utils.py
+++ b/swh/loader/tar/tests/test_utils.py
@@ -1,160 +1,56 @@
 # Copyright (C) 2015  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import unittest
 
 from nose.tools import istest
 
 from swh.loader.tar import utils
 
 
 class TestUtils(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-
-        super().setUpClass()
-
-        cls.files = {
-            'free-ipmi-1.2.2.tar': ('free-ipmi-', '1.2.2', '.tar'),
-            'free-ipmi-1.2.2.tar.gz': ('free-ipmi-', '1.2.2', '.tar.gz'),
-            'free-ipmi-1.2.2.tar.tgz': ('free-ipmi-', '1.2.2', '.tar.tgz'),
-            'gcc-testsuite-4.4.2-4.4.3.diff.bz2': (
-                'gcc-testsuite-', '4.4.2-4.4.3', '.diff.bz2'),
-            'gcc-java-4.0.4.tar.gz': ('gcc-java-', '4.0.4', '.tar.gz'),
-            'gmp-2.0.tar.lzma': ('gmp-', '2.0', '.tar.lzma'),
-            'win-gerwin-0.6.zip': ('win-gerwin-', '0.6', '.zip'),
-            'ballandpaddle-0.8.0.tar.xz': (
-                'ballandpaddle-', '0.8.0', '.tar.xz'),
-            'mail-1.1.1.some.lz': ('mail-', '1.1.1.some', '.lz'),
-            'gmp-4.1.1-4.1.2.diff.tar.Z': (
-                'gmp-', '4.1.1-4.1.2', '.diff.tar.Z'),
-            'findutils-4.2.18.tar.bzip2': (
-                'findutils-', '4.2.18', '.tar.bzip2'),
-            'gnunet-java-0.9.4.jar': ('gnunet-java-', '0.9.4', '.jar'),
-            'pycdio-0.15-py2.5-linux-i686.egg': (
-                'pycdio-', '0.15-py2.5-linux-i686', '.egg'),
-            'rbcdio-0.04.gem': ('rbcdio-', '0.04', '.gem'),
-            'librejs-6.0.5.xpi': ('librejs-', '6.0.5', '.xpi'),
-            'icecat-31.8.0.csb.langpack.xpi': (
-                'icecat-', '31.8.0.csb.langpack', '.xpi'),
-            'icecatmobile-31.8.0.en-US.android-arm.apk': (
-                'icecatmobile-', '31.8.0.en-US.android-arm', '.apk'),
-            'icecat-31.8.0.en-US.mac.dmg': (
-                'icecat-', '31.8.0.en-US.mac', '.dmg'),
-            'gnutls-3.0.21-1gn.DevPak': ('gnutls-', '3.0.21-1gn', '.DevPak'),
-
-            # . separator
-            'greg-1.4.tar.gz': ('greg-', '1.4', '.tar.gz'),
-
-            # number in software product
-            'aspell6-pt_BR-20070411-0.tar.bz2': (
-                'aspell6-pt_BR-', '20070411-0', '.tar.bz2'),
-            'libosip2-3.3.0.tar.gz': ('libosip2-', '3.3.0', '.tar.gz'),
-
-            # other cases
-            'hurd-F2-main.iso': ('hurd-F2-main', None, '.iso'),
-
-            'winboard-4_0_5.exe': ('winboard-', '4_0_5', '.exe'),
-
-            # particular patterns...
-            'gift-0.1.9+3epsilon.tar.gz': (
-                'gift-', '0.1.9+3epsilon', '.tar.gz'),
-            'gift-0.1.6pre2.tgz': ('gift-', '0.1.6pre2', '.tgz'),
-            'binutils-2.19.1a.tar.bz2': ('binutils-', '2.19.1a', '.tar.bz2'),
-            'readline-4.2-4.2a.diff.gz': ('readline-', '4.2-4.2a', '.diff.gz'),
-
-            # with arch patterns
-            'cvs-1.12.6-BSD.bin.gz': ('cvs-', '1.12.6-BSD.bin', '.gz'),
-            'cvs-1.12.12-SunOS-5.8-i386.gz': (
-                'cvs-', '1.12.12-SunOS-5.8-i386', '.gz'),
-            'gnutls-3.0.20-w32.zip': ('gnutls-', '3.0.20-w32', '.zip'),
-            'mit-scheme_7.7.90+20080130-0gutsy1.diff.gz': (
-                'mit-scheme_', '7.7.90+20080130-0gutsy1', '.diff.gz'),
-
-            # no release number
-            'gnu.ps.gz': ('gnu', None, '.ps.gz'),
-            'direvent-latest.tar.gz': ('direvent-latest', None, '.tar.gz'),
-        }
-
-        cls.files_error = ['.tar', '.anything']
-
-    @istest
-    def parse_filename(self):
-        for f in self.files:
-            # when
-            actual_components = utils.parse_filename(f)
-
-            # then
-            name, version, ext = self.files[f]
-            expected_components = {
-                'software_name': name,
-                'release_number': version,
-                'extension': ext,
-            }
-
-            self.assertEquals(actual_components, expected_components)
-
-    @istest
-    def parse_filename_not_parseable_file(self):
-        for f in self.files_error:
-            with self.assertRaises(ValueError):
-                utils.parse_filename(f)
-
-    @istest
-    def release_number(self):
-        for f in self.files.keys():
-            # when
-            actual_ext = utils.release_number(f)
-
-            # then
-            _, expected_rel_num, _ = self.files[f]
-            self.assertEquals(
-                actual_ext,
-                expected_rel_num,
-                'for %s, the version should be %s' % (f, expected_rel_num))
-
     @istest
     def commonname(self):
         """utils.commonname strips the leading path on str and on bytes."""
         # when
         actual_commonname = utils.commonname('/some/where/to/',
                                              '/some/where/to/go/to')
         # then
         # assertEqual: the assertEquals alias is deprecated and no longer
         # exists in recent unittest versions.
         self.assertEqual('go/to', actual_commonname)
 
         # when (same scenario with bytes paths)
         actual_commonname2 = utils.commonname(b'/some/where/to/',
                                               b'/some/where/to/go/to')
         # then
         self.assertEqual(b'go/to', actual_commonname2)
 
     @istest
     def convert_to_hex(self):
         """Bytes checksums are converted to their hexadecimal strings."""
         # given: raw checksums, keyed by algorithm name
         checksums = {
             'sha1_git': b'\xf6\xb7 \x8b+\xcd \x9fq5E\xe6\x03\xffg\x87\xd7\xb9D\xa1',  # noqa
             'sha1': b'\xf4O\xf0\xd4\xc0\xb0\xae\xca\xe4C\xab%\x10\xf7\x12h\x1e\x9f\xac\xeb',  # noqa
             'sha256': b'\xa8\xf9=\xf3\xfek\xa2$\xee\xc7\x1b\xc2\x83\xca\x96\xae8\xaf&\xab\x08\xfa\xb1\x13\xec(.s]\xf6Yb',  # noqa
         }
 
         # the same checksums, hex-encoded
         expected_hex = {
             'sha1_git': 'f6b7208b2bcd209f713545e603ff6787d7b944a1',
             'sha1': 'f44ff0d4c0b0aecae443ab2510f712681e9faceb',
             'sha256': 'a8f93df3fe6ba224eec71bc283ca96ae'
                       '38af26ab08fab113ec282e735df65962',
         }
 
         # when
         actual_hex = utils.convert_to_hex(checksums)
 
         # then
         self.assertDictEqual(actual_hex, expected_hex)
 
     @istest
     def convert_to_hex_edge_cases(self):
         """An empty dictionary or None converts to an equally empty result."""
         # an empty dictionary gives back an empty dictionary
         self.assertDictEqual(utils.convert_to_hex({}), {})
 
         # None gives back None
         no_checksums = utils.convert_to_hex(None)
         self.assertIsNone(no_checksums)
diff --git a/swh/loader/tar/utils.py b/swh/loader/tar/utils.py
index 7e90536..67706d6 100644
--- a/swh/loader/tar/utils.py
+++ b/swh/loader/tar/utils.py
@@ -1,170 +1,78 @@
-# Copyright (C) 2015  The Software Heritage developers
+# Copyright (C) 2015-2017  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import itertools
 import random
-import re
 
 from swh.core import hashutil
 
 
-# FIXME; extract this in property
-# to recognize existing naming pattern
-extensions = [
-    'ps',
-    'zip',
-    'tar',
-    'gz', 'tgz',
-    'bz2', 'bzip2',
-    'lzma', 'lz',
-    'xz',
-    'Z',
-    'diff',
-    'iso',
-    'exe',
-    'jar',
-    'egg',
-    'gem',
-    'xpi',
-    'apk',
-    'dmg',
-    'DevPak',
-]
-
-
-pattern = re.compile(r'''
-^
-(?:
-    # We have a software name and a release number, separated with a
-    # -, _ or dot.
-    (?P<software_name1>.+?[-_.])
-    (?P<release_number>[0-9][0-9a-zA-Z_.+:~-]*?)
-|
-    # We couldn't match a release number, put everything in the
-    # software name.
-    (?P<software_name2>.+?)
-)
-(?P<extension>(?:\.(?:%s))+)
-$
-''' % '|'.join(extensions),
-     flags=re.VERBOSE)
-
-
-def parse_filename(filename):
-    """Parse a filename into its components.
-
-    Parsing policy:
-    We use Debian's release number heuristic: A release number starts
-    with a digit, and is followed by alphanumeric characters or any of
-    ., +, :, ~ and -
-
-    We hardcode a list of possible extensions, as this release number
-    scheme would match them too... We match on any combination of those.
-
-    Greedy matching is done right to left (we only match the extension
-    greedily with +, software_name and release_number are matched lazily
-    with +? and *?).
-
-    Args:
-        filename: filename without path.
-
-    Returns:
-        Dictionary with the following keys:
-        - software_name
-        - release_number: can be None if it could not be found.
-        - extension
-
-    Raises:
-        ValueError if the filename could not be parsed.
-
-"""
-    m = pattern.match(filename)
-    if not m:
-        raise ValueError('Filename %s could not be parsed.' % filename)
-
-    d = m.groupdict()
-    return {
-        'software_name': d['software_name1'] or d['software_name2'],
-        'release_number': d['release_number'],
-        'extension': d['extension'],
-    }
-
-
-def release_number(filename):
-    """Compute the release number from the filename.
-
-    cf. parse_filename's docstring
-
-    """
-    return parse_filename(filename)['release_number']
-
-
 def commonname(path0, path1, as_str=False):
     """Compute the part of path1 that follows the first occurrence of path0.
 
     Both paths must be of the same type (both str or both bytes).
 
     Args:
         path0: path to strip, typically the leading directory of path1.
         path1: path from which path0 is removed.
         as_str: not used by this function; kept so that existing callers
             passing it keep working.
 
     Returns:
         Everything located in path1 after the first occurrence of path0,
         in the same type as path1.
 
     Raises:
         IndexError: if path0 does not occur in path1.
         ValueError: if path0 is empty.
 
     """
     # Split only once: a later re-occurrence of path0 inside path1 belongs
     # to the result and must not truncate it.
     return path1.split(path0, 1)[1]
 
 
 def convert_to_hex(d):
     """Build a copy of a flat dictionary with its values hex-encoded.
 
     Args:
         d: flat dictionary with sha bytes as values.
 
     Returns:
         A new dictionary with the same keys, each value being the result
         of hashutil.hash_to_hex on the original value. A falsy input
         (None, empty dictionary) is returned as is.
 
     """
     if d:
         return {name: hashutil.hash_to_hex(digest)
                 for name, digest in d.items()}
 
     # Nothing to convert: hand the input back unchanged.
     return d
 
 
 def grouper(iterable, n, fillvalue=None):
     """Cut an iterable into consecutive tuples of n elements.
 
     Args:
         iterable: the source of elements
         n: number of elements per tuple
         fillvalue: padding used to complete the last tuple when the
             source runs out of elements
 
     Returns:
         an iterator over n-sized tuples, the last one being padded with
         fillvalue if needed
 
     """
     # The very same iterator is handed n times to zip_longest, so building
     # one output tuple consumes n successive elements of the source.
     source = iter(iterable)
     slots = (source,) * n
     return itertools.zip_longest(*slots, fillvalue=fillvalue)
 
 
 def random_blocks(iterable, block=100, fillvalue=None):
     """Yield the elements of an iterable, shuffled block by block.
 
     The iterable is cut by grouper into consecutive blocks of `block`
     elements. Each block is shuffled on its own and its elements are
     yielded before the next block is read.
 
     Args:
         iterable: iterable of data
         block: number of elements per block
         fillvalue: value used to pad the last block when it holds fewer
             than `block` elements; the padding is yielded as well
 
     Yields:
         The elements of each block, in random order within that block.
 
     """
     for chunk in grouper(iterable, block, fillvalue=fillvalue):
         # grouper produces tuples; random.shuffle needs a mutable sequence.
         shuffled = list(chunk)
         random.shuffle(shuffled)
         yield from shuffled