diff --git a/swh/loader/npm/client.py b/swh/loader/npm/client.py
index a4f51ca..e5a5522 100644
--- a/swh/loader/npm/client.py
+++ b/swh/loader/npm/client.py
@@ -1,219 +1,214 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import json
 import logging
 import os
 
-import chardet
 import requests
 
 from swh.core import tarball
 from swh.model import hashutil
 
-from swh.loader.npm.utils import extract_npm_package_author
+from swh.loader.npm.utils import extract_npm_package_author, load_json
 
 
 class NpmClient:
     """
     Helper class internally used by the npm loader to fetch
     metadata for a specific package hosted on the npm registry.
 
     Args:
         temp_dir (str): Path to the temporary disk location used
             to uncompress the package tarballs
     """
     def __init__(self, temp_dir, log=None):
         self.root_temp_dir = temp_dir
         self.session = requests.session()
         self.params = {
             'headers': {
                 'User-Agent': 'Software Heritage npm loader'
             }
         }
         self.log = log or logging
 
     def fetch_package_metadata(self, package_metadata_url):
         """
         Fetch metadata for a given package and make it the focused one.
         This must be called prior to any other operation performed
         by the other methods below.
 
         Args:
             package_metadata_url: the package metadata url provided
                 by the npm loader
         """
         self.package_metadata_url = package_metadata_url
         self.package_metadata = self._request(self.package_metadata_url).json()
         self.package = self.package_metadata['name']
         self.temp_dir = os.path.join(self.root_temp_dir, self.package)
 
     def latest_package_version(self):
         """
         Return the last released version of the focused package.
 
         Returns:
             str: the last released package version
         """
         latest = ''
         if 'latest' in self.package_metadata['dist-tags']:
             latest = self.package_metadata['dist-tags']['latest']
         return latest
 
     def package_versions(self, known_versions=None):
         """
         Return the available versions for the focused package.
 
         Args:
             known_versions (dict): may be provided by the loader, it enables
                 filtering out versions already ingested in the archive.
 
         Returns:
             dict: A dict whose keys are Tuple[version, tarball_sha1] and
             whose values are dicts with the following entries:
 
                 * **name**: the package name
                 * **version**: the package version
                 * **filename**: the package source tarball filename
                 * **sha1**: the package source tarball sha1 checksum
                 * **date**: the package release date
                 * **url**: the package source tarball download url
         """
         versions = {}
         if 'versions' in self.package_metadata:
             for version, data in self.package_metadata['versions'].items():
                 sha1 = data['dist']['shasum']
                 key = (version, sha1)
                 if known_versions and key in known_versions:
                     continue
                 tarball_url = data['dist']['tarball']
                 filename = os.path.basename(tarball_url)
                 date = self.package_metadata['time'][version]
                 versions[key] = {
                     'name': self.package,
                     'version': version,
                     'filename': filename,
                     'sha1': sha1,
                     'date': date,
                     'url': tarball_url
                 }
         return versions
 
     def prepare_package_versions(self, known_versions=None):
         """
         Instantiate a generator that will process a specific released version
         of the package at each iteration step. The following operations will
         be performed:
 
             1. Create a temporary directory to download and extract the
                release tarball
             2. Download the tarball
             3. Check downloaded tarball integrity
             4. Uncompress the tarball
             5. Parse ``package.json`` file associated to the package version
             6. Extract author from the parsed ``package.json`` file
 
         Args:
             known_versions (dict): may be provided by the loader, it enables
                 filtering out versions already ingested in the archive.
 
         Yields:
             Tuple[dict, dict, dict, str]: tuples containing the following
             members:
 
                 * a dict holding the parsed ``package.json`` file
                 * a dict holding package author information
                 * a dict holding package tarball information
                 * a string holding the path of the uncompressed package to
                   load into the archive
         """
         new_versions = self.package_versions(known_versions)
         for version, package_source_data in sorted(new_versions.items()):
             # filter out versions with a missing tarball (cases exist);
             # the package visit will be marked as partial at the end of
             # the loading process
             tarball_url = package_source_data['url']
             tarball_request = self._request(tarball_url, throw_error=False)
             if tarball_request.status_code == 404:
                 self.log.debug('Tarball url %s returns a 404 error.',
                                tarball_url)
                 self.log.debug(('Version %s of %s package will be missing and '
                                 'the visit will be marked as partial.'),
                                version[0], self.package)
                 continue
             version_data = self.package_metadata['versions'][version[0]]
             yield self._prepare_package_version(package_source_data,
                                                 version_data)
 
     def _prepare_package_version(self, package_source_data, version_data):
         version = version_data['version']
         self.log.debug('Processing version %s for npm package %s',
                        version, self.package)
 
         # create temp dir to download and extract package tarball
         path = os.path.join(self.temp_dir, version)
         os.makedirs(path, exist_ok=True)
         filepath = os.path.join(path, package_source_data['filename'])
 
         # download tarball
         url = package_source_data['url']
         response = self._request(url)
         hash_names = hashutil.DEFAULT_ALGORITHMS - {'sha1_git'}
         h = hashutil.MultiHash(hash_names=hash_names)
         with open(filepath, 'wb') as f:
             for chunk in response.iter_content(chunk_size=None):
                 h.update(chunk)
                 f.write(chunk)
 
         # check tarball integrity
         hashes = h.hexdigest()
         expected_digest = package_source_data['sha1']
         actual_digest = hashes['sha1']
         if actual_digest != expected_digest:
             raise ValueError(
                 '%s %s: Checksum mismatched: %s != %s' % (
                     self.package, version, expected_digest, actual_digest))
 
         # uncompress tarball
         tarball.uncompress(filepath, path)
 
         # remove tarball
         os.remove(filepath)
 
         # do not archive useless tarball root directory
         package_path = os.path.join(path, 'package')
-        # some old packages use their name as root directory
+        # some old packages use a root directory with a different name
         if not os.path.exists(package_path):
-            ver_pos = package_source_data['filename'].rfind(version)
-            package_name = package_source_data['filename'][:ver_pos-1]
-            package_path = os.path.join(path, package_name)
-            # fallback: archive root tarball directory
-            if not os.path.exists(package_path):
-                package_path = path
+            for _, dirnames, _ in os.walk(path):
+                if dirnames:
+                    package_path = os.path.join(path, dirnames[0])
+                    break
         self.log.debug('Package local path: %s', package_path)
 
         package_source_data.update(hashes)
 
         # parse package.json file to add its content to revision metadata
         package_json_path = os.path.join(package_path, 'package.json')
         package_json = {}
         with open(package_json_path, 'rb') as package_json_file:
             package_json_bytes = package_json_file.read()
-            file_encoding = chardet.detect(package_json_bytes)['encoding']
-            package_json = json.loads(package_json_bytes.decode(file_encoding))
+            package_json = load_json(package_json_bytes)
 
         # extract author from package.json
         author = extract_npm_package_author(package_json)
 
         return (package_json, author,
                 package_source_data, package_path)
 
     def _request(self, url, throw_error=True):
         response = self.session.get(url, **self.params, stream=True)
         if response.status_code != 200 and throw_error:
             raise ValueError("Fail to query '%s'. Reason: %s" % (
                 url, response.status_code))
         return response
diff --git a/swh/loader/npm/tests/common.py b/swh/loader/npm/tests/common.py
index 30cfb06..1a71b53 100644
--- a/swh/loader/npm/tests/common.py
+++ b/swh/loader/npm/tests/common.py
@@ -1,82 +1,80 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import json
 import os
 import os.path
 
-import chardet
+from swh.loader.npm.utils import load_json
 
 RESOURCES_PATH = os.path.join(os.path.dirname(__file__), 'resources')
 
 empty_package = '22u-validators'
 package = 'org'
 package_non_utf8_encoding = '0b'
 
 
 def package_url(package):
     return 'https://www.npmjs.com/package/%s' % package
 
 
 def package_metadata_url(package):
     return 'https://replicate.npmjs.com/%s/' % package
 
 
 def package_metadata_file(package, visit=''):
     json_filename = '%s_metadata' % package
     if visit:
         json_filename += '_visit%s' % visit
     json_filename += '.json'
     return json_filename
 
 
 class _MockedFileStream():
     def __init__(self, file_data):
         self.file_data = file_data
         self.closed = False
 
     def read(self):
         self.closed = True
         return self.file_data
 
 
 def init_test_data(m, package_metadata_json_file, package_metadata_url):
     package_metadata_filepath = os.path.join(RESOURCES_PATH,
                                              package_metadata_json_file)
 
     with open(package_metadata_filepath, 'rb') as json_file:
         json_file_bytes = json_file.read()
-        file_encoding = chardet.detect(json_file_bytes)['encoding']
-        package_metadata = json.loads(json_file_bytes.decode(file_encoding))
+        package_metadata = load_json(json_file_bytes)
 
     m.register_uri('GET', package_metadata_url, json=package_metadata)
 
     for v, v_data in package_metadata['versions'].items():
         tarball_url = v_data['dist']['tarball']
         tarball_filename = tarball_url.split('/')[-1]
         tarball_filepath = os.path.join(RESOURCES_PATH, 'tarballs',
                                         tarball_filename)
         with open(tarball_filepath, mode='rb') as tarball_file:
             tarball_content = tarball_file.read()
         m.register_uri('GET', tarball_url,
                        body=_MockedFileStream(tarball_content))
 
     return package_metadata
 
 
 def get_package_versions_data(package_metadata):
     versions_data = {}
     for v, v_data in package_metadata['versions'].items():
         shasum = v_data['dist']['shasum']
         versions_data[(v, shasum)] = {
             'name': package,
             'version': v,
             'sha1': shasum,
             'url': v_data['dist']['tarball'],
             'filename': v_data['dist']['tarball'].split('/')[-1],
             'date': package_metadata['time'][v]
         }
     return versions_data
diff --git a/swh/loader/npm/utils.py b/swh/loader/npm/utils.py
index 04d474c..263bb2a 100644
--- a/swh/loader/npm/utils.py
+++ b/swh/loader/npm/utils.py
@@ -1,122 +1,164 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+import json
 import re
 
+from codecs import BOM_UTF8
+
+import chardet
+
 _EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None}
 
 # https://github.com/jonschlinkert/author-regex
 _author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)'
 
 
 def parse_npm_package_author(author_str):
     """
     Parse npm package author string.
     It works with a flexible range of formats, as detailed below::
 
         name
         name <email> (url)
         name <email>(url)
         name<email> (url)
         name<email>(url)
         name (url) <email>
         name (url)<email>
         name(url) <email>
         name(url)<email>
         name (url)
         name(url)
         name <email>
         name<email>
         <email> (url)
         <email>(url)
         (url) <email>
         (url)<email>
         <email>
         (url)
 
     Args:
         author_str (str): input author string
 
     Returns:
         dict: A dict that may contain the following keys:
             * name
             * email
             * url
     """
     author = {}
     matches = re.findall(_author_regexp,
                          author_str.replace('<>', '').replace('()', ''),
                          re.M)
     for match in matches:
         if match[0].strip():
             author['name'] = match[0].strip()
         if match[1].strip():
             author['email'] = match[1].strip()
         if match[2].strip():
             author['url'] = match[2].strip()
     return author
 
 
 def extract_npm_package_author(package_json):
     """
     Extract package author from a ``package.json`` file content
     and return it in swh format.
 
     Args:
         package_json (dict): Dict holding the content of parsed
             ``package.json`` file
 
     Returns:
         dict: A dict with the following keys:
             * fullname
             * name
             * email
     """
 
     def _author_str(author_data):
         if type(author_data) is dict:
             author_str = ''
             if 'name' in author_data:
                 author_str += author_data['name']
             if 'email' in author_data:
                 author_str += ' <%s>' % author_data['email']
             return author_str
         elif type(author_data) is list:
             return _author_str(author_data[0]) if len(author_data) > 0 else ''
         else:
             return author_data
 
     author_data = {}
     for author_key in ('author', 'authors'):
         if author_key in package_json:
             author_str = _author_str(package_json[author_key])
             author_data = parse_npm_package_author(author_str)
 
     name = author_data.get('name')
     email = author_data.get('email')
 
     fullname = None
 
     if name and email:
         fullname = '%s <%s>' % (name, email)
     elif name:
         fullname = name
 
     if not fullname:
         return _EMPTY_AUTHOR
 
     if fullname:
         fullname = fullname.encode('utf-8')
 
     if name:
         name = name.encode('utf-8')
 
     if email:
         email = email.encode('utf-8')
 
     return {'fullname': fullname, 'name': name, 'email': email}
+
+
+def _lstrip_bom(s, bom=BOM_UTF8):
+    if s.startswith(bom):
+        return s[len(bom):]
+    else:
+        return s
+
+
+def load_json(json_bytes):
+    """
+    Try to load JSON from bytes and return a dictionary.
+
+    First try to decode from utf-8. If the decoding fails,
+    try to detect the encoding and decode again with replace
+    error handling.
+
+    If JSON is malformed, an empty dictionary will be returned.
+
+    Args:
+        json_bytes (bytes): binary content of a JSON file
+
+    Returns:
+        dict: JSON data loaded in a dictionary
+    """
+    json_data = {}
+    try:
+        json_str = _lstrip_bom(json_bytes).decode('utf-8')
+    except UnicodeDecodeError:
+        # default to utf-8 so json_str is always bound, even
+        # when chardet cannot detect the encoding
+        encoding = chardet.detect(json_bytes)['encoding'] or 'utf-8'
+        json_str = json_bytes.decode(encoding, 'replace')
+    try:
+        json_data = json.loads(json_str)
+    except json.decoder.JSONDecodeError:
+        pass
+    return json_data
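
Note (not part of the patch): a minimal sketch of how the new load_json
helper behaves on the three inputs it is designed to handle, assuming the
patched swh.loader.npm.utils is importable; the byte strings are made-up
examples, and chardet's guess on such short inputs may vary.

    from codecs import BOM_UTF8

    from swh.loader.npm.utils import load_json

    # UTF-8 content with a leading BOM: the BOM is stripped before decoding
    print(load_json(BOM_UTF8 + b'{"name": "org"}'))   # {'name': 'org'}

    # non UTF-8 content: decoding falls back to chardet detection with
    # 'replace' error handling instead of raising UnicodeDecodeError
    print(load_json('{"name": "caf\xe9"}'.encode('latin-1')))

    # malformed JSON: an empty dict is returned instead of raising
    print(load_json(b'{not json'))                    # {}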
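
Likewise, a quick illustration of the author parsing helpers that consume
the load_json output (these helpers are unchanged by this patch; the author
values below are hypothetical):

    from swh.loader.npm.utils import (extract_npm_package_author,
                                      parse_npm_package_author)

    # one of the supported formats: name <email> (url)
    print(parse_npm_package_author(
        'Jane Doe <jdoe@example.org> (https://example.org)'))
    # {'name': 'Jane Doe', 'email': 'jdoe@example.org',
    #  'url': 'https://example.org'}

    # structured author entry from a parsed package.json file
    print(extract_npm_package_author(
        {'author': {'name': 'Jane Doe', 'email': 'jdoe@example.org'}}))
    # {'fullname': b'Jane Doe <jdoe@example.org>', 'name': b'Jane Doe',
    #  'email': b'jdoe@example.org'}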
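
Finally, the client.py hunk replaces filename-based guessing of the tarball
root directory with a directory walk. A standalone sketch of that logic
(hypothetical helper name; it assumes the package files live in a single
top-level directory of the extraction path, as npm tarballs do):

    import os

    def find_package_root(path):
        # conventional npm layout: tarball content lives under 'package/'
        package_path = os.path.join(path, 'package')
        if not os.path.exists(package_path):
            # some old packages use a different root directory name:
            # take the first directory found when walking 'path'
            for _, dirnames, _ in os.walk(path):
                if dirnames:
                    package_path = os.path.join(path, dirnames[0])
                    break
        return package_path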