diff --git a/swh/loader/package/npm.py b/swh/loader/package/npm.py index a0f04d9..2bb4852 100644 --- a/swh/loader/package/npm.py +++ b/swh/loader/package/npm.py @@ -1,361 +1,279 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging import os import re from codecs import BOM_UTF8 -from typing import Generator, Dict, Tuple, Sequence, List +from typing import Generator, Dict, Tuple, Sequence import chardet import iso8601 -import requests -import tempfile from swh.model.identifiers import normalize_timestamp from swh.loader.package.loader import PackageLoader -from swh.loader.package.utils import download +from swh.loader.package.utils import download, api_info logger = logging.getLogger(__name__) -class NpmClient: - """ - Helper class internally used by the npm loader to fetch - metadata for a specific package hosted on the npm registry. - - Args: - temp_dir (str): Path to the temporary disk location used - to uncompress the package tarballs - - """ - def __init__(self, log=None): - self.root_temp_dir = tempfile.mkdtemp() - self.session = requests.session() - self.params = { - 'headers': { - 'User-Agent': 'Software Heritage npm loader' - } - } - self.log = log or logging - - def fetch_package_metadata(self, package_metadata_url) -> None: - """ - Fetch metadata for a given package and make it the focused one. - This must be called prior any other operations performed - by the other methods below. - - Args: - package_metadata_url: the package metadata url provided - by the npm loader - """ - self.package_metadata_url = package_metadata_url - self.package_metadata = self.session.get( - self.package_metadata_url).json() - self.package = self.package_metadata['name'] - self.temp_dir = os.path.join(self.root_temp_dir, self.package) - return self.package_metadata - - def package_versions(self, known_versions=None) -> List[Dict]: - """ - Return the available versions for the focused package. - - Args: - known_versions (dict): may be provided by the loader, it enables - to filter out versions already ingested in the archive. - - Returns: - dict: A dict whose keys are Tuple[version, tarball_sha1] and - values dicts with the following entries: - - * **name**: the package name - * **version**: the package version - * **filename**: the package source tarball filename - * **sha1**: the package source tarball sha1 checksum - * **date**: the package release date - * **url**: the package source tarball download url - """ - versions = {} - if 'versions' in self.package_metadata: - for version, data in self.package_metadata['versions'].items(): - sha1 = data['dist']['shasum'] - key = (version, sha1) - if known_versions and key in known_versions: - continue - tarball_url = data['dist']['tarball'] - filename = os.path.basename(tarball_url) - date = self.package_metadata['time'][version] - versions[key] = { - 'name': self.package, - 'version': version, - 'filename': filename, - 'sha1': sha1, - 'date': date, - 'url': tarball_url - } - return versions - - _EMPTY_AUTHOR = {'fullname': b'', 'name': None, 'email': None} # https://github.com/jonschlinkert/author-regex _author_regexp = r'([^<(]+?)?[ \t]*(?:<([^>(]+?)>)?[ \t]*(?:\(([^)]+?)\)|$)' def parse_npm_package_author(author_str): """ Parse npm package author string. 
    It works with a flexible range of formats, as detailed below::

        name
        name <email> (url)
        name <email>(url)
        name<email> (url)
        name<email>(url)
        name (url) <email>
        name (url)<email>
        name(url) <email>
        name(url)<email>
        name (url)
        name(url)
        name
        name <email>
        <email> (url)
        <email>(url)
        (url) <email>
        (url)<email>
        <email>
        (url)

    Args:
        author_str (str): input author string

    Returns:
        dict: A dict that may contain the following keys:
            * name
            * email
            * url

    """
    author = {}
    matches = re.findall(_author_regexp,
                         author_str.replace('<>', '').replace('()', ''),
                         re.M)
    for match in matches:
        if match[0].strip():
            author['name'] = match[0].strip()
        if match[1].strip():
            author['email'] = match[1].strip()
        if match[2].strip():
            author['url'] = match[2].strip()
    return author


def extract_npm_package_author(package_json):
    """
    Extract package author from a ``package.json`` file content and
    return it in swh format.

    Args:
        package_json (dict): Dict holding the content of parsed
            ``package.json`` file

    Returns:
        dict: A dict with the following keys:
            * fullname
            * name
            * email

    """
    def _author_str(author_data):
        if type(author_data) is dict:
            author_str = ''
            if 'name' in author_data:
                author_str += author_data['name']
            if 'email' in author_data:
                author_str += ' <%s>' % author_data['email']
            return author_str
        elif type(author_data) is list:
            return _author_str(author_data[0]) if len(author_data) > 0 else ''
        else:
            return author_data

    author_data = {}
    for author_key in ('author', 'authors'):
        if author_key in package_json:
            author_str = _author_str(package_json[author_key])
            author_data = parse_npm_package_author(author_str)

    name = author_data.get('name')
    email = author_data.get('email')

    fullname = None

    if name and email:
        fullname = '%s <%s>' % (name, email)
    elif name:
        fullname = name

    if not fullname:
        return _EMPTY_AUTHOR

    if fullname:
        fullname = fullname.encode('utf-8')

    if name:
        name = name.encode('utf-8')

    if email:
        email = email.encode('utf-8')

    return {'fullname': fullname, 'name': name, 'email': email}


def _lstrip_bom(s, bom=BOM_UTF8):
    if s.startswith(bom):
        return s[len(bom):]
    else:
        return s


def load_json(json_bytes):
    """
    Try to load JSON from bytes and return a dictionary.

    First try to decode from utf-8. If the decoding fails,
    try to detect the encoding and decode again with replace
    error handling.

    If JSON is malformed, an empty dictionary will be returned.

    Args:
        json_bytes (bytes): binary content of a JSON file

    Returns:
        dict: JSON data loaded in a dictionary

    """
    json_data = {}
    try:
        json_str = _lstrip_bom(json_bytes).decode('utf-8')
    except UnicodeDecodeError:
        encoding = chardet.detect(json_bytes)['encoding']
        if encoding:
            json_str = json_bytes.decode(encoding, 'replace')
    try:
        json_data = json.loads(json_str)
    except json.decoder.JSONDecodeError:
        pass
    return json_data


def extract_intrinsic_metadata(dir_path: str) -> Dict:
    """Given an uncompressed path holding the ``package.json`` file,
    returns the parsed package metadata as a dict.

    The release artifact contains one folder at its root. For example:

    $ tar tvf zprint-0.0.6.tar.gz
    drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
    ...

    Args:
        dir_path (str): Path to the uncompressed directory
            representing a release artifact from npm.

    Returns:
        the parsed ``package.json`` content as a dict, or an empty dict
        if none was present.
""" # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) == 0: return {} project_dirname = lst[0] package_json_path = os.path.join(dir_path, project_dirname, 'package.json') if not os.path.exists(package_json_path): return {} with open(package_json_path, 'rb') as package_json_file: package_json_bytes = package_json_file.read() return load_json(package_json_bytes) class NpmLoader(PackageLoader): visit_type = 'npm' def __init__(self, package_name, package_url, package_metadata_url): super().__init__(url=package_url) self.package_metadata_url = package_metadata_url self._info = None self._versions = None - self.client = NpmClient() # if package_url is None: # package_url = 'https://www.npmjs.com/package/%s' % package_name # if package_metadata_url is None: # package_metadata_url = 'https://replicate.npmjs.com/%s/' %\ # quote(package_name, safe='') @property def info(self) -> Dict: """Return the project metadata information (fetched from npm registry) """ if not self._info: - # This initializes the metadata retrieval on npm api - self._info = self.client.fetch_package_metadata( - self.package_metadata_url) + self._info = api_info(self.package_metadata_url) return self._info def get_versions(self) -> Sequence[str]: return sorted(self.info['versions'].keys()) def get_default_release(self) -> str: return self.info['dist-tags'].get('latest', '') def get_artifacts(self, version: str) -> Generator[ Tuple[str, str, Dict], None, None]: meta = self.info['versions'][version] url = meta['dist']['tarball'] filename = os.path.basename(url) yield filename, url, meta def fetch_artifact_archive( self, artifact_uri: str, dest: str) -> Tuple[str, Dict]: return download(artifact_uri, dest=dest) def build_revision( self, a_metadata: Dict, a_uncompressed_path: str) -> Dict: # Parse metadata (project, artifact metadata) i_metadata = extract_intrinsic_metadata(a_uncompressed_path) # from intrinsic metadata author = extract_npm_package_author(i_metadata) # extrinsic metadata version = i_metadata['version'] date = self.info['time'][version] date = iso8601.parse_date(date) date = normalize_timestamp(int(date.timestamp())) message = version.encode('ascii') return { 'author': author, 'date': date, 'committer': author, 'committer_date': date, 'message': message, 'metadata': { 'intrinsic_metadata': i_metadata, }, 'parents': [], } diff --git a/swh/loader/package/pypi.py b/swh/loader/package/pypi.py index fde6e5c..eca0a3a 100644 --- a/swh/loader/package/pypi.py +++ b/swh/loader/package/pypi.py @@ -1,191 +1,165 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Generator, Dict, Tuple, Sequence from urllib.parse import urlparse from pkginfo import UnpackedSDist import iso8601 -import requests from swh.model.identifiers import normalize_timestamp -from swh.loader.package import DEFAULT_PARAMS from swh.loader.package.loader import PackageLoader -from swh.loader.package.utils import download +from swh.loader.package.utils import download, api_info def pypi_api_url(url: str) -> str: """Compute api url from a project url Args: url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) This deals with correctly transforming the project's api url (e.g https://pypi.org/pypi/requests/json) Returns: api url """ p_url = urlparse(url) 
project_name = p_url.path.split('/')[-1] url = '%s://%s/pypi/%s/json' % (p_url.scheme, p_url.netloc, project_name) return url -def pypi_info(url: str) -> Dict: - """PyPI api client to retrieve information on project. This deals with - fetching json metadata about pypi projects. - - Args: - url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) - This deals with correctly transforming the project's api url (e.g - https://pypi.org/pypi/requests/json) - - Raises: - ValueError in case of query failures (for some reasons: 404, ...) - - Returns: - PyPI's information dict - - """ - api_url = pypi_api_url(url) - response = requests.get(api_url, **DEFAULT_PARAMS) - if response.status_code != 200: - raise ValueError("Fail to query '%s'. Reason: %s" % ( - api_url, response.status_code)) - return response.json() - - def extract_intrinsic_metadata(dir_path: str) -> Dict: """Given an uncompressed path holding the pkginfo file, returns a pkginfo parsed structure as a dict. The release artifact contains at their root one folder. For example: $ tar tvf zprint-0.0.6.tar.gz drwxr-xr-x root/root 0 2018-08-22 11:01 zprint-0.0.6/ ... Args: dir_path (str): Path to the uncompressed directory representing a release artifact from pypi. Returns: the pkginfo parsed structure as a dict if any or None if none was present. """ # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) != 1: return {} project_dirname = lst[0] pkginfo_path = os.path.join(dir_path, project_dirname, 'PKG-INFO') if not os.path.exists(pkginfo_path): return {} pkginfo = UnpackedSDist(pkginfo_path) raw = pkginfo.__dict__ raw.pop('filename') # this gets added with the ondisk location return raw def author(data: Dict) -> Dict: """Given a dict of project/release artifact information (coming from PyPI), returns an author subset. Args: data (dict): Representing either artifact information or release information. Returns: swh-model dict representing a person. """ name = data.get('author') email = data.get('author_email') if email: fullname = '%s <%s>' % (name, email) else: fullname = name if not fullname: return {'fullname': b'', 'name': None, 'email': None} fullname = fullname.encode('utf-8') if name is not None: name = name.encode('utf-8') if email is not None: email = email.encode('utf-8') return {'fullname': fullname, 'name': name, 'email': email} class PyPILoader(PackageLoader): """Load pypi origin's artifact releases into swh archive. 
""" visit_type = 'pypi' def __init__(self, url): super().__init__(url=url) self._info = None @property def info(self) -> Dict: """Return the project metadata information (fetched from pypi registry) """ if not self._info: - self._info = pypi_info(self.url) + self._info = api_info(pypi_api_url(self.url)) return self._info def get_versions(self) -> Sequence[str]: return self.info['releases'].keys() def get_default_release(self) -> str: return self.info['info']['version'] def get_artifacts(self, version: str) -> Generator[ Tuple[str, str, Dict], None, None]: for meta in self.info['releases'][version]: yield meta['filename'], meta['url'], meta def fetch_artifact_archive( self, artifact_uri: str, dest: str) -> Tuple[str, Dict]: return download(artifact_uri, dest=dest) def build_revision( self, a_metadata: Dict, a_uncompressed_path: str) -> Dict: # Parse metadata (project, artifact metadata) metadata = extract_intrinsic_metadata(a_uncompressed_path) # from intrinsic metadata name = metadata['version'] _author = author(metadata) # from extrinsic metadata message = a_metadata.get('comment_text', '') message = '%s: %s' % (name, message) if message else name date = normalize_timestamp( int(iso8601.parse_date(a_metadata['upload_time']).timestamp())) return { 'message': message.encode('utf-8'), 'author': _author, 'date': date, 'committer': _author, 'committer_date': date, 'parents': [], 'metadata': { 'intrinsic_metadata': metadata, } } diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py index 7feea6c..793863e 100644 --- a/swh/loader/package/tests/test_pypi.py +++ b/swh/loader/package/tests/test_pypi.py @@ -1,557 +1,530 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from os import path import pytest from unittest.mock import patch from swh.core.tarball import uncompress from swh.model.hashutil import hash_to_bytes from swh.loader.package.pypi import ( - PyPILoader, pypi_api_url, pypi_info, author, extract_intrinsic_metadata + PyPILoader, pypi_api_url, author, extract_intrinsic_metadata ) from swh.loader.package.tests.common import ( check_snapshot, DATADIR ) from swh.loader.package.tests.conftest import local_get_factory def test_author_basic(): data = { 'author': "i-am-groot", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot ', 'name': b'i-am-groot', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_empty_email(): data = { 'author': 'i-am-groot', 'author_email': '', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot', 'name': b'i-am-groot', 'email': b'', } assert actual_author == expected_author def test_author_empty_name(): data = { 'author': "", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b' ', 'name': b'', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_malformed(): data = { 'author': "['pierre', 'paul', 'jacques']", 'author_email': None, } actual_author = author(data) expected_author = { 'fullname': b"['pierre', 'paul', 'jacques']", 'name': b"['pierre', 'paul', 'jacques']", 'email': None, } assert actual_author == expected_author def test_author_malformed_2(): data = { 'author': '[marie, jeanne]', 'author_email': '[marie@some, jeanne@thing]', } 
    actual_author = author(data)

    expected_author = {
        'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
        'name': b'[marie, jeanne]',
        'email': b'[marie@some, jeanne@thing]',
    }

    assert actual_author == expected_author


def test_author_malformed_3():
    data = {
        'author': '[marie, jeanne, pierre]',
        'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
    }

    actual_author = author(data)

    expected_author = {
        'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>',  # noqa
        'name': b'[marie, jeanne, pierre]',
        'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
    }

    assert actual_author == expected_author


# configuration error #

def test_badly_configured_loader_raise(monkeypatch):
    """Badly configured loader should raise"""
    monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False)

    with pytest.raises(ValueError) as e:
        PyPILoader(url='some-url')

    assert 'Misconfiguration' in e.value.args[0]


def test_pypi_api_url():
    """Computing the pypi api url from the pypi project url should be ok"""
    url = pypi_api_url('https://pypi.org/project/requests')
    assert url == 'https://pypi.org/pypi/requests/json'


-def test_pypi_info_failure(requests_mock):
-    """Failure to fetch info/release information should raise"""
-    project_url = 'https://pypi.org/project/requests'
-    info_url = 'https://pypi.org/pypi/requests/json'
-    status_code = 400
-    requests_mock.get(info_url, status_code=status_code)
-
-    with pytest.raises(ValueError) as e0:
-        pypi_info(project_url)
-
-    assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (
-        info_url, status_code
-    )
-
-
-def test_pypi_info(requests_mock):
-    """Fetching json info from pypi project should be ok"""
-    url = 'https://pypi.org/project/requests'
-    info_url = 'https://pypi.org/pypi/requests/json'
-    requests_mock.get(info_url,
-                      text='{"version": "0.0.1"}')
-    actual_info = pypi_info(url)
-    assert actual_info == {
-        'version': '0.0.1',
-    }
-
-
@pytest.mark.fs
def test_extract_intrinsic_metadata(tmp_path):
    """Parsing existing archive's PKG-INFO should yield results"""
    uncompressed_archive_path = str(tmp_path)
    archive_path = path.join(
        DATADIR, 'files.pythonhosted.org', '0805nexter-1.1.0.zip')
    uncompress(archive_path, dest=uncompressed_archive_path)

    actual_sdist = extract_intrinsic_metadata(uncompressed_archive_path)

    expected_sdist = {
        'metadata_version': '1.0',
        'name': '0805nexter',
        'version': '1.1.0',
        'summary': 'a simple printer of nested lest',
        'home_page': 'http://www.hp.com',
        'author': 'hgtkpython',
        'author_email': '2868989685@qq.com',
        'platforms': ['UNKNOWN'],
    }

    assert actual_sdist == expected_sdist


@pytest.mark.fs
def test_extract_intrinsic_metadata_failures(tmp_path):
    """Parsing a nonexistent path/archive/PKG-INFO yields an empty dict"""
    # nonexistent first level path
    assert extract_intrinsic_metadata('/something-inexistant') == {}
    # nonexistent second level path (as expected by pypi archives)
    assert extract_intrinsic_metadata(tmp_path) == {}
    # nonexistent PKG-INFO within second level path
    existing_path_no_pkginfo = str(tmp_path / 'something')
    os.mkdir(existing_path_no_pkginfo)
    assert extract_intrinsic_metadata(tmp_path) == {}


# LOADER SCENARIO #

# "edge" cases (for the same origin)
#

# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
local_get_missing_all = local_get_factory(ignore_urls=[
    'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip',  # noqa
'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa ]) def test_no_release_artifact(swh_config, local_get_missing_all): """Load a pypi project with all artifacts missing ends up with no snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status == {'status': 'uneventful'} stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' # problem during loading: # {visit: partial, status: uneventful, no snapshot} def test_release_with_traceback(swh_config): url = 'https://pypi.org/project/0805nexter' with patch('swh.loader.package.pypi.PyPILoader.get_default_release', side_effect=ValueError('Problem')): loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status == {'status': 'uneventful'} stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 0, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' # problem during loading: failure early enough in between swh contents... # some contents (contents, directories, etc...) have been written in storage # {visit: partial, status: eventful, no snapshot} # problem during loading: failure late enough we can have snapshots (some # revisions are written in storage already) # {visit: partial, status: eventful, snapshot} # "normal" cases (for the same origin) # local_get_missing_one = local_get_factory(ignore_urls=[ 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa ]) # some missing release artifacts: # {visit partial, status: eventful, 1 snapshot} def test_release_with_missing_artifact(swh_config, local_get_missing_one): """Load a pypi project with some missing artifacts ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status == {'status': 'eventful'} stats = loader.storage.stat_counters() assert { 'content': 3, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } check_snapshot( 
'dd0e4201a232b1c104433741dbf45895b8ac9355', expected_branches, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' def test_release_artifact_no_prior_visit(swh_config, local_get): """With no prior visit, load a pypi project ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status == {'status': 'eventful'} stats = loader.storage.stat_counters() assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } check_snapshot( 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a', expected_branches, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' # release artifact, new artifact # {visit full, status full, new snapshot with shared history as prior snapshot} def test_release_artifact_with_2_visits(swh_config, local_get_visits): """With prior visit, 2nd load will result with a different snapshot with some shared history """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) visit1_actual_load_status = loader.load() visit1_stats = loader.storage.stat_counters() assert visit1_actual_load_status == {'status': 'eventful'} origin_visit1 = next(loader.storage.origin_visit_get(url)) assert origin_visit1['status'] == 'full' assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == visit1_stats # Reset internal state loader._info = None visit2_actual_load_status = loader.load() visit2_stats = loader.storage.stat_counters() assert visit2_actual_load_status == {'status': 'eventful'} visits = list(loader.storage.origin_visit_get(url)) assert len(visits) == 2 assert visits[1]['status'] == 'full' assert { 'content': 6 + 1, # 1 more content 'directory': 4 + 2, # 2 more directories 'origin': 1, 'origin_visit': 1 + 1, 'person': 1, 'release': 0, 'revision': 2 + 1, # 1 more revision 'skipped_content': 0, 'snapshot': 1 + 1, # 1 more snapshot } == visit2_stats 
expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', '92689fa2b7fb4d4fc6fb195bf73a50c87c030639' ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', 'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a', '52604d46843b898f5a43208045d09fcf8731631b', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'releases/1.3.0': { 'target': '51247143b01445c9348afa9edfae31bf7c5d86b1', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.3.0', 'target_type': 'alias', }, } check_snapshot( '2e5149a7b0725d18231a37b342e9b7c4e121f283', expected_branches, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' # release artifact, no new artifact # {visit full, status uneventful, same snapshot as before} # release artifact, old artifact with different checksums # {visit full, status full, new snapshot with shared history and some new # different history} diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py index 05e0596..92a4220 100644 --- a/swh/loader/package/tests/test_utils.py +++ b/swh/loader/package/tests/test_utils.py @@ -1,72 +1,96 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os import pytest -from swh.loader.package.utils import download +from swh.loader.package.utils import download, api_info @pytest.mark.fs def test_download_fail_to_download(tmp_path, requests_mock): url = 'https://pypi.org/pypi/arrow/json' status_code = 404 requests_mock.get(url, status_code=status_code) with pytest.raises(ValueError) as e: download(url, tmp_path) assert e.value.args[0] == "Fail to query '%s'. Reason: %s" % ( url, status_code) @pytest.mark.fs def test_download_fail_length_mismatch(tmp_path, requests_mock): """Mismatch length after download should raise """ filename = 'requests-0.0.1.tar.gz' url = 'https://pypi.org/pypi/requests/%s' % filename data = 'this is something' wrong_size = len(data) - 3 requests_mock.get(url, text=data, headers={ 'content-length': str(wrong_size) # wrong size! 
    })

    with pytest.raises(ValueError) as e:
        download(url, dest=str(tmp_path))

    assert e.value.args[0] == "Error when checking size: %s != %s" % (
        wrong_size, len(data)
    )


@pytest.mark.fs
def test_download_ok(tmp_path, requests_mock):
    """Download without issue should provide filename and hashes"""
    filename = 'requests-0.0.1.tar.gz'
    url = 'https://pypi.org/pypi/requests/%s' % filename
    data = 'this is something'
    requests_mock.get(url, text=data, headers={
        'content-length': str(len(data))
    })

    actual_filepath, actual_hashes = download(url, dest=str(tmp_path))

    actual_filename = os.path.basename(actual_filepath)
    assert actual_filename == filename
    assert actual_hashes['length'] == len(data)
    assert actual_hashes['sha1'] == 'fdd1ce606a904b08c816ba84f3125f2af44d92b2'
    assert (actual_hashes['sha256'] ==
            '1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5')


@pytest.mark.fs
def test_download_fail_hashes_mismatch(tmp_path, requests_mock):
    """Mismatch hash after download should raise

    """
    pass
+
+
+def test_api_info_failure(requests_mock):
+    """Failure to fetch info/release information should raise"""
+    url = 'https://pypi.org/pypi/requests/json'
+    status_code = 400
+    requests_mock.get(url, status_code=status_code)
+
+    with pytest.raises(ValueError) as e0:
+        api_info(url)
+
+    assert e0.value.args[0] == "Fail to query '%s'. Reason: %s" % (
+        url, status_code
+    )
+
+
+def test_api_info(requests_mock):
+    """Fetching json info from pypi project should be ok"""
+    url = 'https://pypi.org/pypi/requests/json'
+    requests_mock.get(url, text='{"version": "0.0.1"}')
+    actual_info = api_info(url)
+    assert actual_info == {
+        'version': '0.0.1',
+    }

diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py
index d8d4e5b..b2c4210 100644
--- a/swh/loader/package/utils.py
+++ b/swh/loader/package/utils.py
@@ -1,59 +1,80 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import os
import requests

from typing import Dict, Tuple

from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE
from swh.loader.package import DEFAULT_PARAMS


+def api_info(url: str) -> Dict:
+    """Basic api client to retrieve information on a project. This deals
+    with fetching json metadata from a package registry (e.g. PyPI, npm).
+
+    Args:
+        url (str): The registry api url (PyPI, npm, ...)
+
+    Raises:
+        ValueError in case of query failure (e.g. 404, ...)
+
+    Returns:
+        The associated response's information dict
+
+    """
+    response = requests.get(url, **DEFAULT_PARAMS)
+    if response.status_code != 200:
+        raise ValueError("Fail to query '%s'. Reason: %s" % (
+            url, response.status_code))
+    return response.json()
+
+
def download(url: str, dest: str) -> Tuple[str, Dict]:
    """Download a remote file from url and compute swh hashes on it.

    Args:
        url: Artifact uri to fetch and hash
        dest: Directory to write the archive to

    Raises:
        ValueError in case of any error when fetching/computing

    Returns:
        Tuple of local (filepath, hashes of filepath)

    """
    response = requests.get(url, **DEFAULT_PARAMS, stream=True)
    if response.status_code != 200:
        raise ValueError("Fail to query '%s'. Reason: %s" % (
            url, response.status_code))

    length = int(response.headers['content-length'])

    filepath = os.path.join(dest, os.path.basename(url))

    h = MultiHash(length=length)
    with open(filepath, 'wb') as f:
        for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE):
            h.update(chunk)
            f.write(chunk)

    actual_length = os.path.getsize(filepath)
    if length != actual_length:
        raise ValueError('Error when checking size: %s != %s' % (
            length, actual_length))

    # hashes = h.hexdigest()
    # actual_digest = hashes['sha256']
    # if actual_digest != artifact['sha256']:
    #     raise ValueError(
    #         '%s %s: Checksum mismatched: %s != %s' % (
    #             project, version, artifact['sha256'], actual_digest))

    return filepath, {
        'length': length,
        **h.hexdigest()
    }