diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
index 94aca5f..5817b79 100644
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -1,277 +1,272 @@
 # Copyright (C) 2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import logging
-import requests

 from typing import Any, Dict, Iterator, Mapping, Optional, Tuple

 import attr

 from swh.model import hashutil
 from swh.model.model import (
     MetadataAuthority,
     MetadataAuthorityType,
     Revision,
     RevisionType,
     TargetType,
     Snapshot,
     BaseModel,
     Sha1Git,
 )

-from swh.loader.package.utils import EMPTY_AUTHOR
+from swh.loader.package.utils import EMPTY_AUTHOR, api_info
 from swh.loader.package.loader import (
     BasePackageInfo,
     PackageLoader,
     RawExtrinsicMetadataCore,
 )

 logger = logging.getLogger(__name__)


 @attr.s
 class NixGuixPackageInfo(BasePackageInfo):
     raw_info = attr.ib(type=Dict[str, Any])

     integrity = attr.ib(type=str)
     """Hash of the archive, formatted as in the Subresource Integrity
     specification."""

     @classmethod
     def from_metadata(cls, metadata: Dict[str, Any]) -> "NixGuixPackageInfo":
         return cls(
             url=metadata["url"],
             filename=None,
             integrity=metadata["integrity"],
             raw_info=metadata,
         )


 class NixGuixLoader(PackageLoader[NixGuixPackageInfo]):
     """Load sources from a sources.json file. This loader is used to load
     sources used by functional package managers (e.g. Nix and Guix).

     """

     visit_type = "nixguix"

     def __init__(self, url):
         super().__init__(url=url)
         self.raw_sources = retrieve_sources(url)
         clean = clean_sources(parse_sources(self.raw_sources))
         self.sources = clean["sources"]
         self.provider_url = url

         self._integrityByUrl = {s["urls"][0]: s["integrity"] for s in self.sources}

         # The revision used to create the sources.json file. For Nix,
         # this revision belongs to the github.com/nixos/nixpkgs
         # repository
         self.revision = clean["revision"]

     # Note: this could be renamed get_artifacts in the PackageLoader
     # base class.
     def get_versions(self):
         """The first mirror of the mirror list is used as branch name in the
         snapshot.

         """
         return self._integrityByUrl.keys()

     def get_metadata_authority(self):
         return MetadataAuthority(
             type=MetadataAuthorityType.FORGE, url=self.url, metadata={},
         )

     def get_extrinsic_snapshot_metadata(self):
         return [
             RawExtrinsicMetadataCore(
                 format="nixguix-sources-json",
                 metadata=self.raw_sources,
                 discovery_date=None,
             ),
         ]

     # Note: this could be renamed get_artifact_info in the PackageLoader
     # base class.
     def get_package_info(self, url) -> Iterator[Tuple[str, NixGuixPackageInfo]]:
         # TODO: try all mirrors and not only the first one. A source
         # can be fetched from several urls, called mirrors. We
         # currently only use the first one, but if the first one
         # fails, we should try the second one and so on.
         integrity = self._integrityByUrl[url]
         p_info = NixGuixPackageInfo.from_metadata({"url": url, "integrity": integrity})
         yield url, p_info

     def known_artifacts(
         self, snapshot: Optional[Snapshot]
     ) -> Dict[Sha1Git, BaseModel]:
         """Almost same implementation as the default one except it filters out
         the extra "evaluation" branch which does not have the right metadata
         structure.

         """
         if not snapshot:
             return {}

         # Skip evaluation revision which has no metadata
         revs = [
             rev.target
             for branch_name, rev in snapshot.branches.items()
             if (
                 rev
                 and rev.target_type == TargetType.REVISION
                 and branch_name != b"evaluation"
             )
         ]
         known_revisions = self.storage.revision_get(revs)

         ret = {}
         for revision in known_revisions:
             if not revision:  # revision_get can return None
                 continue
             ret[revision["id"]] = revision["metadata"]
         return ret

     def resolve_revision_from(
         self, known_artifacts: Dict, p_info: NixGuixPackageInfo,
     ) -> Optional[bytes]:
         for rev_id, known_artifact in known_artifacts.items():
             try:
                 known_integrity = known_artifact["extrinsic"]["raw"]["integrity"]
             except KeyError as e:
                 logger.exception(
                     "Unexpected metadata revision structure detected: %(context)s",
                     {
                         "context": {
                             "revision": hashutil.hash_to_hex(rev_id),
                             "reason": str(e),
                             "known_artifact": known_artifact,
                         }
                     },
                 )
                 # metadata field for the revision is not as expected by the loader
                 # nixguix. We consider this not the right revision and continue
                 # checking the other revisions
                 continue
             else:
                 if p_info.integrity == known_integrity:
                     return rev_id
         return None

     def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]:
         """We add a branch to the snapshot called 'evaluation' pointing to the
         revision used to generate the sources.json file. This revision
         is specified in the sources.json file itself. For the nixpkgs
         origin, this revision is coming from the
         github.com/nixos/nixpkgs repository.

         Note this repository is not loaded explicitly. So, this
         pointer can target a nonexistent revision for a time. However,
         the github and gnu loaders are supposed to load this revision
         and should create the revision pointed by this branch.

         This branch can be used to identify the snapshot associated to
         a Nix/Guix evaluation.

         """
         return {
             b"evaluation": {
                 "target_type": "revision",
                 "target": hashutil.hash_to_bytes(self.revision),
             }
         }

     def build_revision(
         self, p_info: NixGuixPackageInfo, uncompressed_path: str, directory: Sha1Git
     ) -> Optional[Revision]:
         return Revision(
             type=RevisionType.TAR,
             message=b"",
             author=EMPTY_AUTHOR,
             date=None,
             committer=EMPTY_AUTHOR,
             committer_date=None,
             parents=(),
             directory=directory,
             synthetic=True,
             metadata={
                 "extrinsic": {
                     "provider": self.provider_url,
                     "when": self.visit_date.isoformat(),
                     "raw": p_info.raw_info,
                 },
             },
         )


 def retrieve_sources(url: str) -> bytes:
-    response = requests.get(url, allow_redirects=True)
-    if response.status_code != 200:
-        raise ValueError("Got %d HTTP code on %s", response.status_code, url)
-
-    return response.content
+    return api_info(url, allow_redirects=True)


 def parse_sources(raw_sources: bytes) -> Dict[str, Any]:
     return json.loads(raw_sources.decode("utf-8"))


 def clean_sources(sources: Dict[str, Any]) -> Dict[str, Any]:
     """Validate and clean the sources structure. First, it ensures all top
     level keys are present. Then, it walks the sources list and removes
     sources that don't contain required keys.

     Raises:
         ValueError: if a top level key is missing

     """
     # Required top level keys
     required_keys = ["version", "revision", "sources"]
     missing_keys = []
     for required_key in required_keys:
         if required_key not in sources:
             missing_keys.append(required_key)

     if missing_keys != []:
         raise ValueError(
             "sources structure invalid, missing: %s", ",".join(missing_keys)
         )

     # Only the version 1 is currently supported
     version = int(sources["version"])
     if version != 1:
         raise ValueError(
             "The sources structure version '%d' is not supported", sources["version"]
         )

     # If a source doesn't contain required attributes, this source is
     # skipped but others could still be archived.
     verified_sources = []
     for source in sources["sources"]:
         valid = True
         required_keys = ["urls", "integrity", "type"]
         for required_key in required_keys:
             if required_key not in source:
                 logger.info(
                     "Skip source '%s' because key '%s' is missing", source, required_key
                 )
                 valid = False

         if valid and source["type"] != "url":
             logger.info(
                 "Skip source '%s' because the type %s is not supported",
                 source,
                 source["type"],
             )
             valid = False

         if valid and not isinstance(source["urls"], list):
             logger.info(
                 "Skip source '%s' because the urls attribute is not a list", source
             )
             valid = False

         if valid:
             verified_sources.append(source)

     sources["sources"] = verified_sources
     return sources
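
For illustration, a minimal sketch of the sources.json validation performed by clean_sources() above. The payload is hypothetical (made-up URLs, integrity values, and revision id), and the sketch assumes swh.loader.package.nixguix.loader is importable:

    from swh.loader.package.nixguix.loader import clean_sources, parse_sources

    # Hypothetical sources.json payload: the "git" entry has an unsupported
    # type and is dropped by clean_sources(); the "url" entry is kept.
    raw = b"""
    {
      "version": 1,
      "revision": "0000000000000000000000000000000000000000",
      "sources": [
        {"type": "url",
         "urls": ["https://example.org/foo-1.0.tar.gz"],
         "integrity": "sha256-aaaa"},
        {"type": "git",
         "urls": ["https://example.org/bar.git"],
         "integrity": "sha256-bbbb"}
      ]
    }
    """

    clean = clean_sources(parse_sources(raw))
    assert [s["type"] for s in clean["sources"]] == ["url"]
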
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
index 8a20566..02b95d5 100644
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -1,323 +1,323 @@
 # Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import json
 import logging
 import os

 from codecs import BOM_UTF8
 from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union

 import attr
 import chardet
 from urllib.parse import quote

 from swh.model.model import (
     Person,
     RevisionType,
     Revision,
     TimestampWithTimezone,
     Sha1Git,
 )

 from swh.loader.package.loader import BasePackageInfo, PackageLoader
 from swh.loader.package.utils import api_info, release_name

 logger = logging.getLogger(__name__)


 EMPTY_PERSON = Person(fullname=b"", name=None, email=None)


 @attr.s
 class NpmPackageInfo(BasePackageInfo):
     raw_info = attr.ib(type=Dict[str, Any])

     date = attr.ib(type=Optional[str])
     shasum = attr.ib(type=str)
     """sha1 checksum"""
     version = attr.ib(type=str)

     @classmethod
     def from_metadata(
         cls, project_metadata: Dict[str, Any], version: str
     ) -> "NpmPackageInfo":
         package_metadata = project_metadata["versions"][version]
         url = package_metadata["dist"]["tarball"]

         # No date available in intrinsic metadata: retrieve it from the API
         # metadata, using the version number that the API claims this package
         # has.
         extrinsic_version = package_metadata["version"]

         if "time" in project_metadata:
             date = project_metadata["time"][extrinsic_version]
         elif "mtime" in package_metadata:
             date = package_metadata["mtime"]
         else:
             date = None

         return cls(
             url=url,
             filename=os.path.basename(url),
             date=date,
             shasum=package_metadata["dist"]["shasum"],
             version=extrinsic_version,
             raw_info=package_metadata,  # FIXME: loses some of the project metadata
         )


 class NpmLoader(PackageLoader[NpmPackageInfo]):
     """Load npm origin's artifact releases into swh archive.

     """

     visit_type = "npm"

     def __init__(self, url: str):
         """Constructor

         Args:
             url: origin url (e.g. https://www.npmjs.com/package/<package-name>)

         """
         super().__init__(url=url)
         package_name = url.split("https://www.npmjs.com/package/")[1]
         safe_name = quote(package_name, safe="")
         self.provider_url = f"https://replicate.npmjs.com/{safe_name}/"
         self._info: Dict[str, Any] = {}
         self._versions = None

     @property
     def info(self) -> Dict[str, Any]:
         """Return the project metadata information (fetched from npm registry)

         """
         if not self._info:
-            self._info = api_info(self.provider_url)
+            self._info = json.loads(api_info(self.provider_url))
         return self._info

     def get_versions(self) -> Sequence[str]:
         return sorted(list(self.info["versions"].keys()))

     def get_default_version(self) -> str:
         return self.info["dist-tags"].get("latest", "")

     def get_package_info(self, version: str) -> Iterator[Tuple[str, NpmPackageInfo]]:
         p_info = NpmPackageInfo.from_metadata(
             project_metadata=self.info, version=version
         )
         yield release_name(version), p_info

     def resolve_revision_from(
         self, known_artifacts: Dict, p_info: NpmPackageInfo
     ) -> Optional[bytes]:
         return artifact_to_revision_id(known_artifacts, p_info)

     def build_revision(
         self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git
     ) -> Optional[Revision]:
         i_metadata = extract_intrinsic_metadata(uncompressed_path)
         if not i_metadata:
             return None
         author = extract_npm_package_author(i_metadata)
         message = i_metadata["version"].encode("ascii")

         if p_info.date is None:
             url = p_info.url
             artifact_name = os.path.basename(url)
             raise ValueError(
                 "Origin %s: Cannot determine upload time for artifact %s."
                 % (p_info.url, artifact_name)
             )

         date = TimestampWithTimezone.from_iso8601(p_info.date)

         # FIXME: this is to remain bug-compatible with earlier versions:
         date = attr.evolve(date, timestamp=attr.evolve(date.timestamp, microseconds=0))

         r = Revision(
             type=RevisionType.TAR,
             message=message,
             author=author,
             date=date,
             committer=author,
             committer_date=date,
             parents=(),
             directory=directory,
             synthetic=True,
             metadata={
                 "intrinsic": {"tool": "package.json", "raw": i_metadata,},
                 "extrinsic": {
                     "provider": self.provider_url,
                     "when": self.visit_date.isoformat(),
                     "raw": p_info.raw_info,
                 },
             },
         )
         return r


 def artifact_to_revision_id(
     known_artifacts: Dict, p_info: NpmPackageInfo
 ) -> Optional[bytes]:
     """Given metadata artifact, solves the associated revision id.

     The following code allows to deal with 2 metadata formats:

     - old format sample::

         {
             'package_source': {
                 'sha1': '05181c12cd8c22035dd31155656826b85745da37',
             }
         }

     - new format sample::

         {
             'original_artifact': [{
                 'checksums': {
                     'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec',  # noqa
                     ...
                 },
             }],
             ...
         }

     """
     shasum = p_info.shasum
     for rev_id, known_artifact in known_artifacts.items():
         known_original_artifact = known_artifact.get("original_artifact")
         if not known_original_artifact:
             # previous loader-npm version kept original artifact elsewhere
             known_original_artifact = known_artifact.get("package_source")
             if not known_original_artifact:
                 continue
             original_hash = known_original_artifact["sha1"]
         else:
             assert isinstance(known_original_artifact, list)
             original_hash = known_original_artifact[0]["checksums"]["sha1"]
         if shasum == original_hash:
             return rev_id
     return None


 def _author_str(author_data: Union[Dict, List, str]) -> str:
     """Parse author from package.json author fields

     """
     if isinstance(author_data, dict):
         author_str = ""
         name = author_data.get("name")
         if name is not None:
             if isinstance(name, str):
                 author_str += name
             elif isinstance(name, list):
                 author_str += _author_str(name[0]) if len(name) > 0 else ""
         email = author_data.get("email")
         if email is not None:
             author_str += f" <{email}>"
         result = author_str
     elif isinstance(author_data, list):
         result = _author_str(author_data[0]) if len(author_data) > 0 else ""
     else:
         result = author_data
     return result


 def extract_npm_package_author(package_json: Dict[str, Any]) -> Person:
     """
     Extract package author from a ``package.json`` file content and
     return it in swh format.

     Args:
         package_json: Dict holding the content of parsed ``package.json`` file

     Returns:
         Person

     """
     for author_key in ("author", "authors"):
         if author_key in package_json:
             author_data = package_json[author_key]
             if author_data is None:
                 return EMPTY_PERSON
             author_str = _author_str(author_data)
             return Person.from_fullname(author_str.encode())

     return EMPTY_PERSON


 def _lstrip_bom(s, bom=BOM_UTF8):
     if s.startswith(bom):
         return s[len(bom) :]
     else:
         return s


 def load_json(json_bytes):
     """
     Try to load JSON from bytes and return a dictionary.

     First try to decode from utf-8. If the decoding failed,
     try to detect the encoding and decode again with replace
     error handling.

     If JSON is malformed, an empty dictionary will be returned.

     Args:
         json_bytes (bytes): binary content of a JSON file

     Returns:
         dict: JSON data loaded in a dictionary

     """
     json_data = {}
     try:
         json_str = _lstrip_bom(json_bytes).decode("utf-8")
     except UnicodeDecodeError:
         encoding = chardet.detect(json_bytes)["encoding"]
         if encoding:
             json_str = json_bytes.decode(encoding, "replace")
     try:
         json_data = json.loads(json_str)
     except json.decoder.JSONDecodeError:
         pass
     return json_data


 def extract_intrinsic_metadata(dir_path: str) -> Dict:
     """Given an uncompressed path holding the pkginfo file, returns a
     pkginfo parsed structure as a dict.

     The release artifact contains one folder at its root. For example:

     $ tar tvf zprint-0.0.6.tar.gz
     drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
     ...

     Args:
         dir_path (str): Path to the uncompressed directory
                         representing a release artifact from npm.

     Returns:
         the pkginfo parsed structure as a dict if any or None if
         none was present.

     """
     # Retrieve the root folder of the archive
     if not os.path.exists(dir_path):
         return {}
     lst = os.listdir(dir_path)
     if len(lst) == 0:
         return {}
     project_dirname = lst[0]
     package_json_path = os.path.join(dir_path, project_dirname, "package.json")
     if not os.path.exists(package_json_path):
         return {}
     with open(package_json_path, "rb") as package_json_file:
         package_json_bytes = package_json_file.read()
         return load_json(package_json_bytes)
""" visit_type = "pypi" def __init__(self, url): super().__init__(url=url) self._info = None self.provider_url = pypi_api_url(self.url) @property def info(self) -> Dict: """Return the project metadata information (fetched from pypi registry) """ if not self._info: - self._info = api_info(self.provider_url) + self._info = json.loads(api_info(self.provider_url)) return self._info def get_versions(self) -> Sequence[str]: return self.info["releases"].keys() def get_default_version(self) -> str: return self.info["info"]["version"] def get_package_info(self, version: str) -> Iterator[Tuple[str, PyPIPackageInfo]]: res = [] for meta in self.info["releases"][version]: if meta["packagetype"] != "sdist": continue p_info = PyPIPackageInfo.from_metadata(meta) res.append((version, p_info)) if len(res) == 1: version, p_info = res[0] yield release_name(version), p_info else: for version, p_info in res: yield release_name(version, p_info.filename), p_info def resolve_revision_from( self, known_artifacts: Dict, p_info: PyPIPackageInfo ) -> Optional[bytes]: return artifact_to_revision_id(known_artifacts, p_info) def build_revision( self, p_info: PyPIPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Revision]: i_metadata = extract_intrinsic_metadata(uncompressed_path) if not i_metadata: return None # from intrinsic metadata name = i_metadata["version"] _author = author(i_metadata) # from extrinsic metadata message = p_info.comment_text or "" message = "%s: %s" % (name, message) if message else name date = TimestampWithTimezone.from_iso8601(p_info.upload_time) return Revision( type=RevisionType.TAR, message=message.encode("utf-8"), author=_author, date=date, committer=_author, committer_date=date, parents=(), directory=directory, synthetic=True, metadata={ "intrinsic": {"tool": "PKG-INFO", "raw": i_metadata,}, "extrinsic": { "provider": self.provider_url, "when": self.visit_date.isoformat(), "raw": p_info.raw_info, }, }, ) def artifact_to_revision_id( known_artifacts: Dict, p_info: PyPIPackageInfo ) -> Optional[bytes]: """Given metadata artifact, solves the associated revision id. The following code allows to deal with 2 metadata formats (column metadata in 'revision') - old format sample:: { 'original_artifact': { 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa ... }, ... } - new format sample:: { 'original_artifact': [{ 'checksums': { 'sha256': '6975816f2c5ad4046acc676ba112f2fff945b01522d63948531f11f11e0892ec', # noqa ... }, }], ... } """ sha256 = p_info.sha256 for rev_id, known_artifact in known_artifacts.items(): original_artifact = known_artifact["original_artifact"] if isinstance(original_artifact, dict): # previous loader-pypi version stored metadata as dict original_sha256 = original_artifact["sha256"] if sha256 == original_sha256: return rev_id continue # new pypi loader actually store metadata dict differently... 
assert isinstance(original_artifact, list) # current loader-pypi stores metadata as list of dict for original_artifact in known_artifact["original_artifact"]: if sha256 == original_artifact["checksums"]["sha256"]: return rev_id return None def pypi_api_url(url: str) -> str: """Compute api url from a project url Args: url (str): PyPI instance's url (e.g: https://pypi.org/project/requests) This deals with correctly transforming the project's api url (e.g https://pypi.org/pypi/requests/json) Returns: api url """ p_url = urlparse(url) project_name = p_url.path.rstrip("/").split("/")[-1] url = "%s://%s/pypi/%s/json" % (p_url.scheme, p_url.netloc, project_name) return url def extract_intrinsic_metadata(dir_path: str) -> Dict: """Given an uncompressed path holding the pkginfo file, returns a pkginfo parsed structure as a dict. The release artifact contains at their root one folder. For example: $ tar tvf zprint-0.0.6.tar.gz drwxr-xr-x root/root 0 2018-08-22 11:01 zprint-0.0.6/ ... Args: dir_path (str): Path to the uncompressed directory representing a release artifact from pypi. Returns: the pkginfo parsed structure as a dict if any or None if none was present. """ # Retrieve the root folder of the archive if not os.path.exists(dir_path): return {} lst = os.listdir(dir_path) if len(lst) != 1: return {} project_dirname = lst[0] pkginfo_path = os.path.join(dir_path, project_dirname, "PKG-INFO") if not os.path.exists(pkginfo_path): return {} pkginfo = UnpackedSDist(pkginfo_path) raw = pkginfo.__dict__ raw.pop("filename") # this gets added with the ondisk location return raw def author(data: Dict) -> Person: """Given a dict of project/release artifact information (coming from PyPI), returns an author subset. Args: data (dict): Representing either artifact information or release information. Returns: swh-model dict representing a person. """ name = data.get("author") email = data.get("author_email") fullname = None # type: Optional[str] if email: fullname = "%s <%s>" % (name, email) else: fullname = name if not fullname: return EMPTY_AUTHOR if name is not None: name = name.encode("utf-8") if email is not None: email = email.encode("utf-8") return Person(fullname=fullname.encode("utf-8"), name=name, email=email) diff --git a/swh/loader/package/tests/test_utils.py b/swh/loader/package/tests/test_utils.py index f4827f3..c2a2765 100644 --- a/swh/loader/package/tests/test_utils.py +++ b/swh/loader/package/tests/test_utils.py @@ -1,158 +1,159 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +import json import os -import pytest +import pytest import swh.loader.package from swh.loader.package.utils import download, api_info, release_name def test_version_generation(): assert ( swh.loader.package.__version__ != "devel" ), "Make sure swh.loader.core is installed (e.g. pip install -e .)" @pytest.mark.fs def test_download_fail_to_download(tmp_path, requests_mock): url = "https://pypi.org/pypi/arrow/json" status_code = 404 requests_mock.get(url, status_code=status_code) with pytest.raises(ValueError) as e: download(url, tmp_path) assert e.value.args[0] == "Fail to query '%s'. 
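
To make the two revision-metadata layouts handled by artifact_to_revision_id() concrete, here is a small sketch with made-up revision ids and checksums; _FakeInfo is a hypothetical stand-in for PyPIPackageInfo since only its sha256 attribute is read:

    from swh.loader.package.pypi.loader import artifact_to_revision_id

    class _FakeInfo:
        """Stand-in for PyPIPackageInfo: only .sha256 is consulted."""
        sha256 = "bb" * 32

    known_artifacts = {
        b"rev-old": {"original_artifact": {"sha256": "aa" * 32}},  # old layout
        b"rev-new": {
            "original_artifact": [{"checksums": {"sha256": "bb" * 32}}]  # new layout
        },
    }

    assert artifact_to_revision_id(known_artifacts, _FakeInfo()) == b"rev-new"
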
Reason: %s" % (url, status_code) @pytest.mark.fs def test_download_ok(tmp_path, requests_mock): """Download without issue should provide filename and hashes""" filename = "requests-0.0.1.tar.gz" url = "https://pypi.org/pypi/requests/%s" % filename data = "this is something" requests_mock.get(url, text=data, headers={"content-length": str(len(data))}) actual_filepath, actual_hashes = download(url, dest=str(tmp_path)) actual_filename = os.path.basename(actual_filepath) assert actual_filename == filename assert actual_hashes["length"] == len(data) assert ( actual_hashes["checksums"]["sha1"] == "fdd1ce606a904b08c816ba84f3125f2af44d92b2" ) # noqa assert ( actual_hashes["checksums"]["sha256"] == "1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5" ) @pytest.mark.fs def test_download_ok_no_header(tmp_path, requests_mock): """Download without issue should provide filename and hashes""" filename = "requests-0.0.1.tar.gz" url = "https://pypi.org/pypi/requests/%s" % filename data = "this is something" requests_mock.get(url, text=data) # no header information actual_filepath, actual_hashes = download(url, dest=str(tmp_path)) actual_filename = os.path.basename(actual_filepath) assert actual_filename == filename assert actual_hashes["length"] == len(data) assert ( actual_hashes["checksums"]["sha1"] == "fdd1ce606a904b08c816ba84f3125f2af44d92b2" ) # noqa assert ( actual_hashes["checksums"]["sha256"] == "1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5" ) @pytest.mark.fs def test_download_ok_with_hashes(tmp_path, requests_mock): """Download without issue should provide filename and hashes""" filename = "requests-0.0.1.tar.gz" url = "https://pypi.org/pypi/requests/%s" % filename data = "this is something" requests_mock.get(url, text=data, headers={"content-length": str(len(data))}) # good hashes for such file good = { "sha1": "fdd1ce606a904b08c816ba84f3125f2af44d92b2", "sha256": "1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5", # noqa } actual_filepath, actual_hashes = download(url, dest=str(tmp_path), hashes=good) actual_filename = os.path.basename(actual_filepath) assert actual_filename == filename assert actual_hashes["length"] == len(data) assert actual_hashes["checksums"]["sha1"] == good["sha1"] assert actual_hashes["checksums"]["sha256"] == good["sha256"] @pytest.mark.fs def test_download_fail_hashes_mismatch(tmp_path, requests_mock): """Mismatch hash after download should raise """ filename = "requests-0.0.1.tar.gz" url = "https://pypi.org/pypi/requests/%s" % filename data = "this is something" requests_mock.get(url, text=data, headers={"content-length": str(len(data))}) # good hashes for such file good = { "sha1": "fdd1ce606a904b08c816ba84f3125f2af44d92b2", "sha256": "1d9224378d77925d612c9f926eb9fb92850e6551def8328011b6a972323298d5", # noqa } for hash_algo in good.keys(): wrong_hash = good[hash_algo].replace("1", "0") expected_hashes = good.copy() expected_hashes[hash_algo] = wrong_hash # set the wrong hash expected_msg = "Failure when fetching %s. " "Checksum mismatched: %s != %s" % ( url, wrong_hash, good[hash_algo], ) with pytest.raises(ValueError, match=expected_msg): download(url, dest=str(tmp_path), hashes=expected_hashes) def test_api_info_failure(requests_mock): """Failure to fetch info/release information should raise""" url = "https://pypi.org/pypi/requests/json" status_code = 400 requests_mock.get(url, status_code=status_code) with pytest.raises(ValueError) as e0: api_info(url) assert e0.value.args[0] == "Fail to query '%s'. 
Reason: %s" % (url, status_code) def test_api_info(requests_mock): """Fetching json info from pypi project should be ok""" url = "https://pypi.org/pypi/requests/json" requests_mock.get(url, text='{"version": "0.0.1"}') - actual_info = api_info(url) + actual_info = json.loads(api_info(url)) assert actual_info == { "version": "0.0.1", } def test_release_name(): for version, filename, expected_release in [ ("0.0.1", None, "releases/0.0.1"), ("0.0.2", "something", "releases/0.0.2/something"), ]: assert release_name(version, filename) == expected_release diff --git a/swh/loader/package/utils.py b/swh/loader/package/utils.py index 3e69fd8..e83cf64 100644 --- a/swh/loader/package/utils.py +++ b/swh/loader/package/utils.py @@ -1,123 +1,123 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import logging import os import requests from typing import Dict, Optional, Tuple from swh.model.hashutil import MultiHash, HASH_BLOCK_SIZE from swh.model.model import Person from swh.loader.package import DEFAULT_PARAMS logger = logging.getLogger(__name__) DOWNLOAD_HASHES = set(["sha1", "sha256", "length"]) EMPTY_AUTHOR = Person(fullname=b"", name=None, email=None,) -def api_info(url: str) -> Dict: +def api_info(url: str, **extra_params) -> bytes: """Basic api client to retrieve information on project. This deals with fetching json metadata about pypi projects. Args: url (str): The api url (e.g PyPI, npm, etc...) Raises: ValueError in case of query failures (for some reasons: 404, ...) Returns: - The associated response's information dict + The associated response's information """ - response = requests.get(url, **DEFAULT_PARAMS) + response = requests.get(url, **{**DEFAULT_PARAMS, **extra_params}) if response.status_code != 200: raise ValueError("Fail to query '%s'. Reason: %s" % (url, response.status_code)) - return response.json() + return response.content def download( url: str, dest: str, hashes: Dict = {}, filename: Optional[str] = None, auth: Optional[Tuple[str, str]] = None, ) -> Tuple[str, Dict]: """Download a remote tarball from url, uncompresses and computes swh hashes on it. Args: url: Artifact uri to fetch, uncompress and hash dest: Directory to write the archive to hashes: Dict of expected hashes (key is the hash algo) for the artifact to download (those hashes are expected to be hex string) auth: Optional tuple of login/password (for http authentication service, e.g. deposit) Raises: ValueError in case of any error when fetching/computing (length, checksums mismatched...) Returns: Tuple of local (filepath, hashes of filepath) """ params = copy.deepcopy(DEFAULT_PARAMS) if auth is not None: params["auth"] = auth # so the connection does not hang indefinitely (read/connection timeout) timeout = params.get("timeout", 60) response = requests.get(url, **params, timeout=timeout, stream=True) if response.status_code != 200: raise ValueError("Fail to query '%s'. 
Reason: %s" % (url, response.status_code)) filename = filename if filename else os.path.basename(url) logger.debug("filename: %s", filename) filepath = os.path.join(dest, filename) logger.debug("filepath: %s", filepath) h = MultiHash(hash_names=DOWNLOAD_HASHES) with open(filepath, "wb") as f: for chunk in response.iter_content(chunk_size=HASH_BLOCK_SIZE): h.update(chunk) f.write(chunk) # Also check the expected hashes if provided if hashes: actual_hashes = h.hexdigest() for algo_hash in hashes.keys(): actual_digest = actual_hashes[algo_hash] expected_digest = hashes[algo_hash] if actual_digest != expected_digest: raise ValueError( "Failure when fetching %s. " "Checksum mismatched: %s != %s" % (url, expected_digest, actual_digest) ) computed_hashes = h.hexdigest() length = computed_hashes.pop("length") extrinsic_metadata = { "length": length, "filename": filename, "checksums": computed_hashes, } logger.debug("extrinsic_metadata", extrinsic_metadata) return filepath, extrinsic_metadata def release_name(version: str, filename: Optional[str] = None) -> str: if filename: return "releases/%s/%s" % (version, filename) return "releases/%s" % version