diff --git a/swh/loader/mercurial/__init__.py b/swh/loader/mercurial/__init__.py --- a/swh/loader/mercurial/__init__.py +++ b/swh/loader/mercurial/__init__.py @@ -9,9 +9,9 @@ def register() -> Mapping[str, Any]: """Register the current worker module's definition""" - from .loader import HgBundle20Loader + from .loader import HgLoader return { "task_modules": [f"{__name__}.tasks"], - "loader": HgBundle20Loader, + "loader": HgLoader, } diff --git a/swh/loader/mercurial/cli.py b/swh/loader/mercurial/cli.py --- a/swh/loader/mercurial/cli.py +++ b/swh/loader/mercurial/cli.py @@ -4,8 +4,8 @@ # See top-level LICENSE file for more information import datetime -from itertools import chain import logging +from itertools import chain import click @@ -44,11 +44,11 @@ visit_date = datetime.datetime.now(tz=datetime.timezone.utc) kwargs = {"visit_date": visit_date, "origin_url": origin_url} if hg_archive: - from .loader import HgArchiveBundle20Loader as HgLoader + from .loader import HgArchiveLoader as HgLoader kwargs["archive_path"] = hg_archive else: - from .loader import HgBundle20Loader as HgLoader + from .loader import HgLoader kwargs["directory"] = hg_directory diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/from_bundle.py copy from swh/loader/mercurial/loader.py copy to swh/loader/mercurial/from_bundle.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/from_bundle.py @@ -19,17 +19,17 @@ import datetime import os -from queue import Empty import random import re +import time +from queue import Empty from shutil import rmtree from tempfile import mkdtemp -import time from typing import Any, Dict, Iterable, List, Optional import billiard -from dateutil import parser import hglib +from dateutil import parser from swh.core.config import merge_configs from swh.loader.core.loader import DVCSLoader diff --git a/swh/loader/mercurial/from_disk.py b/swh/loader/mercurial/from_disk.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/from_disk.py @@ -0,0 +1,467 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import os +from datetime import datetime, timezone +from shutil import rmtree +from tempfile import mkdtemp +from typing import Any, Dict, List, Optional, Union + +import dateutil +from mercurial.util import lrucachedict # type: ignore + +from swh.core.config import merge_configs +from swh.loader.core.loader import BaseLoader +from swh.loader.core.utils import clean_dangling_folders +from swh.model.from_disk import Content, DentryPerms, Directory +from swh.model.hashutil import MultiHash, hash_to_bytehex +from swh.model.model import Content as ModelContent +from swh.model.model import ( + ObjectType, + Origin, + Person, + Release, + Revision, + RevisionType, + Sha1Git, + Snapshot, + SnapshotBranch, + TargetType, + TimestampWithTimezone, +) + +from . 
import hgutil +from .archive_extract import tmp_extract +from .hgutil import HgNodeId + +FLAG_PERMS = { + b"l": DentryPerms.symlink, + b"x": DentryPerms.executable_content, + b"": DentryPerms.content, +} # type: Dict[bytes, DentryPerms] +DEFAULT_CONFIG: Dict[str, Any] = { + "temp_directory": "/tmp", + "clone_timeout_seconds": 7200, + "content_cache_size": 10_000, +} +TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.mercurial.from_disk" + + +def parse_visit_date(visit_date: Optional[Union[datetime, str]]) -> Optional[datetime]: + if visit_date is None: + return None + + if isinstance(visit_date, datetime): + return visit_date + + if visit_date == "now": + return datetime.now(tz=timezone.utc) + + if isinstance(visit_date, str): + return dateutil.parser.parse(visit_date) + + raise ValueError(f"invalid visit date {visit_date!r}") + + +class HgRootDirectory(Directory): + def add_file(self, path: bytes, content: Content) -> None: + """Add a file, creating missing directories if needed.""" + parts = [part for part in path.split(os.path.sep.encode()) if part] + + current_dir = self + while len(parts) > 1: + part = parts.pop(0) + if part not in current_dir: + current_dir[part] = Directory() + current_dir = current_dir[part] + part = parts.pop(0) + current_dir[part] = content + + +class HgLoaderFromDisk(BaseLoader): + """Load a mercurial repository from a local repository.""" + + CONFIG_BASE_FILENAME = "loader/mercurial" + + visit_type = "hg" + + def __init__( + self, + url: str, + directory: Optional[str] = None, + logging_class: str = "swh.loader.mercurial.Loader", + visit_date: Optional[Union[datetime, str]] = None, + config: Optional[Dict[str, Any]] = None, + ): + """Initialize the loader. + + Args: + url: url of the repository. + directory: directory of the local repository. + logging_class: class of the loader logger. + visit_date: visit date of the repository + config: loader configuration + """ + super().__init__(logging_class=logging_class, config=config or {}) + + self.config = merge_configs(DEFAULT_CONFIG, self.config) + self.temp_directory = self.config["temp_directory"] + self.clone_timeout = self.config["clone_timeout_seconds"] + + self.origin_url = url + self.visit_date = parse_visit_date(visit_date) + self.directory = directory + + self._repo: Optional[hgutil.Repository] = None + self.revision_nodeid_to_swhid: Dict[HgNodeId, Sha1Git] = {} + + # Cache the content hash across revisions to avoid recalculation. + self.content_hash_cache: lrucachedict = lrucachedict( + self.config["content_cache_size"], + ) + + @property + def repo(self) -> hgutil.Repository: + """A filtered mercurial repository. + + The repository will only return visible changesets. + If needed, use self.repo.unfiltered() to have a new repository + returning all the changesets. + """ + if self._repo is None: + self._repo = hgutil.repository(self.repo_directory) + return self._repo + + def pre_cleanup(self) -> None: + """As a first step, will try and check for dangling data to clean up. + This should do its best to avoid raising issues. + + """ + clean_dangling_folders( + self.temp_directory, + pattern_check=TEMPORARY_DIR_PREFIX_PATTERN, + log=self.log, + ) + + def cleanup(self) -> None: + """Last step executed by the loader.""" + if self.repo_directory and os.path.exists(self.repo_directory): + self.log.debug(f"Cleaning up repository {self.repo_directory}") + rmtree(self.repo_directory) + + def prepare_origin_visit(self, *args, **kwargs) -> None: + """First step executed by the loader to prepare origin and visit + references.
Set/update self.origin, and + optionally self.origin_url, self.visit_date. + + """ + self.origin = Origin(url=self.origin_url) + + def prepare(self, *args, **kwargs) -> None: + """Second step executed by the loader to prepare some state needed by + the loader. + + """ + + def fetch_data(self) -> bool: + """Fetch the data from the source the loader is currently loading + + Returns: + a value that is interpreted as a boolean. If True, fetch_data needs + to be called again to complete loading. + + """ + if not self.directory: # no local repository + self.repo_directory = mkdtemp( + prefix=TEMPORARY_DIR_PREFIX_PATTERN, + suffix=f"-{os.getpid()}", + dir=self.temp_directory, + ) + self.log.debug( + f"Cloning {self.origin_url} to {self.repo_directory} " + f"with timeout {self.clone_timeout} seconds" + ) + hgutil.clone(self.origin_url, self.repo_directory, self.clone_timeout) + else: # existing local repository + self.repo_directory = self.directory + + return False + + def store_data(self): + """Store fetched data in the database.""" + for rev in self.repo: + hg_nodeid = self.repo[rev].node() + revision_swhid = self.store_revision(hg_nodeid) + self.revision_nodeid_to_swhid[hg_nodeid] = revision_swhid + + branch_by_hg_nodeid: Dict[HgNodeId, bytes] = { + hg_nodeid: name for name, hg_nodeid in hgutil.branches(self.repo).items() + } + tags_by_name: Dict[bytes, HgNodeId] = self.repo.tags() + tags_by_hg_nodeid: Dict[HgNodeId, bytes] = { + hg_nodeid: name for name, hg_nodeid in tags_by_name.items() + } + + snapshot_branches: Dict[bytes, SnapshotBranch] = {} + + for hg_nodeid, revision_swhid in self.revision_nodeid_to_swhid.items(): + tag_name = tags_by_hg_nodeid.get(hg_nodeid) + + # tip is listed in the tags by the mercurial api + # but it's not a tag defined by the user in `.hgtags` + if tag_name and tag_name != b"tip": + snapshot_branches[tag_name] = SnapshotBranch( + target=self.store_release(tag_name, revision_swhid), + target_type=TargetType.RELEASE, + ) + + if hg_nodeid in branch_by_hg_nodeid: + name = branch_by_hg_nodeid[hg_nodeid] + snapshot_branches[name] = SnapshotBranch( + target=revision_swhid, target_type=TargetType.REVISION, + ) + + if hg_nodeid == tags_by_name[b"tip"]: + snapshot_branches[b"HEAD"] = SnapshotBranch( + target=name, target_type=TargetType.ALIAS, + ) + + snapshot = Snapshot(branches=snapshot_branches) + self.storage.snapshot_add([snapshot]) + + self.flush() + self.loaded_snapshot_id = snapshot.id + + def store_revision(self, hg_nodeid: HgNodeId) -> Sha1Git: + """Store a revision given its hg nodeid. + + If a parent revision of the revision to store is not yet stored, + it is recursively stored. Revisions are stored from oldest to newest, + thus the recursion limit will not be reached. + + Args: + hg_nodeid: the hg nodeid of the revision. + + Returns: + the swhid of the stored revision.
+ """ + rev_ctx = self.repo[hg_nodeid] + + root_swhid = self.store_directories(hg_nodeid) + + author = Person.from_fullname(rev_ctx.user()) + (timestamp, offset) = rev_ctx.date() + + # TimestampWithTimezone.from_dict will change name + # as it accept more than just dicts + rev_date = TimestampWithTimezone.from_dict(int(timestamp)) + + extra_headers = [ + (b"time_offset_seconds", str(offset).encode(),), + ] + for key, value in rev_ctx.extra().items(): + # The default branch is skipped to match + # the historical implementation + if key == b"branch" and value == b"default": + continue + + # transplant_source is converted to match + # the historical implementation + if key == b"transplant_source": + value = hash_to_bytehex(value) + extra_headers.append((key, value)) + + parents = [] + for parent_ctx in rev_ctx.parents(): + parent_hg_nodeid = parent_ctx.node() + # nullid is the value of a parent that does not exist + if parent_hg_nodeid == hgutil.NULLID: + continue + + if parent_hg_nodeid not in self.revision_nodeid_to_swhid: + parent_revision_swhid = self.store_revision(parent_hg_nodeid) + self.revision_nodeid_to_swhid[parent_hg_nodeid] = parent_revision_swhid + + parents.append(self.revision_nodeid_to_swhid[parent_hg_nodeid]) + + revision = Revision( + author=author, + date=rev_date, + committer=author, + committer_date=rev_date, + type=RevisionType.MERCURIAL, + directory=root_swhid, + message=rev_ctx.description(), + metadata={"node": hg_nodeid.hex()}, + extra_headers=tuple(extra_headers), + synthetic=False, + parents=tuple(parents), + ) + + self.storage.revision_add([revision]) + + return revision.id + + def store_release(self, name: bytes, target=Sha1Git) -> Sha1Git: + """Store a release given its name and its target. + + Args: + name: name of the release. + target: swhid of the target revision. + + Returns: + the swhid of the stored release. + """ + release = Release( + name=name, + target=target, + target_type=ObjectType.REVISION, + message=None, + metadata=None, + synthetic=False, + author=Person(name=None, email=None, fullname=b""), + date=None, + ) + + self.storage.release_add([release]) + + return release.id + + def store_content(self, hg_nodeid: HgNodeId, file_path: bytes) -> Content: + """Store a revision content hg nodeid and file path. + + Args: + hg_nodeid: the hg nodeid of the revision. + file_path: the hg path of the content. + + Returns: + the swhid of the top level directory. + """ + rev_ctx = self.repo[hg_nodeid] + file_ctx = rev_ctx[file_path] + + file_nodeid = file_ctx._fileid + perms = FLAG_PERMS[file_ctx.flags()] + + # Key is file_nodeid + perms because permissions does not participate + # in content hash in hg while it is the case in swh. 
+ cache_key = (file_nodeid, perms) + + sha1_git = self.content_hash_cache.get(cache_key) + if sha1_git is not None: + return Content({"sha1_git": sha1_git, "perms": perms}) + + data = file_ctx.data() + + content_data = MultiHash.from_data(data).digest() + content_data["length"] = len(data) + content_data["perms"] = perms + content_data["data"] = data + content_data["status"] = "visible" + content = Content(content_data) + + model = content.to_model() + if isinstance(model, ModelContent): + self.storage.content_add([model]) + else: + raise ValueError( + f"{file_path!r} at rev {hg_nodeid.hex()!r} " + f"produced {type(model)!r} instead of {ModelContent!r}" + ) + + self.content_hash_cache[cache_key] = content.hash + + return content + + def store_directories(self, hg_nodeid: HgNodeId) -> Sha1Git: + """Store the directories of a revision given its hg nodeid. + + Args: + hg_nodeid: the hg nodeid of the revision. + + Returns: + the swhid of the top level directory. + """ + rev_ctx = self.repo[hg_nodeid] + + root = HgRootDirectory() + for file_path in rev_ctx.manifest(): + content = self.store_content(hg_nodeid, file_path) + root.add_file(file_path, content) + + directories: List[Directory] = [root] + while directories: + directory = directories.pop(0) + self.storage.directory_add([directory.to_model()]) + directories.extend( + [item for item in directory.values() if isinstance(item, Directory)] + ) + + return root.hash + + +class HgArchiveLoaderFromDisk(HgLoaderFromDisk): + def __init__( + self, url: str, visit_date: Optional[datetime] = None, archive_path: str = None + ): + super().__init__( + url, + visit_date=visit_date, + logging_class="swh.loader.mercurial.ArchiveLoader", + ) + self.temp_dir = None + self.archive_path = archive_path + + def prepare(self, *args, **kwargs): + self.temp_dir = tmp_extract( + archive=self.archive_path, + dir=self.temp_directory, + prefix=TEMPORARY_DIR_PREFIX_PATTERN, + suffix=f".dump-{os.getpid()}", + log=self.log, + source=self.origin_url, + ) + + repo_name = os.listdir(self.temp_dir)[0] + self.directory = os.path.join(self.temp_dir, repo_name) + super().prepare(*args, **kwargs) + + def cleanup(self) -> None: + if self.temp_dir and os.path.exists(self.temp_dir): + rmtree(self.temp_dir) + super().cleanup() + + +def get_hg_branches(repo) -> Dict[bytes, HgNodeId]: + """Equivalent of `hg branches`. + + For some reason, there is no direct way to get the same result of `hg branches` in + the localrepository interface. + """ + result = {} + for tag, heads, tip, isclosed in repo.branchmap().iterbranches(): + if isclosed: + continue + result[tag] = tip + return result + + +if __name__ == "__main__": + import logging + + import click + + logging.basicConfig( + level=logging.DEBUG, format="%(asctime)s %(process)d %(message)s" + ) + + @click.command() + @click.option("--origin-url", help="origin url") + @click.option("--hg-directory", help="Path to mercurial repository to load") + @click.option("--visit-date", default=None, help="Visit date") + def main(origin_url, hg_directory, visit_date): + return HgLoaderFromDisk( + url=origin_url, directory=hg_directory, visit_date=visit_date + ).load() + + main() diff --git a/swh/loader/mercurial/hgutil.py b/swh/loader/mercurial/hgutil.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/hgutil.py @@ -0,0 +1,66 @@ +import io +import traceback +from multiprocessing import Process, Queue +from typing import Dict, NewType + +# The internal Mercurial API is not guaranteed to be stable.
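+# Only a few internals are used here: opening a repository, reading its +# branchmap and cloning with a timeout in a separate process.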
+import mercurial.ui # type: ignore +from mercurial import hg + +NULLID = mercurial.node.nullid +HgNodeId = NewType("HgNodeId", bytes) +Repository = hg.localrepo + + +def repository(path: str) -> hg.localrepo: + ui = mercurial.ui.ui.load() + return hg.repository(ui, path.encode()) + + +def branches(repo: hg.localrepo) -> Dict[bytes, HgNodeId]: + """Equivalent of `hg branches`. + + For some reason, there is no direct way to get the same result of `hg branches` in + the localrepository interface. + """ + result = {} + for tag, heads, tip, isclosed in repo.branchmap().iterbranches(): + if isclosed: + continue + result[tag] = tip + return result + + +class CloneTimeout(Exception): + pass + + +class CloneFailure(Exception): + pass + + +def _clone_task(src: str, dest: str, errors: Queue) -> None: + try: + hg.clone(mercurial.ui.ui.load(), {}, src.encode(), dest.encode()) + except Exception as e: + exc_buffer = io.StringIO() + traceback.print_exc(file=exc_buffer) + errors.put_nowait(exc_buffer.getvalue()) + raise e + + +def clone(src: str, dest: str, timeout: int) -> None: + errors: Queue = Queue() + process = Process(target=_clone_task, args=(src, dest, errors)) + process.start() + process.join(timeout) + + if process.is_alive(): + process.terminate() + process.join(1) + if process.is_alive(): + process.kill() + raise CloneTimeout(src, timeout) + + if not errors.empty(): + raise CloneFailure(src, dest, errors.get()) diff --git a/swh/loader/mercurial/loader.py b/swh/loader/mercurial/loader.py --- a/swh/loader/mercurial/loader.py +++ b/swh/loader/mercurial/loader.py @@ -1,641 +1,6 @@ -# Copyright (C) 2017-2020 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information +# from .from_bundle import HgArchiveBundle20Loader as HgArchiveLoader +# from .from_bundle import HgBundle20Loader as HgLoader +from .from_disk import HgArchiveLoaderFromDisk as HgArchiveLoader +from .from_disk import HgLoaderFromDisk as HgLoader -"""This document contains a SWH loader for ingesting repository data -from Mercurial version 2 bundle files. - -""" - -# NOTE: The code here does expensive work twice in places because of the -# intermediate need to check for what is missing before sending to the database -# and the desire to not juggle very large amounts of data. - -# TODO: Decide whether to also serialize to disk and read back more quickly -# from there. Maybe only for very large repos and fast drives. -# - Avi - - -import datetime -import os -from queue import Empty -import random -import re -from shutil import rmtree -from tempfile import mkdtemp -import time -from typing import Any, Dict, Iterable, List, Optional - -import billiard -from dateutil import parser -import hglib - -from swh.core.config import merge_configs -from swh.loader.core.loader import DVCSLoader -from swh.loader.core.utils import clean_dangling_folders -from swh.model import identifiers -from swh.model.hashutil import ( - DEFAULT_ALGORITHMS, - MultiHash, - hash_to_bytehex, - hash_to_bytes, - hash_to_hex, -) -from swh.model.model import ( - BaseContent, - Content, - Directory, - ObjectType, - Origin, - Person, - Release, - Revision, - RevisionType, - Sha1Git, - SkippedContent, - Snapshot, - SnapshotBranch, - TargetType, - TimestampWithTimezone, -) -from swh.storage.algos.origin import origin_get_latest_visit_status - -from . 
import converters -from .archive_extract import tmp_extract -from .bundle20_reader import Bundle20Reader -from .converters import PRIMARY_ALGO as ALGO -from .objects import SelectiveCache, SimpleTree - -TAG_PATTERN = re.compile("[0-9A-Fa-f]{40}") - -TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.mercurial." - -HEAD_POINTER_NAME = b"tip" - - -class CloneTimeoutError(Exception): - pass - - -DEFAULT_CONFIG: Dict[str, Any] = { - "bundle_filename": "HG20_none_bundle", - "reduce_effort": False, - "temp_directory": "/tmp", - "cache1_size": 800 * 1024 * 1024, - "cache2_size": 800 * 1024 * 1024, - "clone_timeout_seconds": 7200, -} - - -class HgBundle20Loader(DVCSLoader): - """Mercurial loader able to deal with remote or local repository. - - """ - - visit_type = "hg" - - def __init__( - self, - url, - visit_date=None, - directory=None, - logging_class="swh.loader.mercurial.Bundle20Loader", - ): - super().__init__(logging_class=logging_class) - self.config = merge_configs(DEFAULT_CONFIG, self.config) - self.origin_url = url - self.visit_date = visit_date - self.directory = directory - self.bundle_filename = self.config["bundle_filename"] - self.reduce_effort_flag = self.config["reduce_effort"] - self.empty_repository = None - self.temp_directory = self.config["temp_directory"] - self.cache1_size = self.config["cache1_size"] - self.cache2_size = self.config["cache2_size"] - self.clone_timeout = self.config["clone_timeout_seconds"] - self.working_directory = None - self.bundle_path = None - self.heads = {} - self.releases = {} - self.last_snapshot_id: Optional[bytes] = None - - def pre_cleanup(self): - """Cleanup potential dangling files from prior runs (e.g. OOM killed - tasks) - - """ - clean_dangling_folders( - self.temp_directory, - pattern_check=TEMPORARY_DIR_PREFIX_PATTERN, - log=self.log, - ) - - def cleanup(self): - """Clean temporary working directory - - """ - if self.bundle_path and os.path.exists(self.bundle_path): - self.log.debug("Cleanup up working bundle %s" % self.bundle_path) - os.unlink(self.bundle_path) - if self.working_directory and os.path.exists(self.working_directory): - self.log.debug( - "Cleanup up working directory %s" % (self.working_directory,) - ) - rmtree(self.working_directory) - - def get_heads(self, repo): - """Read the closed branches heads (branch, bookmarks) and returns a - dict with key the branch_name (bytes) and values the tuple - (pointer nature (bytes), mercurial's node id - (bytes)). Those needs conversion to swh-ids. This is taken - care of in get_revisions. 
- - """ - b = {} - for _, node_hash_id, pointer_nature, branch_name, *_ in repo.heads(): - b[branch_name] = (pointer_nature, hash_to_bytes(node_hash_id.decode())) - - bookmarks = repo.bookmarks() - if bookmarks and bookmarks[0]: - for bookmark_name, _, target_short in bookmarks[0]: - target = repo[target_short].node() - b[bookmark_name] = (None, hash_to_bytes(target.decode())) - - return b - - def prepare_origin_visit(self, *args, **kwargs) -> None: - self.origin = Origin(url=self.origin_url) - visit_date = self.visit_date - if isinstance(visit_date, str): # visit_date can be string or datetime - visit_date = parser.parse(visit_date) - self.visit_date = visit_date - visit_and_status = origin_get_latest_visit_status( - self.storage, self.origin_url, require_snapshot=True - ) - if visit_and_status is None: - self.last_snapshot_id = None - else: - _, visit_status = visit_and_status - self.last_snapshot_id = visit_status.snapshot - - @staticmethod - def clone_with_timeout(log, origin, destination, timeout): - queue = billiard.Queue() - start = time.monotonic() - - def do_clone(queue, origin, destination): - try: - result = hglib.clone(source=origin, dest=destination) - except BaseException as e: - queue.put(e) - else: - queue.put(result) - - process = billiard.Process(target=do_clone, args=(queue, origin, destination)) - process.start() - - while True: - try: - result = queue.get(timeout=0.1) - break - except Empty: - duration = time.monotonic() - start - if timeout and duration > timeout: - log.warning( - "Timeout cloning `%s` within %s seconds", origin, timeout - ) - process.terminate() - process.join() - raise CloneTimeoutError(origin, timeout) - continue - - process.join() - if isinstance(result, Exception): - raise result from None - - return result - - def prepare(self, *args, **kwargs): - """Prepare the necessary steps to load an actual remote or local - repository. - - To load a local repository, pass the optional directory - parameter as filled with a path to a real local folder. - - To load a remote repository, pass the optional directory - parameter as None. 
- - Args: - origin_url (str): Origin url to load - visit_date (str/datetime): Date of the visit - directory (str/None): The local directory to load - - """ - self.branches = {} - self.tags = [] - self.releases = {} - self.node_2_rev = {} - self.heads = {} - - directory = self.directory - - if not directory: # remote repository - self.working_directory = mkdtemp( - prefix=TEMPORARY_DIR_PREFIX_PATTERN, - suffix="-%s" % os.getpid(), - dir=self.temp_directory, - ) - os.makedirs(self.working_directory, exist_ok=True) - self.hgdir = self.working_directory - - self.log.debug( - "Cloning %s to %s with timeout %s seconds", - self.origin_url, - self.hgdir, - self.clone_timeout, - ) - - self.clone_with_timeout( - self.log, self.origin_url, self.hgdir, self.clone_timeout - ) - - else: # local repository - self.working_directory = None - self.hgdir = directory - - self.bundle_path = os.path.join(self.hgdir, self.bundle_filename) - self.log.debug("Bundling at %s" % self.bundle_path) - with hglib.open(self.hgdir) as repo: - self.heads = self.get_heads(repo) - repo.bundle(bytes(self.bundle_path, "utf-8"), all=True, type=b"none-v2") - - self.cache_filename1 = os.path.join( - self.hgdir, "swh-cache-1-%s" % (hex(random.randint(0, 0xFFFFFF))[2:],) - ) - self.cache_filename2 = os.path.join( - self.hgdir, "swh-cache-2-%s" % (hex(random.randint(0, 0xFFFFFF))[2:],) - ) - - try: - self.br = Bundle20Reader( - bundlefile=self.bundle_path, - cache_filename=self.cache_filename1, - cache_size=self.cache1_size, - ) - except FileNotFoundError: - # Empty repository! Still a successful visit targeting an - # empty snapshot - self.log.warn("%s is an empty repository!" % self.hgdir) - self.empty_repository = True - else: - self.reduce_effort = set() - if self.reduce_effort_flag: - now = datetime.datetime.now(tz=datetime.timezone.utc) - if (now - self.visit_date).days > 1: - # Assuming that self.visit_date would be today for - # a new visit, treat older visit dates as - # indication of wanting to skip some processing - # effort. - for header, commit in self.br.yield_all_changesets(): - ts = commit["time"].timestamp() - if ts < self.visit_date.timestamp(): - self.reduce_effort.add(header["node"]) - - def has_contents(self): - return not self.empty_repository - - def has_directories(self): - return not self.empty_repository - - def has_revisions(self): - return not self.empty_repository - - def has_releases(self): - return not self.empty_repository - - def fetch_data(self): - """Fetch the data from the data source.""" - pass - - def get_contents(self) -> Iterable[BaseContent]: - """Get the contents that need to be loaded.""" - - # NOTE: This method generates blobs twice to reduce memory usage - # without generating disk writes. 
- self.file_node_to_hash = {} - hash_to_info = {} - self.num_contents = 0 - contents: Dict[bytes, BaseContent] = {} - missing_contents = set() - - for blob, node_info in self.br.yield_all_blobs(): - self.num_contents += 1 - file_name = node_info[0] - header = node_info[2] - - length = len(blob) - if header["linknode"] in self.reduce_effort: - algorithms = set([ALGO]) - else: - algorithms = DEFAULT_ALGORITHMS - h = MultiHash.from_data(blob, hash_names=algorithms) - content = h.digest() - content["length"] = length - blob_hash = content[ALGO] - self.file_node_to_hash[header["node"]] = blob_hash - - if header["linknode"] in self.reduce_effort: - continue - - hash_to_info[blob_hash] = node_info - if self.max_content_size is not None and length >= self.max_content_size: - contents[blob_hash] = SkippedContent( - status="absent", reason="Content too large", **content - ) - else: - contents[blob_hash] = Content(data=blob, status="visible", **content) - - if file_name == b".hgtags": - # https://www.mercurial-scm.org/wiki/GitConcepts#Tag_model - # overwrite until the last one - self.tags = (t for t in blob.split(b"\n") if t != b"") - - if contents: - missing_contents = set( - self.storage.content_missing( - [c.to_dict() for c in contents.values()], key_hash=ALGO - ) - ) - - # Clusters needed blobs by file offset and then only fetches the - # groups at the needed offsets. - focs: Dict[int, Dict[bytes, bytes]] = {} # "file/offset/contents" - for blob_hash in missing_contents: - _, file_offset, header = hash_to_info[blob_hash] - focs.setdefault(file_offset, {}) - focs[file_offset][header["node"]] = blob_hash - - for offset, node_hashes in sorted(focs.items()): - for header, data, *_ in self.br.yield_group_objects(group_offset=offset): - node = header["node"] - if node in node_hashes: - blob, meta = self.br.extract_meta_from_blob(data) - content = contents.pop(node_hashes[node], None) - if content: - if ( - self.max_content_size is not None - and len(blob) >= self.max_content_size - ): - yield SkippedContent.from_data( - blob, reason="Content too large" - ) - else: - yield Content.from_data(blob) - - def load_directories(self): - """This is where the work is done to convert manifest deltas from the - repository bundle into SWH directories. 
- - """ - self.mnode_to_tree_id = {} - cache_hints = self.br.build_manifest_hints() - - def tree_size(t): - return t.size() - - self.trees = SelectiveCache( - cache_hints=cache_hints, - size_function=tree_size, - filename=self.cache_filename2, - max_size=self.cache2_size, - ) - - tree = SimpleTree() - for header, added, removed in self.br.yield_all_manifest_deltas(cache_hints): - node = header["node"] - basenode = header["basenode"] - tree = self.trees.fetch(basenode) or tree # working tree - - for path in removed.keys(): - tree = tree.remove_tree_node_for_path(path) - for path, info in added.items(): - file_node, is_symlink, perms_code = info - tree = tree.add_blob( - path, self.file_node_to_hash[file_node], is_symlink, perms_code - ) - - if header["linknode"] in self.reduce_effort: - self.trees.store(node, tree) - else: - new_dirs = [] - self.mnode_to_tree_id[node] = tree.hash_changed(new_dirs) - self.trees.store(node, tree) - yield header, tree, new_dirs - - def get_directories(self) -> Iterable[Directory]: - """Compute directories to load - - """ - dirs: Dict[Sha1Git, Directory] = {} - self.num_directories = 0 - for _, _, new_dirs in self.load_directories(): - for d in new_dirs: - self.num_directories += 1 - dirs[d["id"]] = Directory.from_dict(d) - - missing_dirs: List[Sha1Git] = list(dirs.keys()) - if missing_dirs: - missing_dirs = list(self.storage.directory_missing(missing_dirs)) - - for _id in missing_dirs: - yield dirs[_id] - - def get_revisions(self) -> Iterable[Revision]: - """Compute revisions to load - - """ - revisions = {} - self.num_revisions = 0 - for header, commit in self.br.yield_all_changesets(): - if header["node"] in self.reduce_effort: - continue - - self.num_revisions += 1 - date_dict = identifiers.normalize_timestamp(int(commit["time"].timestamp())) - author_dict = converters.parse_author(commit["user"]) - if commit["manifest"] == Bundle20Reader.NAUGHT_NODE: - directory_id = SimpleTree().hash_changed() - else: - directory_id = self.mnode_to_tree_id[commit["manifest"]] - - extra_headers = [ - ( - b"time_offset_seconds", - str(commit["time_offset_seconds"]).encode("utf-8"), - ) - ] - extra = commit.get("extra") - if extra: - for e in extra.split(b"\x00"): - k, v = e.split(b":", 1) - # transplant_source stores binary reference to a changeset - # prefer to dump hexadecimal one in the revision metadata - if k == b"transplant_source": - v = hash_to_bytehex(v) - extra_headers.append((k, v)) - - parents = [] - p1 = self.node_2_rev.get(header["p1"]) - p2 = self.node_2_rev.get(header["p2"]) - if p1: - parents.append(p1) - if p2: - parents.append(p2) - - revision = Revision( - author=Person.from_dict(author_dict), - date=TimestampWithTimezone.from_dict(date_dict), - committer=Person.from_dict(author_dict), - committer_date=TimestampWithTimezone.from_dict(date_dict), - type=RevisionType.MERCURIAL, - directory=directory_id, - message=commit["message"], - metadata={"node": hash_to_hex(header["node"]),}, - extra_headers=tuple(extra_headers), - synthetic=False, - parents=tuple(parents), - ) - - self.node_2_rev[header["node"]] = revision.id - revisions[revision.id] = revision - - # Converts heads to use swh ids - self.heads = { - branch_name: (pointer_nature, self.node_2_rev[node_id]) - for branch_name, (pointer_nature, node_id) in self.heads.items() - } - - missing_revs = set(revisions.keys()) - if missing_revs: - missing_revs = set(self.storage.revision_missing(list(missing_revs))) - - for rev in missing_revs: - yield revisions[rev] - self.mnode_to_tree_id = None - - def 
_read_tag(self, tag, split_byte=b" "): - node, *name = tag.split(split_byte) - name = split_byte.join(name) - return node, name - - def get_releases(self) -> Iterable[Release]: - """Get the releases that need to be loaded.""" - self.num_releases = 0 - releases = {} - missing_releases = set() - for t in self.tags: - self.num_releases += 1 - node, name = self._read_tag(t) - node = node.decode() - node_bytes = hash_to_bytes(node) - if not TAG_PATTERN.match(node): - self.log.warn("Wrong pattern (%s) found in tags. Skipping" % (node,)) - continue - if node_bytes not in self.node_2_rev: - self.log.warn( - "No matching revision for tag %s " - "(hg changeset: %s). Skipping" % (name.decode(), node) - ) - continue - tgt_rev = self.node_2_rev[node_bytes] - release = Release( - name=name, - target=tgt_rev, - target_type=ObjectType.REVISION, - message=None, - metadata=None, - synthetic=False, - author=Person(name=None, email=None, fullname=b""), - date=None, - ) - missing_releases.add(release.id) - releases[release.id] = release - self.releases[name] = release.id - - if missing_releases: - missing_releases = set(self.storage.release_missing(list(missing_releases))) - - for _id in missing_releases: - yield releases[_id] - - def get_snapshot(self) -> Snapshot: - """Get the snapshot that need to be loaded.""" - branches: Dict[bytes, Optional[SnapshotBranch]] = {} - for name, (pointer_nature, target) in self.heads.items(): - branches[name] = SnapshotBranch( - target=target, target_type=TargetType.REVISION - ) - if pointer_nature == HEAD_POINTER_NAME: - branches[b"HEAD"] = SnapshotBranch( - target=name, target_type=TargetType.ALIAS - ) - for name, target in self.releases.items(): - branches[name] = SnapshotBranch( - target=target, target_type=TargetType.RELEASE - ) - - self.snapshot = Snapshot(branches=branches) - return self.snapshot - - def get_fetch_history_result(self): - """Return the data to store in fetch_history.""" - return { - "contents": self.num_contents, - "directories": self.num_directories, - "revisions": self.num_revisions, - "releases": self.num_releases, - } - - def load_status(self): - snapshot = self.get_snapshot() - load_status = "eventful" - if self.last_snapshot_id is not None and self.last_snapshot_id == snapshot.id: - load_status = "uneventful" - return { - "status": load_status, - } - - -class HgArchiveBundle20Loader(HgBundle20Loader): - """Mercurial loader for repository wrapped within archives. 
- - """ - - def __init__(self, url, visit_date=None, archive_path=None): - super().__init__( - url, - visit_date=visit_date, - logging_class="swh.loader.mercurial.HgArchiveBundle20Loader", - ) - self.temp_dir = None - self.archive_path = archive_path - - def prepare(self, *args, **kwargs): - self.temp_dir = tmp_extract( - archive=self.archive_path, - dir=self.temp_directory, - prefix=TEMPORARY_DIR_PREFIX_PATTERN, - suffix=".dump-%s" % os.getpid(), - log=self.log, - source=self.origin_url, - ) - - repo_name = os.listdir(self.temp_dir)[0] - self.directory = os.path.join(self.temp_dir, repo_name) - super().prepare(*args, **kwargs) - - def cleanup(self): - if self.temp_dir and os.path.exists(self.temp_dir): - rmtree(self.temp_dir) - super().cleanup() +__all__ = ["HgArchiveLoader", "HgLoader"] diff --git a/swh/loader/mercurial/tasks.py b/swh/loader/mercurial/tasks.py --- a/swh/loader/mercurial/tasks.py +++ b/swh/loader/mercurial/tasks.py @@ -5,7 +5,7 @@ from celery import shared_task -from .loader import HgArchiveBundle20Loader, HgBundle20Loader +from .loader import HgArchiveLoader, HgLoader @shared_task(name=__name__ + ".LoadMercurial") @@ -17,7 +17,7 @@ Args: see :func:`DepositLoader.load`. """ - loader = HgBundle20Loader(url, directory=directory, visit_date=visit_date) + loader = HgLoader(url, directory=directory, visit_date=visit_date) return loader.load() @@ -27,7 +27,5 @@ Args: see :func:`DepositLoader.load`. """ - loader = HgArchiveBundle20Loader( - url, archive_path=archive_path, visit_date=visit_date - ) + loader = HgArchiveLoader(url, archive_path=archive_path, visit_date=visit_date) return loader.load() diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_from_bundle.py rename from swh/loader/mercurial/tests/test_loader.py rename to swh/loader/mercurial/tests/test_from_bundle.py --- a/swh/loader/mercurial/tests/test_loader.py +++ b/swh/loader/mercurial/tests/test_from_bundle.py @@ -21,7 +21,7 @@ from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType from swh.storage.algos.snapshot import snapshot_get_latest -from ..loader import CloneTimeoutError, HgArchiveBundle20Loader, HgBundle20Loader +from ..from_bundle import CloneTimeoutError, HgArchiveBundle20Loader, HgBundle20Loader from .loader_checker import ExpectedSwhids, LoaderChecker diff --git a/swh/loader/mercurial/tests/test_loader.py b/swh/loader/mercurial/tests/test_from_disk.py rename from swh/loader/mercurial/tests/test_loader.py rename to swh/loader/mercurial/tests/test_from_disk.py --- a/swh/loader/mercurial/tests/test_loader.py +++ b/swh/loader/mercurial/tests/test_from_disk.py @@ -1,15 +1,12 @@ -# Copyright (C) 2018-2020 The Software Heritage developers +# Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import copy -import logging import os -import time +from urllib.parse import urlsplit -import hglib -import pytest +import dateutil from swh.loader.tests import ( assert_last_visit_matches, @@ -21,28 +18,45 @@ from swh.model.model import RevisionType, Snapshot, SnapshotBranch, TargetType from swh.storage.algos.snapshot import snapshot_get_latest -from ..loader import CloneTimeoutError, HgArchiveBundle20Loader, HgBundle20Loader +from ..from_disk import HgLoaderFromDisk from .loader_checker import ExpectedSwhids, LoaderChecker +# Those tests assert expectations on repository 
loading +# by reading expected values from associated json files +# produced by the `swh-hg-identify` command line utility. +# +# It has more granularity than historical tests. +# Assertions will tell if the error comes from the directories +# revisions or release rather than only checking the snapshot. +# +# With more work it should event be possible to know which part +# of an object is faulty. def test_examples(swh_config, datadir, tmp_path): for archive_name in ("hello", "transplant", "the-sandbox", "example"): archive_path = os.path.join(datadir, f"{archive_name}.tgz") json_path = os.path.join(datadir, f"{archive_name}.json") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + directory = urlsplit(repo_url).path LoaderChecker( - loader=HgBundle20Loader(repo_url), expected=ExpectedSwhids.load(json_path), + loader=HgLoaderFromDisk(repo_url, directory=directory), + expected=ExpectedSwhids.load(json_path), ).check() +# This test has as been adapted from the historical `HgBundle20Loader` tests +# to ensure compatibility of `HgLoaderFromDisk`. +# Hashes as been produced by copy pasting the result of the implementation +# to prevent regressions. def test_loader_hg_new_visit_no_release(swh_config, datadir, tmp_path): """Eventful visit should yield 1 snapshot""" archive_name = "the-sandbox" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + directory = urlsplit(repo_url).path - loader = HgBundle20Loader(repo_url) + loader = HgLoaderFromDisk(url=repo_url, directory=directory) assert loader.load() == {"status": "eventful"} @@ -84,39 +98,21 @@ "snapshot": 1, } - # Ensure archive loader yields the same snapshot - loader2 = HgArchiveBundle20Loader( - url=archive_path, - archive_path=archive_path, - visit_date="2016-05-03 15:16:32+00", - ) - - actual_load_status = loader2.load() - assert actual_load_status == {"status": "eventful"} - - stats2 = get_stats(loader2.storage) - expected_stats = copy.deepcopy(stats) - expected_stats["origin"] += 1 - expected_stats["origin_visit"] += 1 - assert stats2 == expected_stats - - # That visit yields the same snapshot - assert_last_visit_matches( - loader2.storage, - archive_path, - status="full", - type="hg", - snapshot=expected_snapshot.id, - ) - +# This test has as been adapted from the historical `HgBundle20Loader` tests +# to ensure compatibility of `HgLoaderFromDisk`. +# Hashes as been produced by copy pasting the result of the implementation +# to prevent regressions. 
def test_loader_hg_new_visit_with_release(swh_config, datadir, tmp_path): """Eventful visit with release should yield 1 snapshot""" + archive_name = "hello" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + directory = urlsplit(repo_url).path + visit_date = dateutil.parser.parse("2016-05-03 15:16:32+00") - loader = HgBundle20Loader(url=repo_url, visit_date="2016-05-03 15:16:32+00",) + loader = HgLoaderFromDisk(url=repo_url, directory=directory, visit_date=visit_date,) actual_load_status = loader.load() assert actual_load_status == {"status": "eventful"} @@ -163,64 +159,11 @@ snapshot=expected_snapshot.id, ) - # Ensure archive loader yields the same snapshot - loader2 = HgArchiveBundle20Loader( - url=archive_path, - archive_path=archive_path, - visit_date="2016-05-03 15:16:32+00", - ) - - actual_load_status = loader2.load() - assert actual_load_status == {"status": "eventful"} - - stats2 = get_stats(loader2.storage) - expected_stats = copy.deepcopy(stats) - expected_stats["origin"] += 1 - expected_stats["origin_visit"] += 1 - assert stats2 == expected_stats - - # That visit yields the same snapshot - assert_last_visit_matches( - loader2.storage, - archive_path, - status="full", - type="hg", - snapshot=expected_snapshot.id, - ) - - -def test_visit_with_archive_decompression_failure(swh_config, mocker, datadir): - """Failure to decompress should fail early, no data is ingested""" - mock_patoo = mocker.patch("swh.loader.mercurial.archive_extract.patoolib") - mock_patoo.side_effect = ValueError - - archive_name = "hello" - archive_path = os.path.join(datadir, f"{archive_name}.tgz") - # Ensure archive loader yields the same snapshot - loader = HgArchiveBundle20Loader( - url=archive_path, visit_date="2016-05-03 15:16:32+00", - ) - - actual_load_status = loader.load() - assert actual_load_status == {"status": "failed"} - - stats = get_stats(loader.storage) - assert stats == { - "content": 0, - "directory": 0, - "origin": 1, - "origin_visit": 1, - "release": 0, - "revision": 0, - "skipped_content": 0, - "snapshot": 0, - } - # That visit yields the same snapshot - assert_last_visit_matches( - loader.storage, archive_path, status="partial", type="hg", snapshot=None - ) - +# This test has been adapted from the historical `HgBundle20Loader` tests +# to ensure compatibility of `HgLoaderFromDisk`. +# Hashes have been produced by copy-pasting the result of the implementation +# to prevent regressions. def test_visit_repository_with_transplant_operations(swh_config, datadir, tmp_path): """Visit a mercurial repository visit transplant operations within should yield a snapshot as well.
@@ -230,7 +173,10 @@ archive_name = "transplant" archive_path = os.path.join(datadir, f"{archive_name}.tgz") repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) - loader = HgBundle20Loader(url=repo_url, visit_date="2019-05-23 12:06:00+00",) + directory = urlsplit(repo_url).path + visit_date = dateutil.parser.parse("2016-05-03 15:16:32+00") + + loader = HgLoaderFromDisk(url=repo_url, directory=directory, visit_date=visit_date) # load hg repository actual_load_status = loader.load() @@ -261,50 +207,3 @@ assert len(hg_changesets) > 0 assert len(transplant_sources) > 0 assert transplant_sources.issubset(hg_changesets) - - -def test_clone_with_timeout_timeout(caplog, tmp_path, monkeypatch): - log = logging.getLogger("test_clone_with_timeout") - - def clone_timeout(source, dest): - time.sleep(60) - - monkeypatch.setattr(hglib, "clone", clone_timeout) - - with pytest.raises(CloneTimeoutError): - HgBundle20Loader.clone_with_timeout( - log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1 - ) - - for record in caplog.records: - assert record.levelname == "WARNING" - assert "https://www.mercurial-scm.org/repo/hello" in record.getMessage() - assert record.args == ("https://www.mercurial-scm.org/repo/hello", 1) - - -def test_clone_with_timeout_returns(caplog, tmp_path, monkeypatch): - log = logging.getLogger("test_clone_with_timeout") - - def clone_return(source, dest): - return (source, dest) - - monkeypatch.setattr(hglib, "clone", clone_return) - - assert HgBundle20Loader.clone_with_timeout( - log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1 - ) == ("https://www.mercurial-scm.org/repo/hello", tmp_path) - - -def test_clone_with_timeout_exception(caplog, tmp_path, monkeypatch): - log = logging.getLogger("test_clone_with_timeout") - - def clone_return(source, dest): - raise ValueError("Test exception") - - monkeypatch.setattr(hglib, "clone", clone_return) - - with pytest.raises(ValueError) as excinfo: - HgBundle20Loader.clone_with_timeout( - log, "https://www.mercurial-scm.org/repo/hello", tmp_path, 1 - ) - assert "Test exception" in excinfo.value.args[0] diff --git a/swh/loader/mercurial/tests/test_hgutil.py b/swh/loader/mercurial/tests/test_hgutil.py new file mode 100644 --- /dev/null +++ b/swh/loader/mercurial/tests/test_hgutil.py @@ -0,0 +1,46 @@ +# Copyright (C) 2020 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import time +import traceback + +import pytest +from mercurial import hg # type: ignore + +from .. 
import hgutil + + +def test_clone_timeout(monkeypatch): + src = "https://www.mercurial-scm.org/repo/hello" + dest = "/dev/null" + timeout = 1 + + def clone(*args): + time.sleep(5) + + monkeypatch.setattr(hg, "clone", clone) + + with pytest.raises(hgutil.CloneTimeout) as e: + hgutil.clone(src, dest, timeout) + assert e.value.args == (src, timeout) + + +def test_clone_error(caplog, tmp_path, monkeypatch): + src = "https://www.mercurial-scm.org/repo/hello" + dest = "/dev/null" + expected_traceback = "Some traceback" + + def clone(*args): + raise ValueError() + + def print_exc(file): + file.write(expected_traceback) + + monkeypatch.setattr(hg, "clone", clone) + monkeypatch.setattr(traceback, "print_exc", print_exc) + + with pytest.raises(hgutil.CloneFailure) as e: + hgutil.clone(src, dest, 1) + assert e.value.args == (src, dest, expected_traceback) diff --git a/swh/loader/mercurial/tests/test_loader.org b/swh/loader/mercurial/tests/test_loader.org deleted file mode 100644 --- a/swh/loader/mercurial/tests/test_loader.org +++ /dev/null @@ -1,121 +0,0 @@ -#+title: Where the loader test data comes from - -Mercurial repositories are archived within the folder -swh/loader/mercurial/tests/resources. They contain mercurial -repository. - -The following demonstrates the commands executed from within the -repository to retrieve information. - -* the-sandbox - -Archive: the-sandbox.tgz - -** branches - -Listing of branches and their tip: -#+BEGIN_SRC sh -$ hg branches -develop 57:76cc0882284d -default 2:2f13849f14f5 (inactive) -#+END_SRC - -** Changesets - -#+BEGIN_SRC sh -$ for i in {0..57}; do hg checkout $i > /dev/null; echo $i $(swh-hashtree --ignore '.hg' --path .); done -0 e2e117569b086ceabeeedee4acd95f35298d4553 -1 9cd8160c67ac4b0bc97e2e2cd918a580425167d3 -2 180bd57623a7c2c47a8c43514a5f4d903503d0aa -3 180bd57623a7c2c47a8c43514a5f4d903503d0aa -4 180bd57623a7c2c47a8c43514a5f4d903503d0aa -5 180bd57623a7c2c47a8c43514a5f4d903503d0aa -6 180bd57623a7c2c47a8c43514a5f4d903503d0aa -7 180bd57623a7c2c47a8c43514a5f4d903503d0aa -8 180bd57623a7c2c47a8c43514a5f4d903503d0aa -9 180bd57623a7c2c47a8c43514a5f4d903503d0aa -10 180bd57623a7c2c47a8c43514a5f4d903503d0aa -11 180bd57623a7c2c47a8c43514a5f4d903503d0aa -12 180bd57623a7c2c47a8c43514a5f4d903503d0aa -13 180bd57623a7c2c47a8c43514a5f4d903503d0aa -14 180bd57623a7c2c47a8c43514a5f4d903503d0aa -15 180bd57623a7c2c47a8c43514a5f4d903503d0aa -16 180bd57623a7c2c47a8c43514a5f4d903503d0aa -17 180bd57623a7c2c47a8c43514a5f4d903503d0aa -18 180bd57623a7c2c47a8c43514a5f4d903503d0aa -19 180bd57623a7c2c47a8c43514a5f4d903503d0aa -20 180bd57623a7c2c47a8c43514a5f4d903503d0aa -21 180bd57623a7c2c47a8c43514a5f4d903503d0aa -22 180bd57623a7c2c47a8c43514a5f4d903503d0aa -23 180bd57623a7c2c47a8c43514a5f4d903503d0aa -24 180bd57623a7c2c47a8c43514a5f4d903503d0aa -25 180bd57623a7c2c47a8c43514a5f4d903503d0aa -26 180bd57623a7c2c47a8c43514a5f4d903503d0aa -27 180bd57623a7c2c47a8c43514a5f4d903503d0aa -28 180bd57623a7c2c47a8c43514a5f4d903503d0aa -29 180bd57623a7c2c47a8c43514a5f4d903503d0aa -30 180bd57623a7c2c47a8c43514a5f4d903503d0aa -31 180bd57623a7c2c47a8c43514a5f4d903503d0aa -32 180bd57623a7c2c47a8c43514a5f4d903503d0aa -33 180bd57623a7c2c47a8c43514a5f4d903503d0aa -34 180bd57623a7c2c47a8c43514a5f4d903503d0aa -35 180bd57623a7c2c47a8c43514a5f4d903503d0aa -36 180bd57623a7c2c47a8c43514a5f4d903503d0aa -37 180bd57623a7c2c47a8c43514a5f4d903503d0aa -38 180bd57623a7c2c47a8c43514a5f4d903503d0aa -39 180bd57623a7c2c47a8c43514a5f4d903503d0aa -40 180bd57623a7c2c47a8c43514a5f4d903503d0aa -41 
180bd57623a7c2c47a8c43514a5f4d903503d0aa -42 180bd57623a7c2c47a8c43514a5f4d903503d0aa -43 180bd57623a7c2c47a8c43514a5f4d903503d0aa -44 180bd57623a7c2c47a8c43514a5f4d903503d0aa -45 180bd57623a7c2c47a8c43514a5f4d903503d0aa -46 180bd57623a7c2c47a8c43514a5f4d903503d0aa -47 180bd57623a7c2c47a8c43514a5f4d903503d0aa -48 180bd57623a7c2c47a8c43514a5f4d903503d0aa -49 180bd57623a7c2c47a8c43514a5f4d903503d0aa -50 180bd57623a7c2c47a8c43514a5f4d903503d0aa -51 180bd57623a7c2c47a8c43514a5f4d903503d0aa -52 180bd57623a7c2c47a8c43514a5f4d903503d0aa -53 180bd57623a7c2c47a8c43514a5f4d903503d0aa -54 180bd57623a7c2c47a8c43514a5f4d903503d0aa -55 180bd57623a7c2c47a8c43514a5f4d903503d0aa -56 180bd57623a7c2c47a8c43514a5f4d903503d0aa -57 180bd57623a7c2c47a8c43514a5f4d903503d0aa -#+END_SRC - -Note: swh-hashtree is a cli tool defined in swh-model/bin/swh-hashtree - -* hello - -Archive: hello.tgz - -** branches - -#+BEGIN_SRC sh -$ hg branches -default 1:82e55d328c8c -#+END_SRC - -** tags - -I added a tag to have some more data to load (1st repository has no tags): -#+BEGIN_SRC sh -$ hg tags -tip 2:b985ae4a07e1 -0.1 1:82e55d328c8c -#+END_SRC - -#+BEGIN_SRC sh -$ cat .hgtags -82e55d328c8ca4ee16520036c0aaace03a5beb65 0.1 -#+END_SRC - -** Changesets - -#+BEGIN_SRC sh -$ for i in {0..1}; do hg checkout $i > /dev/null; echo $i $(swh-hashtree --ignore '.hg' --path .); done -0 43d727f2f3f2f7cb3b098ddad1d7038464a4cee2 -1 b3f85f210ff86d334575f64cb01c5bf49895b63e -2 8f2be433c945384c85920a8e60f2a68d2c0f20fb -#+END_SRC diff --git a/swh/loader/mercurial/tests/test_tasks.py b/swh/loader/mercurial/tests/test_tasks.py --- a/swh/loader/mercurial/tests/test_tasks.py +++ b/swh/loader/mercurial/tests/test_tasks.py @@ -7,7 +7,7 @@ def test_loader( mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker ): - mock_loader = mocker.patch("swh.loader.mercurial.loader.HgBundle20Loader.load") + mock_loader = mocker.patch("swh.loader.mercurial.loader.HgLoader.load") mock_loader.return_value = {"status": "eventful"} res = swh_scheduler_celery_app.send_task( @@ -26,9 +26,7 @@ def test_archive_loader( mocker, swh_config, swh_scheduler_celery_app, swh_scheduler_celery_worker ): - mock_loader = mocker.patch( - "swh.loader.mercurial.loader.HgArchiveBundle20Loader.load" - ) + mock_loader = mocker.patch("swh.loader.mercurial.loader.HgArchiveLoader.load") mock_loader.return_value = {"status": "uneventful"} res = swh_scheduler_celery_app.send_task(