diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py
index 2025488..67305e6 100644
--- a/swh/loader/cvs/loader.py
+++ b/swh/loader/cvs/loader.py
@@ -1,104 +1,188 @@
 # Copyright (C) 2015-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 """Loader in charge of injecting either new or existing cvs repositories to
 swh-storage.
 
 """
 from datetime import datetime
 from mmap import ACCESS_WRITE, mmap
 import os
 import pty
 import re
 import shutil
-from subprocess import Popen
+import subprocess
 import tempfile
 from typing import Dict, Iterator, List, Optional, Tuple
+from urllib3.util import parse_url
 
 from swh.loader.core.loader import BaseLoader
 from swh.loader.core.utils import clean_dangling_folders
 from swh.loader.exception import NotFound
+import swh.loader.cvs.rcsparse as rcsparse
 from swh.model import from_disk, hashutil
 from swh.model.model import (
     Content,
     Directory,
     Origin,
     Revision,
     SkippedContent,
     Snapshot,
     SnapshotBranch,
     TargetType,
 )
 from swh.storage.algos.snapshot import snapshot_get_latest
 from swh.storage.interface import StorageInterface
 
 DEFAULT_BRANCH = b"HEAD"
 
 TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs."
 
 
 class CvsLoader(BaseLoader):
     """Swh cvs loader.
 
     The repository is local.  The loader deals with
     update on an already previously loaded repository.
 
     """
 
     visit_type = "cvs"
 
     def __init__(
         self,
         storage: StorageInterface,
         url: str,
         origin_url: Optional[str] = None,
         visit_date: Optional[datetime] = None,
-        destination_path: Optional[str] = None,
+        cvsroot_path: Optional[str] = None,
         swh_revision: Optional[str] = None,
         start_from_scratch: bool = False,
         temp_directory: str = "/tmp",
         debug: bool = False,
         check_revision: int = 0,
         max_content_size: Optional[int] = None,
     ):
         super().__init__(
             storage=storage,
             logging_class="swh.loader.cvs.CvsLoader",
             max_content_size=max_content_size,
         )
         self.cvsroot_url = url
         # origin url as unique identifier for origin in swh archive
         self.origin_url = origin_url if origin_url else self.cvsroot_url
         self.debug = debug
         self.temp_directory = temp_directory
         self.done = False
         self.cvsrepo = None
         # Revision check is configurable
         self.check_revision = check_revision
         # internal state used to store swh objects
         self._contents: List[Content] = []
         self._skipped_contents: List[SkippedContent] = []
         self._directories: List[Directory] = []
         self._revisions: List[Revision] = []
         self._snapshot: Optional[Snapshot] = None
         # internal state, current visit
         self._last_revision = None
         self._visit_status = "full"
+        self._load_status = "uneventful"
         self.visit_date = visit_date
-        self.destination_path = destination_path
+        self.cvsroot_path = cvsroot_path
         self.start_from_scratch = start_from_scratch
         self.snapshot = None
         # state from previous visit
         self.latest_snapshot = None
         self.latest_revision = None
 
+    def prepare_origin_visit(self):
+        self.origin = Origin(url=self.origin_url if self.origin_url else self.cvsroot_url)
+
+    def cleanup(self):
+        self.log.info("cleanup")
+
+    def fetch_cvs_repo_with_rsync(self, host, path_on_server):
+        module_name = os.path.basename(path_on_server)
+        # URL *must* end with a trailing slash in order to get CVSROOT listed
+        url = 'rsync://%s%s/' % (host, path_on_server)
+        rsync = subprocess.run(['rsync', url], capture_output=True, encoding='ascii')
+        rsync.check_returncode()
+        have_cvsroot = False
+        have_module = False
+        for line in rsync.stdout.split('\n'):
+            self.log.debug("rsync server: %s" % line)
+            if line.endswith(' CVSROOT'):
+                have_cvsroot = True
+            elif line.endswith(' %s' % module_name):
+                have_module = True
+            if have_module and have_cvsroot:
+                break
+        if not have_module:
+            raise NotFound("CVS module %s not found at %s" \
+                % (module_name, host, url))
+        if not have_cvsroot:
+            raise NotFound("No CVSROOT directory found at %s" % url)
+
+        rsync = subprocess.run(['rsync', '-a', url, self.cvsroot_path])
+        rsync.check_returncode()
+
+    def prepare(self):
+        if not self.cvsroot_path:
+            self.cvsroot_path = tempfile.mkdtemp(
+                suffix="-%s" % os.getpid(),
+                prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+                dir=self.temp_directory,
+            )
+        url = parse_url(self.origin_url)
+        self.log.debug("prepare; origin_url=%s scheme=%s path=%s" % (self.origin_url, url.scheme, url.path))
+        if url.scheme == 'file':
+            if not os.path.exists(url.path):
+                raise NotFound
+        elif url.scheme == 'rsync':
+            self.fetch_cvs_repo_with_rsync(url.host, url.path)
+        else:
+            raise NotFound("Invalid CVS origin URL '%s'" % self.origin_url)
+        have_rcsfile = False
+        have_cvsroot = False
+        for root, dirs, files in os.walk(self.cvsroot_path):
+            if 'CVSROOT' in dirs:
+                have_cvsroot = True
+                dirs.remove('CVSROOT')
+                continue;
+            for f in files:
+                filepath = os.path.join(root, f)
+                if f[-2:] == ',v':
+                    try:
+                      rcsfile = rcsparse.rcsfile(filepath)
+                    except(Exception):
+                        raise
+                    else:
+                        self.log.debug("Looks like we have data to convert; "
+                            "found a valid RCS file at %s" % filepath)
+                        have_rcsfile = True
+                        break
+            if have_rcsfile:
+                break;
+
+        if not have_rcsfile:
+            raise NotFound("Directory %s does not contain any valid RCS files %s" % self.cvsroot_path)
+        if not have_cvsroot:
+            self.log.warn("The CVS repository at '%s' lacks a CVSROOT directory; "
+                "we might be ingesting an incomplete copy of the repository" % self.cvsroot_path)
+
+    def fetch_data(self):
+        self.log.info("fetch_data")
+
+    def store_data(self):
+        self.log.info("store data")
+
     def load_status(self):
         return {
             "status": self._load_status,
         }
 
     def visit_status(self):
         return self._visit_status
 
diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py
index 080fb41..7564c7d 100644
--- a/swh/loader/cvs/tests/test_loader.py
+++ b/swh/loader/cvs/tests/test_loader.py
@@ -1,61 +1,61 @@
 # Copyright (C) 2016-2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import os
 
 import pytest
 from swh.loader.cvs.loader import CvsLoader
 from swh.loader.tests import (
     assert_last_visit_matches,
     check_snapshot,
     get_stats,
     prepare_repository_from_archive,
 )
 from swh.model.hashutil import hash_to_bytes
 from swh.model.model import Snapshot, SnapshotBranch, TargetType
 
 def test_loader_cvs_not_found_no_mock(swh_storage, tmp_path):
     """Given an unknown repository, the loader visit ends up in status not_found"""
     unknown_repo_url = "unknown-repository"
-    loader = CvsLoader(swh_storage, unknown_repo_url, destination_path=tmp_path)
+    loader = CvsLoader(swh_storage, unknown_repo_url, cvsroot_path=tmp_path)
 
     assert loader.load() == {"status": "uneventful"}
 
     assert_last_visit_matches(
         swh_storage, unknown_repo_url, status="not_found", type="cvs",
     )
 
 
-def test_loader_svn_cvs_visit(swh_storage, datadir, tmp_path):
+def test_loader_cvs_visit(swh_storage, datadir, tmp_path):
     """Eventful visit should yield 1 snapshot"""
     archive_name = "runbaby"
     archive_path = os.path.join(datadir, f"{archive_name}.tgz")
     repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path)
 
-    loader = CvsLoader(swh_storage, repo_url, destination_path=tmp_path)
+    loader = CvsLoader(swh_storage, repo_url, cvsroot_path=tmp_path)
 
     assert loader.load() == {"status": "eventful"}
 
     assert_last_visit_matches(
         loader.storage,
         repo_url,
         status="full",
         type="svn",
         snapshot=GOURMET_SNAPSHOT.id,
     )
 
     stats = get_stats(loader.storage)
     assert stats == {
         "content": 19,
         "directory": 17,
         "origin": 1,
         "origin_visit": 1,
         "release": 0,
         "revision": 6,
         "skipped_content": 0,
         "snapshot": 1,
     }
 
     #check_snapshot(GOURMET_SNAPSHOT, loader.storage)