diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py --- a/swh/loader/cvs/loader.py +++ b/swh/loader/cvs/loader.py @@ -54,6 +54,10 @@ TEMPORARY_DIR_PREFIX_PATTERN = "swh.loader.cvs." +class BadPathException(Exception): + pass + + class CvsLoader(BaseLoader): """Swh cvs loader. @@ -135,6 +139,20 @@ self._last_revision = revision return (revision, swh_dir) + def file_path_is_safe(self, wtpath): + if "%s..%s" % (os.path.sep, os.path.sep) in wtpath: + # Paths with back-references should not appear + # in CVS protocol messages or CVS rlog output + return False + elif ( + os.path.commonpath([self.tempdir_path, os.path.normpath(wtpath)]) + != self.tempdir_path + ): + # The path must be a child of our temporary directory. + return False + else: + return True + def checkout_file_with_rcsparse( self, k: ChangeSetKey, f: FileRevision, rcsfile: rcsparse.rcsfile ) -> None: @@ -142,6 +160,8 @@ assert self.server_style_cvsroot path = file_path(self.cvsroot_path, f.path) wtpath = os.path.join(self.tempdir_path, path) + if not self.file_path_is_safe(wtpath): + raise BadPathException("unsafe path found in RCS file: %s" % f.path) self.log.info("rev %s state %s file %s" % (f.rev, f.state, f.path)) if f.state == "dead": # remove this file from work tree @@ -189,6 +209,8 @@ assert self.cvsroot_path path = file_path(self.cvsroot_path, f.path) wtpath = os.path.join(self.tempdir_path, path) + if not self.file_path_is_safe(wtpath): + raise BadPathException("unsafe path found in cvs rlog output: %s" % f.path) self.log.info("rev %s state %s file %s" % (f.rev, f.state, f.path)) if f.state == "dead": # remove this file from work tree @@ -528,6 +550,7 @@ return False except Exception: self.log.exception("Exception in fetch_data:") + self._visit_status = "failed" return False # Stopping iteration self._contents, self._skipped_contents, self._directories, rev = data self._revisions = [rev] @@ -602,8 +625,9 @@ self._revisions = [] def load_status(self) -> Dict[str, Any]: - assert self.snapshot is not None - if self.last_snapshot == self.snapshot: + if self.snapshot is None: + load_status = "failed" + elif self.last_snapshot == self.snapshot: load_status = "uneventful" else: load_status = "eventful" diff --git a/swh/loader/cvs/tests/data/unsafe_rlog_with_unsafe_relative_path.rlog b/swh/loader/cvs/tests/data/unsafe_rlog_with_unsafe_relative_path.rlog new file mode 100644 --- /dev/null +++ b/swh/loader/cvs/tests/data/unsafe_rlog_with_unsafe_relative_path.rlog @@ -0,0 +1,103 @@ +RCS file: {cvsroot_path}/../greek-tree/alpha,v +head: 1.2 +branch: +locks: strict +access list: +symbolic names: + start: 1.1.1.1 + yoyo: 1.1.1 +keyword substitution: kv +total revisions: 3; selected revisions: 3 +description: +---------------------------- +revision 1.2 +date: 2021-04-20 15:30:37 +0200; author: stsp; state: Exp; lines: +1 -0; commitid: 100607ED77A971503F5; +edit alpha +---------------------------- +revision 1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; commitid: 100607ED74996F4C8AF; +branches: 1.1.1; +Initial revision +---------------------------- +revision 1.1.1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; lines: +0 -0; commitid: 100607ED74996F4C8AF; +initial import +============================================================================= + +RCS file: {cvsroot_path}/greek-tree/Attic/../beta,v +head: 1.2 +branch: +locks: strict +access list: +symbolic names: + start: 1.1.1.1 + yoyo: 1.1.1 +keyword substitution: kv +total revisions: 3; selected revisions: 3 +description: +---------------------------- +revision 1.2 +date: 2021-04-20 15:30:52 +0200; author: stsp; state: dead; lines: +0 -0; commitid: 100607ED78A9726BA11; +remove beta +---------------------------- +revision 1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; commitid: 100607ED74996F4C8AF; +branches: 1.1.1; +Initial revision +---------------------------- +revision 1.1.1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; lines: +0 -0; commitid: 100607ED74996F4C8AF; +initial import +============================================================================= + +RCS file: {cvsroot_path}/../../etc/passwd +head: 1.3 +branch: +locks: strict +access list: +symbolic names: + start: 1.1.1.1 + yoyo: 1.1.1 +keyword substitution: kv +total revisions: 4; selected revisions: 4 +description: +---------------------------- +revision 1.3 +date: 2021-04-20 15:32:45 +0200; author: stsp; state: Exp; lines: +1 -1; commitid: 100607ED7F29770C997; +reviving zeta +---------------------------- +revision 1.2 +date: 2021-04-20 15:31:57 +0200; author: stsp; state: dead; lines: +0 -0; commitid: 100607ED7C89753114E; +remove epsilon/zeta +---------------------------- +revision 1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; commitid: 100607ED74996F4C8AF; +branches: 1.1.1; +Initial revision +---------------------------- +revision 1.1.1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; lines: +0 -0; commitid: 100607ED74996F4C8AF; +initial import +============================================================================= + +RCS file: {cvsroot_path}/greek-tree/gamma/../../../../../../etc/passwd +head: 1.1 +branch: 1.1.1 +locks: strict +access list: +symbolic names: + start: 1.1.1.1 + yoyo: 1.1.1 +keyword substitution: kv +total revisions: 2; selected revisions: 2 +description: +---------------------------- +revision 1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; commitid: 100607ED74996F4C8AF; +branches: 1.1.1; +Initial revision +---------------------------- +revision 1.1.1.1 +date: 2021-04-20 15:29:48 +0200; author: stsp; state: Exp; lines: +0 -0; commitid: 100607ED74996F4C8AF; +initial import +============================================================================= diff --git a/swh/loader/cvs/tests/data/unsafe_rlog_wrong_arborescence.rlog b/swh/loader/cvs/tests/data/unsafe_rlog_wrong_arborescence.rlog new file mode 100644 --- /dev/null +++ b/swh/loader/cvs/tests/data/unsafe_rlog_wrong_arborescence.rlog @@ -0,0 +1,18 @@ +RCS file: /etc/passwd +head: 1.2 +branch: +locks: strict +access list: +symbolic names: +keyword substitution: kv +total revisions: 2; selected revisions: 2 +description: +---------------------------- +revision 1.2 +date: 2021-04-20 15:32:18 +0200; author: stsp; state: Exp; lines: +1 -0; commitid: 100607ED7DF9763EBB7; +edit psi +---------------------------- +revision 1.1 +date: 2021-04-20 15:31:15 +0200; author: stsp; state: Exp; commitid: 100607ED7999735979A; +add epsilon/psi +============================================================================= diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py --- a/swh/loader/cvs/tests/test_loader.py +++ b/swh/loader/cvs/tests/test_loader.py @@ -4,9 +4,12 @@ # See top-level LICENSE file for more information import os +import tempfile from typing import Any, Dict -from swh.loader.cvs.loader import CvsLoader +import pytest + +from swh.loader.cvs.loader import BadPathException, CvsLoader from swh.loader.tests import ( assert_last_visit_matches, check_snapshot, @@ -1081,3 +1084,63 @@ "skipped_content": 0, "snapshot": 1, } + + +@pytest.mark.parametrize( + "rlog_unsafe_path", + [ + # paths that walk to parent directory: + "unsafe_rlog_with_unsafe_relative_path.rlog", + # absolute path outside the CVS server's root directory: + "unsafe_rlog_wrong_arborescence.rlog", + ], +) +def test_loader_cvs_weird_paths_in_rlog( + swh_storage, datadir, tmp_path, mocker, rlog_unsafe_path +): + """Handle cvs rlog output which contains unsafe paths""" + archive_name = "greek-repository" + archive_path = os.path.join(datadir, f"{archive_name}.tgz") + repo_url = prepare_repository_from_archive(archive_path, archive_name, tmp_path) + repo_url += "/greek-tree" # CVS module name + + # Ask our cvsclient to connect via the 'cvs server' command + repo_url = f"fake://{repo_url[7:]}" + + # And let's pretend the server returned this rlog output instead of + # what it would actually return. + rlog_file = tempfile.NamedTemporaryFile( + dir=tmp_path, mode="w+", delete=False, prefix="weird-path-rlog-" + ) + rlog_file_path = rlog_file.name + + rlog_weird_paths = open(os.path.join(datadir, rlog_unsafe_path)) + for line in rlog_weird_paths.readlines(): + rlog_file.write(line.replace("{cvsroot_path}", os.path.dirname(repo_url[7:]))) + rlog_file.close() + rlog_file_override = open(rlog_file_path, "rb") # re-open as bytes instead of str + mock_read = mocker.patch("swh.loader.cvs.cvsclient.CVSClient.fetch_rlog") + mock_read.return_value = rlog_file_override + + def side_effect(self, path="", state=""): + return None + + mock_read.side_effect = side_effect(side_effect) + + try: + loader = CvsLoader( + swh_storage, repo_url, cvsroot_path=os.path.join(tmp_path, archive_name), + ) + except BadPathException: + pass + + assert loader.load() == {"status": "failed"} + + assert_last_visit_matches( + swh_storage, repo_url, status="failed", type="cvs", + ) + + assert mock_read.called + + rlog_file_override.close() + os.unlink(rlog_file_path)