diff --git a/mypy.ini b/mypy.ini --- a/mypy.ini +++ b/mypy.ini @@ -20,6 +20,3 @@ [mypy-swh.loader.*] ignore_missing_imports = True - -[mypy-urllib3.util.*] -ignore_missing_imports = True diff --git a/swh/loader/cvs/cvsclient.py b/swh/loader/cvs/cvsclient.py --- a/swh/loader/cvs/cvsclient.py +++ b/swh/loader/cvs/cvsclient.py @@ -1,4 +1,4 @@ -# Copyright (C) 2015-2021 The Software Heritage developers +# Copyright (C) 2015-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -86,20 +86,12 @@ class CVSClient: - def connect_pserver(self, hostname, port, auth): + def connect_pserver(self, hostname, port, username, password): if port is None: port = CVS_PSERVER_PORT - if auth is None: + if username is None: raise NotFound( - "Username and password are required for " - "a pserver connection: %s" % EXAMPLE_PSERVER_URL - ) - try: - user = auth.split(":")[0] - password = auth.split(":")[1] - except IndexError: - raise NotFound( - "Username and password are required for " + "Username is required for " "a pserver connection: %s" % EXAMPLE_PSERVER_URL ) @@ -108,10 +100,11 @@ except ConnectionRefusedError: raise NotFound("Could not connect to %s:%s", hostname, port) - scrambled_password = scramble_password(password) + # use empty password if it is None + scrambled_password = scramble_password(password or "") request = "BEGIN AUTH REQUEST\n%s\n%s\n%s\nEND AUTH REQUEST\n" % ( self.cvsroot_path, - user, + username, scrambled_password, ) print("Request: %s\n" % request) @@ -124,13 +117,13 @@ % (hostname, port, response) ) - def connect_ssh(self, hostname, port, auth): + def connect_ssh(self, hostname, port, username): command = ["ssh"] - if auth is not None: + if username is not None: # Assume 'auth' contains only a user name. # We do not support password authentication with SSH since the # anoncvs user is usually granted access without a password. - command += ["-l", "%s" % auth] + command += ["-l", "%s" % username] if port is not None: command += ["-p", "%d" % port] @@ -150,7 +143,7 @@ command, bufsize=0, stdin=subprocess.PIPE, stdout=subprocess.PIPE ) - def connect_fake(self, hostname, port, auth): + def connect_fake(self): command = ["cvs", "server"] # use non-buffered I/O to match behaviour of self.socket self.ssh = subprocess.Popen( @@ -222,7 +215,7 @@ Connect to a CVS server at the specified URL and perform the initial CVS protocol handshake. """ - self.hostname = url.host + self.hostname = url.hostname self.cvsroot_path = os.path.dirname(url.path) self.cvs_module_name = os.path.basename(url.path) self.socket = None @@ -231,11 +224,11 @@ self.incomplete_line = b"" if url.scheme == "pserver": - self.connect_pserver(url.host, url.port, url.auth) + self.connect_pserver(url.hostname, url.port, url.username, url.password) elif url.scheme == "ssh": - self.connect_ssh(url.host, url.port, url.auth) + self.connect_ssh(url.hostname, url.port, url.username) elif url.scheme == "fake": - self.connect_fake(url.host, url.port, url.auth) + self.connect_fake() else: raise NotFound("Invalid CVS origin URL '%s'" % url) diff --git a/swh/loader/cvs/loader.py b/swh/loader/cvs/loader.py --- a/swh/loader/cvs/loader.py +++ b/swh/loader/cvs/loader.py @@ -14,12 +14,12 @@ import tempfile import time from typing import Any, BinaryIO, Dict, Iterator, List, Optional, Sequence, Tuple, cast +from urllib.parse import urlparse import sentry_sdk from tenacity import retry from tenacity.retry import retry_if_exception_type from tenacity.stop import stop_after_attempt -from urllib3.util import parse_url from swh.loader.core.loader import BaseLoader from swh.loader.core.utils import clean_dangling_folders @@ -387,7 +387,7 @@ prefix=TEMPORARY_DIR_PREFIX_PATTERN, dir=self.temp_directory, ) - url = parse_url(self.origin.url) + url = urlparse(self.origin.url) self.log.debug( "prepare; origin_url=%s scheme=%s path=%s", self.origin.url, @@ -411,7 +411,8 @@ if not os.path.exists(url.path): raise NotFound elif url.scheme == "rsync": - self.fetch_cvs_repo_with_rsync(url.host, url.path) + assert url.hostname is not None + self.fetch_cvs_repo_with_rsync(url.hostname, url.path) have_rcsfile = False have_cvsroot = False @@ -487,7 +488,7 @@ cvsroot_path = os.path.dirname(url.path) self.log.debug( "Fetching CVS rlog from %s:%s/%s", - url.host, + url.hostname, cvsroot_path, self.cvs_module_name, ) diff --git a/swh/loader/cvs/tests/test_loader.py b/swh/loader/cvs/tests/test_loader.py --- a/swh/loader/cvs/tests/test_loader.py +++ b/swh/loader/cvs/tests/test_loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2021 The Software Heritage developers +# Copyright (C) 2016-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,9 +7,11 @@ import subprocess import tempfile from typing import Any, Dict +from urllib.parse import urlparse import pytest +from swh.loader.cvs.cvsclient import CVSClient from swh.loader.cvs.loader import BadPathException, CvsLoader from swh.loader.tests import ( assert_last_visit_matches, @@ -1236,3 +1238,22 @@ loader.cvs_module_name = module_name loader.cvsroot_path = tmp_path loader.fetch_cvs_repo_with_rsync(host, path) + + +@pytest.mark.parametrize( + "pserver_url", + [ + "pserver://anonymous:anonymous@cvs.example.org/cvsroot/project/module", + "pserver://anonymous@cvs.example.org/cvsroot/project/module", + ], +) +def test_cvs_client_connect_pserver(mocker, pserver_url): + from swh.loader.cvs.cvsclient import socket + + conn = mocker.MagicMock() + conn.recv.side_effect = [b"I LOVE YOU\n", b"Valid-requests \n", b"ok\n"] + mocker.patch.object(socket, "create_connection").return_value = conn + parsed_url = urlparse(pserver_url) + + # check cvs client can be instantiated without errors + CVSClient(parsed_url)