# Copyright (C) 2015-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information

# ---------------------------------------------------------------------------
# swh/web/common/query.py
# ---------------------------------------------------------------------------

import re

from swh.model.hashutil import ALGORITHMS, hash_to_bytes
from swh.web.common.exc import BadInputExc

SHA256_RE = re.compile(r"^[0-9a-f]{64}$", re.IGNORECASE)
SHA1_RE = re.compile(r"^[0-9a-f]{40}$", re.IGNORECASE)


def parse_hash(q):
    """Detect the hash type of a user submitted query string.

    Args:
        q: query string with the following format:
            "[HASH_TYPE:]HEX_CHECKSUM". HASH_TYPE is optional; when present
            it must be one of swh.model.hashutil.ALGORITHMS, when absent it
            is guessed from the checksum length ("sha1" for 40 hexadecimal
            digits, "sha256" for 64).

    Returns:
        A pair (hash_algorithm, byte hash value)

    Raises:
        BadInputExc: if the query string contains more than one ":"
            separator, names an unknown algorithm, or carries a checksum
            that is not valid for the (given or guessed) algorithm.

    """

    def guess_algo(hex_digest):
        # Without an explicit prefix, only the digest length can tell the
        # algorithm apart.
        if SHA1_RE.match(hex_digest):
            return "sha1"
        if SHA256_RE.match(hex_digest):
            return "sha256"
        raise BadInputExc("Invalid checksum query string %s" % hex_digest)

    def check_algo(algo, hex_digest):
        # Only these algorithms have a pattern to validate against; any
        # other name is vetted by the ALGORITHMS membership test below.
        if algo in {"sha1", "sha1_git"}:
            pattern = SHA1_RE
        elif algo == "sha256":
            pattern = SHA256_RE
        else:
            return
        if not pattern.match(hex_digest):
            raise BadInputExc("Invalid hash %s for algorithm %s" % (hex_digest, algo))

    parts = q.split(":")
    if len(parts) > 2:
        raise BadInputExc("Invalid checksum query string %s" % q)
    if len(parts) == 1:
        algo, hex_digest = guess_algo(q), q
    else:
        algo, hex_digest = parts
        check_algo(algo, hex_digest)

    if algo not in ALGORITHMS:
        raise BadInputExc("Unknown hash algorithm %s" % algo)

    try:
        return (algo, hash_to_bytes(hex_digest))
    except ValueError as exc:
        # NOTE(review): hash_to_bytes presumably rejects non-hexadecimal
        # input with ValueError (bytes.fromhex semantics) - confirm. Checksums
        # of accepted algorithms other than sha1/sha1_git/sha256 are not
        # pattern-checked above, so convert the failure here to keep
        # BadInputExc the only error callers have to handle.
        raise BadInputExc(
            "Invalid hash %s for algorithm %s" % (hex_digest, algo)
        ) from exc


def parse_hash_with_algorithms_or_throws(q, accepted_algo, error_msg):
    """Parse a hash query, accepting only a restricted set of algorithms.

    Args:
        q: query string with the following format:
            "[HASH_TYPE:]HEX_CHECKSUM", parsed by parse_hash.
        accepted_algo: collection of strings naming the accepted algorithms.
        error_msg: message of the BadInputExc raised when the algorithm of
            the query is not one of accepted_algo.

    Returns:
        A pair (hash_algorithm, byte hash value)

    Raises:
        BadInputExc: when the input is rejected by parse_hash or when its
            algorithm is not one of the accepted algorithms.

    """
    algo, hash_value = parse_hash(q)
    if algo not in accepted_algo:
        raise BadInputExc(error_msg)

    return (algo, hash_value)


# ---------------------------------------------------------------------------
# swh/web/tests/common/test_query.py
#
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU Affero General Public License version 3, or any later version
# See top-level LICENSE file for more information
# ---------------------------------------------------------------------------

import pytest

from swh.model import hashutil
from swh.web.common import query
from swh.web.common.exc import BadInputExc


def test_parse_hash_malformed_query_with_more_than_2_parts():
    with pytest.raises(BadInputExc):
        query.parse_hash("sha1:1234567890987654:other-stuff")


def test_parse_hash_guess_sha1():
    h = "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15"
    r = query.parse_hash(h)
    assert r == ("sha1", hashutil.hash_to_bytes(h))


def test_parse_hash_guess_sha256():
    # Mixed-case digest: the patterns are case-insensitive
    h = "084C799CD551DD1D8D5C5F9A5D593B2E931F5E36122ee5c793c1d08a19839cc0"
    r = query.parse_hash(h)
    assert r == ("sha256", hashutil.hash_to_bytes(h))


def test_parse_hash_guess_algo_malformed_hash():
    with pytest.raises(BadInputExc):
        query.parse_hash("1234567890987654")


def test_parse_hash_check_sha1():
    h = "f1d2d2f924e986ac86fdf7b36c94bcdf32beec15"
    r = query.parse_hash("sha1:" + h)
    assert r == ("sha1", hashutil.hash_to_bytes(h))


def test_parse_hash_check_sha1_git():
    h = "e1d2d2f924e986ac86fdf7b36c94bcdf32beec15"
    r = query.parse_hash("sha1_git:" + h)
    assert r == ("sha1_git", hashutil.hash_to_bytes(h))


def test_parse_hash_check_sha256():
    h = "084C799CD551DD1D8D5C5F9A5D593B2E931F5E36122ee5c793c1d08a19839cc0"
    r = query.parse_hash("sha256:" + h)
    assert r == ("sha256", hashutil.hash_to_bytes(h))


def test_parse_hash_check_algo_malformed_sha1_hash():
    with pytest.raises(BadInputExc):
        query.parse_hash("sha1:1234567890987654")


def test_parse_hash_check_algo_malformed_sha1_git_hash():
    with pytest.raises(BadInputExc):
        query.parse_hash("sha1_git:1234567890987654")


def test_parse_hash_check_algo_malformed_sha256_hash():
    with pytest.raises(BadInputExc):
        query.parse_hash("sha256:1234567890987654")


def test_parse_hash_check_algo_unknown_one():
    with pytest.raises(BadInputExc):
        query.parse_hash("sha2:1234567890987654")


def test_parse_hash_with_algorithms_or_throws_bad_query(mocker):
    # parse_hash failures must propagate unchanged through the wrapper
    mock_hash = mocker.patch("swh.web.common.query.parse_hash")
    mock_hash.side_effect = BadInputExc("Error input")

    with pytest.raises(BadInputExc) as e:
        query.parse_hash_with_algorithms_or_throws(
            "sha1:blah", ["sha1"], "useless error message for this use case"
        )
    assert e.match("Error input")

    mock_hash.assert_called_once_with("sha1:blah")


def test_parse_hash_with_algorithms_or_throws_bad_algo(mocker):
    # A well-formed hash of a non-accepted algorithm raises with error_msg
    mock_hash = mocker.patch("swh.web.common.query.parse_hash")
    mock_hash.return_value = "sha1", "123"

    with pytest.raises(BadInputExc) as e:
        query.parse_hash_with_algorithms_or_throws(
            "sha1:431", ["sha1_git"], "Only sha1_git!"
        )
    assert e.match("Only sha1_git!")

    mock_hash.assert_called_once_with("sha1:431")


def test_parse_hash_with_algorithms(mocker):
    mock_hash = mocker.patch("swh.web.common.query.parse_hash")
    mock_hash.return_value = ("sha256", b"123")

    algo, sha = query.parse_hash_with_algorithms_or_throws(
        "sha256:123", ["sha256", "sha1_git"], "useless error message for this use case"
    )

    assert algo == "sha256"
    assert sha == b"123"

    mock_hash.assert_called_once_with("sha256:123")