diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py index fd330def..69f0e2d2 100644 --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -1,399 +1,399 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from hypothesis import given from swh.web.common.utils import reverse from swh.web.tests.data import random_content from swh.web.tests.strategies import content, contents_with_ctags from swh.web.tests.conftest import ctags_json_missing, fossology_missing @given(content()) def test_api_content_filetype(api_client, indexer_data, content): indexer_data.content_add_mimetype(content["sha1"]) url = reverse( "api-1-content-filetype", url_args={"q": "sha1_git:%s" % content["sha1_git"]} ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" content_url = reverse( "api-1-content", url_args={"q": "sha1:%s" % content["sha1"]}, request=rv.wsgi_request, ) expected_data = indexer_data.content_get_mimetype(content["sha1"]) expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_filetype_sha_not_found(api_client): unknown_content_ = random_content() url = reverse( "api-1-content-filetype", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} ) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "No filetype information found for content " "sha1:%s." % unknown_content_["sha1"], } @pytest.mark.skip # Language indexer is disabled @given(content()) def test_api_content_language(api_client, indexer_data, content): indexer_data.content_add_language(content["sha1"]) url = reverse( "api-1-content-language", url_args={"q": "sha1_git:%s" % content["sha1_git"]} ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" content_url = reverse( "api-1-content", url_args={"q": "sha1:%s" % content["sha1"]}, request=rv.wsgi_request, ) expected_data = indexer_data.content_get_language(content["sha1"]) expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_language_sha_not_found(api_client): unknown_content_ = random_content() url = reverse( "api-1-content-language", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} ) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "No language information found for content " "sha1:%s." % unknown_content_["sha1"], } @pytest.mark.skip # Language indexer is disabled @pytest.mark.skipif( ctags_json_missing, reason="requires ctags with json output support" ) @given(contents_with_ctags()) def test_api_content_symbol(api_client, indexer_data, contents_with_ctags): expected_data = {} for content_sha1 in contents_with_ctags["sha1s"]: indexer_data.content_add_ctags(content_sha1) for ctag in indexer_data.content_get_ctags(content_sha1): if ctag["name"] == contents_with_ctags["symbol_name"]: expected_data[content_sha1] = ctag break url = reverse( "api-1-content-symbol", url_args={"q": contents_with_ctags["symbol_name"]}, query_params={"per_page": 100}, ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" for entry in rv.data: content_sha1 = entry["sha1"] expected_entry = expected_data[content_sha1] for key, view_name in ( ("content_url", "api-1-content"), ("data_url", "api-1-content-raw"), ("license_url", "api-1-content-license"), ("language_url", "api-1-content-language"), ("filetype_url", "api-1-content-filetype"), ): expected_entry[key] = reverse( view_name, url_args={"q": "sha1:%s" % content_sha1}, request=rv.wsgi_request, ) expected_entry["sha1"] = content_sha1 del expected_entry["id"] assert entry == expected_entry assert "Link" not in rv url = reverse( "api-1-content-symbol", url_args={"q": contents_with_ctags["symbol_name"]}, query_params={"per_page": 2}, ) rv = api_client.get(url) next_url = ( reverse( "api-1-content-symbol", url_args={"q": contents_with_ctags["symbol_name"]}, query_params={"last_sha1": rv.data[1]["sha1"], "per_page": 2}, request=rv.wsgi_request, ), ) assert rv["Link"] == '<%s>; rel="next"' % next_url def test_api_content_symbol_not_found(api_client): url = reverse("api-1-content-symbol", url_args={"q": "bar"}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "No indexed raw content match expression 'bar'.", } assert "Link" not in rv @pytest.mark.skipif( ctags_json_missing, reason="requires ctags with json output support" ) @given(content()) def test_api_content_ctags(api_client, indexer_data, content): indexer_data.content_add_ctags(content["sha1"]) url = reverse( "api-1-content-ctags", url_args={"q": "sha1_git:%s" % content["sha1_git"]} ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" content_url = reverse( "api-1-content", url_args={"q": "sha1:%s" % content["sha1"]}, request=rv.wsgi_request, ) expected_data = list(indexer_data.content_get_ctags(content["sha1"])) for e in expected_data: e["content_url"] = content_url assert rv.data == expected_data @pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") @given(content()) def test_api_content_license(api_client, indexer_data, content): indexer_data.content_add_license(content["sha1"]) url = reverse( "api-1-content-license", url_args={"q": "sha1_git:%s" % content["sha1_git"]} ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" content_url = reverse( "api-1-content", url_args={"q": "sha1:%s" % content["sha1"]}, request=rv.wsgi_request, ) expected_data = indexer_data.content_get_license(content["sha1"]) expected_data["content_url"] = content_url assert rv.data == expected_data def test_api_content_license_sha_not_found(api_client): unknown_content_ = random_content() url = reverse( "api-1-content-license", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} ) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "No license information found for content " "sha1:%s." % unknown_content_["sha1"], } @given(content()) def test_api_content_metadata(api_client, archive_data, content): url = reverse("api-1-content", {"q": "sha1:%s" % content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" - expected_data = archive_data.content_get_metadata(content["sha1"]) + expected_data = archive_data.content_get(content["sha1"]) for key, view_name in ( ("data_url", "api-1-content-raw"), ("license_url", "api-1-content-license"), ("language_url", "api-1-content-language"), ("filetype_url", "api-1-content-filetype"), ): expected_data[key] = reverse( view_name, url_args={"q": "sha1:%s" % content["sha1"]}, request=rv.wsgi_request, ) assert rv.data == expected_data def test_api_content_not_found_as_json(api_client): unknown_content_ = random_content() url = reverse("api-1-content", url_args={"q": "sha1:%s" % unknown_content_["sha1"]}) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "Content with sha1 checksum equals to %s not found!" % unknown_content_["sha1"], } def test_api_content_not_found_as_yaml(api_client): unknown_content_ = random_content() url = reverse( "api-1-content", url_args={"q": "sha256:%s" % unknown_content_["sha256"]} ) rv = api_client.get(url, HTTP_ACCEPT="application/yaml") assert rv.status_code == 404, rv.data assert "application/yaml" in rv["Content-Type"] assert rv.data == { "exception": "NotFoundExc", "reason": "Content with sha256 checksum equals to %s not found!" % unknown_content_["sha256"], } def test_api_content_raw_ko_not_found(api_client): unknown_content_ = random_content() url = reverse( "api-1-content-raw", url_args={"q": "sha1:%s" % unknown_content_["sha1"]} ) rv = api_client.get(url) assert rv.status_code == 404, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "exception": "NotFoundExc", "reason": "Content with sha1 checksum equals to %s not found!" % unknown_content_["sha1"], } @given(content()) def test_api_content_raw_text(api_client, archive_data, content): url = reverse("api-1-content-raw", url_args={"q": "sha1:%s" % content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/octet-stream" assert ( rv["Content-disposition"] == "attachment; filename=content_sha1_%s_raw" % content["sha1"] ) assert rv["Content-Type"] == "application/octet-stream" expected_data = archive_data.content_get_data(content["sha1"]) assert rv.content == expected_data["data"] @given(content()) def test_api_content_raw_text_with_filename(api_client, archive_data, content): url = reverse( "api-1-content-raw", url_args={"q": "sha1:%s" % content["sha1"]}, query_params={"filename": "filename.txt"}, ) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/octet-stream" assert rv["Content-disposition"] == "attachment; filename=filename.txt" assert rv["Content-Type"] == "application/octet-stream" expected_data = archive_data.content_get_data(content["sha1"]) assert rv.content == expected_data["data"] @given(content()) def test_api_check_content_known(api_client, content): url = reverse("api-1-content-known", url_args={"q": content["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "search_res": [{"found": True, "sha1": content["sha1"]}], "search_stats": {"nbfiles": 1, "pct": 100.0}, } @given(content()) def test_api_check_content_known_as_yaml(api_client, content): url = reverse("api-1-content-known", url_args={"q": content["sha1"]}) rv = api_client.get(url, HTTP_ACCEPT="application/yaml") assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/yaml" assert rv.data == { "search_res": [{"found": True, "sha1": content["sha1"]}], "search_stats": {"nbfiles": 1, "pct": 100.0}, } @given(content()) def test_api_check_content_known_post_as_yaml(api_client, content): url = reverse("api-1-content-known") rv = api_client.post( url, data={"q": content["sha1"]}, HTTP_ACCEPT="application/yaml" ) assert rv.status_code == 200, rv.data assert "application/yaml" in rv["Content-Type"] assert rv.data == { "search_res": [{"found": True, "sha1": content["sha1"]}], "search_stats": {"nbfiles": 1, "pct": 100.0}, } def test_api_check_content_known_not_found(api_client): unknown_content_ = random_content() url = reverse("api-1-content-known", url_args={"q": unknown_content_["sha1"]}) rv = api_client.get(url) assert rv.status_code == 200, rv.data assert rv["Content-Type"] == "application/json" assert rv.data == { "search_res": [{"found": False, "sha1": unknown_content_["sha1"]}], "search_stats": {"nbfiles": 1, "pct": 0.0}, } @given(content()) def test_api_content_uppercase(api_client, content): url = reverse( "api-1-content-uppercase-checksum", url_args={"q": content["sha1"].upper()} ) rv = api_client.get(url) assert rv.status_code == 302, rv.data redirect_url = reverse("api-1-content", url_args={"q": content["sha1"]}) assert rv["location"] == redirect_url diff --git a/swh/web/tests/browse/views/test_content.py b/swh/web/tests/browse/views/test_content.py index 1f87eded..6bba3f81 100644 --- a/swh/web/tests/browse/views/test_content.py +++ b/swh/web/tests/browse/views/test_content.py @@ -1,592 +1,591 @@ # Copyright (C) 2017-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from django.utils.html import escape from hypothesis import given from swh.model.identifiers import CONTENT, DIRECTORY, RELEASE, REVISION, SNAPSHOT from swh.web.browse.snapshot_context import process_snapshot_branches from swh.web.browse.utils import ( get_mimetype_and_encoding_for_content, prepare_content_for_display, _re_encode_content, ) from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import gen_swhid from swh.web.common.utils import gen_path_info, reverse from swh.web.tests.django_asserts import ( assert_contains, assert_not_contains, assert_template_used, ) from swh.web.tests.strategies import ( content, content_text_non_utf8, content_text_no_highlight, content_image_type, content_unsupported_image_type_rendering, content_text, invalid_sha1, unknown_content, content_utf8_detected_as_binary, origin_with_multiple_visits, ) @given(content_text()) def test_content_view_text(client, archive_data, content): sha1_git = content["sha1_git"] url = reverse( "browse-content", url_args={"query_string": content["sha1"]}, query_params={"path": content["path"]}, ) url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]}) resp = client.get(url) content_display = _process_content_for_display(archive_data, content) mimetype = content_display["mimetype"] assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") if mimetype.startswith("text/"): assert_contains(resp, '' % content_display["language"]) assert_contains(resp, escape(content_display["content_data"])) assert_contains(resp, url_raw) swh_cnt_id = gen_swhid(CONTENT, sha1_git) swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id}) assert_contains(resp, swh_cnt_id) assert_contains(resp, swh_cnt_id_url) @given(content_text_no_highlight()) def test_content_view_text_no_highlight(client, archive_data, content): sha1_git = content["sha1_git"] url = reverse("browse-content", url_args={"query_string": content["sha1"]}) url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]}) resp = client.get(url) content_display = _process_content_for_display(archive_data, content) assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") assert_contains(resp, '') assert_contains(resp, escape(content_display["content_data"])) assert_contains(resp, url_raw) swh_cnt_id = gen_swhid(CONTENT, sha1_git) swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id}) assert_contains(resp, swh_cnt_id) assert_contains(resp, swh_cnt_id_url) @given(content_text_non_utf8()) def test_content_view_no_utf8_text(client, archive_data, content): sha1_git = content["sha1_git"] url = reverse("browse-content", url_args={"query_string": content["sha1"]}) resp = client.get(url) content_display = _process_content_for_display(archive_data, content) assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") swh_cnt_id = gen_swhid(CONTENT, sha1_git) swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id}) assert_contains(resp, swh_cnt_id_url) assert_contains(resp, escape(content_display["content_data"])) @given(content_image_type()) def test_content_view_image(client, archive_data, content): url = reverse("browse-content", url_args={"query_string": content["sha1"]}) url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]}) resp = client.get(url) content_display = _process_content_for_display(archive_data, content) mimetype = content_display["mimetype"] content_data = content_display["content_data"] assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") assert_contains(resp, '' % (mimetype, content_data)) assert_contains(resp, url_raw) @given(content_unsupported_image_type_rendering()) def test_content_view_image_no_rendering(client, archive_data, content): url = reverse("browse-content", url_args={"query_string": content["sha1"]}) resp = client.get(url) mimetype = content["mimetype"] encoding = content["encoding"] assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") assert_contains( resp, ( f"Content with mime type {mimetype} and encoding {encoding} " "cannot be displayed." ), ) @given(content_text()) def test_content_view_text_with_path(client, archive_data, content): path = content["path"] url = reverse( "browse-content", url_args={"query_string": content["sha1"]}, query_params={"path": path}, ) resp = client.get(url) assert resp.status_code == 200 assert_template_used(resp, "browse/content.html") assert_contains(resp, '