diff --git a/assets/src/bundles/webapp/coverage.css b/assets/src/bundles/webapp/coverage.css new file mode 100644 index 00000000..e118a0e5 --- /dev/null +++ b/assets/src/bundles/webapp/coverage.css @@ -0,0 +1,79 @@ +/** + * Copyright (C) 2021 The Software Heritage developers + * See the AUTHORS file at the top-level directory of this distribution + * License: GNU Affero General Public License version 3, or any later version + * See top-level LICENSE file for more information + */ + +.swh-coverage { + padding-top: 0.3rem; + border: none; + overflow: visible; +} + +.swh-coverage a { + text-decoration: none; +} + +.swh-coverage-col { + padding-left: 10px; + padding-right: 10px; +} + +.swh-coverage-header { + padding-top: 0; + padding-bottom: 0; +} + +.swh-coverage-logo { + display: block; + width: 100%; + height: 50px; + margin-left: auto; + margin-right: auto; + object-fit: contain; + + /* polyfill for old browsers, see https://github.com/bfred-it/object-fit-images */ + font-family: "object-fit: contain;"; +} + +.swh-coverage-list { + width: 100%; + height: 320px; + border: none; +} + +.swh-coverage-chevron { + position: absolute; + right: 0; +} + +.swh-coverage .card-header .mdi { + transition: 0.3s transform ease-in-out; +} + +.swh-coverage .card-header .collapsed .mdi { + transform: rotate(90deg); +} + +.swh-coverage-info-body { + max-height: 150px; + overflow-y: auto; + overflow-x: hidden; + scrollbar-width: thin; /* Firefox only */ + padding: 0; +} + +/* Thin scrollbar for chromium based browsers */ + +.swh-coverage-info-body::-webkit-scrollbar { + width: 4px; +} + +.swh-coverage-info-body::-webkit-scrollbar-track { + background: #eff0f1; +} + +.swh-coverage-info-body::-webkit-scrollbar-thumb { + background: #909396; +} diff --git a/assets/src/bundles/webapp/index.js b/assets/src/bundles/webapp/index.js index 90663d72..1189dd91 100644 --- a/assets/src/bundles/webapp/index.js +++ b/assets/src/bundles/webapp/index.js @@ -1,27 +1,28 @@ /** - * Copyright (C) 2018-2020 The Software Heritage developers + * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ // webapp entrypoint bundle centralizing global custom stylesheets // and utility js modules used in all swh-web applications // global swh-web custom stylesheets import './webapp.css'; import './breadcrumbs.css'; +import './coverage.css'; export * from './webapp-utils'; // utility js modules export * from './code-highlighting'; export * from './readme-rendering'; export * from './pdf-rendering'; export * from './notebook-rendering'; export * from './xss-filtering'; export * from './history-counters'; export * from './badges'; export * from './sentry'; export * from './math-typesetting'; export * from './status-widget'; diff --git a/assets/src/bundles/webapp/webapp.css b/assets/src/bundles/webapp/webapp.css index 06954f92..2de07c90 100644 --- a/assets/src/bundles/webapp/webapp.css +++ b/assets/src/bundles/webapp/webapp.css @@ -1,703 +1,749 @@ /** * Copyright (C) 2018-2021 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU Affero General Public License version 3, or any later version * See top-level LICENSE file for more information */ html { height: 100%; overflow-x: hidden; scroll-behavior: auto !important; } body { min-height: 100%; margin: 0; position: relative; padding-bottom: 120px; } a:active, a.active { outline: none; } code { background-color: #f9f2f4; } pre code { background-color: transparent; } footer { background-color: #262626; color: #fff; font-size: 0.8rem; position: absolute; bottom: 0; width: 100%; padding-top: 10px; padding-bottom: 10px; } footer a, footer a:visited, footer a:hover { color: #fecd1b; } footer a:hover { text-decoration: underline; } .link-color { color: #fecd1b; } pre { background-color: #f5f5f5; border: 1px solid #ccc; border-radius: 4px; padding: 9.5px; font-size: 0.8rem; } .btn.active { background-color: #e7e7e7; } .card { margin-bottom: 5px !important; overflow-x: auto; } .navbar-brand { padding: 5px; margin-right: 0; } .table { margin-bottom: 0; } .swh-table thead { background-color: #f2f4f5; border-top: 1px solid rgba(0, 0, 0, 0.2); font-weight: normal; } .swh-table-striped th { border-top: none; } .swh-table-striped tbody tr:nth-child(even) { background-color: #f2f4f5; } .swh-table-striped tbody tr:nth-child(odd) { background-color: #fff; } .swh-web-app-link a { text-decoration: none; border: none; } .swh-web-app-link:hover { background-color: #efeff2; } .table > thead > tr > th { border-top: none; border-bottom: 1px solid #e20026; } .table > tbody > tr > td { border-style: none; } .sitename .first-word, .sitename .second-word { color: rgba(0, 0, 0, 0.75); font-weight: normal; font-size: 1.2rem; } .sitename .first-word { - font-family: 'Alegreya Sans', sans-serif; + font-family: "Alegreya Sans", sans-serif; } .sitename .second-word { - font-family: 'Alegreya', serif; + font-family: "Alegreya", serif; } .swh-counter { font-size: 150%; } @media (max-width: 600px) { .swh-counter-container { margin-top: 1rem; } } .swh-http-error { margin: 0 auto; text-align: center; } .swh-http-error-head { color: #2d353c; font-size: 30px; } .swh-http-error-code { bottom: 60%; color: #2d353c; font-size: 96px; line-height: 80px; margin-bottom: 10px !important; } .swh-http-error-desc { font-size: 12px; color: #647788; text-align: center; } .swh-http-error-desc pre { display: inline-block; text-align: left; max-width: 800px; white-space: pre-wrap; } .swh-list-unstyled { list-style: none; } .popover { max-width: 97%; z-index: 40000; } .modal { text-align: center; padding: 0 !important; z-index: 50000; } .modal::before { - content: ''; + content: ""; display: inline-block; height: 100%; vertical-align: middle; margin-right: -4px; } .modal-dialog { display: inline-block; text-align: left; vertical-align: middle; } .dropdown-submenu { position: relative; } .dropdown-submenu .dropdown-menu { top: 0; left: -100%; margin-top: -5px; margin-left: -2px; } .dropdown-item:hover, .dropdown-item:focus { background-color: rgba(0, 0, 0, 0.1); } a.dropdown-left::before { content: "\f035e"; - font-family: 'Material Design Icons'; + font-family: "Material Design Icons"; display: block; width: 20px; height: 20px; float: left; margin-left: 0; } #swh-navbar { border-top-style: none; border-left-style: none; border-right-style: none; border-bottom-style: solid; border-bottom-width: 5px; - border-image: linear-gradient(to right, rgb(226, 0, 38) 0%, rgb(254, 205, 27) 100%) 1 1 1 1; + border-image: + linear-gradient( + to right, + rgb(226, 0, 38) 0%, + rgb(254, 205, 27) 100% + ) + 1 1 1 1; width: 100%; padding: 5px; margin-bottom: 10px; margin-top: 30px; justify-content: normal; flex-wrap: nowrap; height: 72px; overflow: hidden; } #back-to-top { display: none; position: fixed; bottom: 30px; right: 30px; z-index: 10; } #back-to-top a img { display: block; width: 32px; height: 32px; background-size: 32px 32px; text-indent: -999px; overflow: hidden; } .swh-top-bar { direction: ltr; height: 30px; position: fixed; top: 0; left: 0; width: 100%; z-index: 99999; background-color: #262626; color: #fff; text-align: center; font-size: 14px; } .swh-top-bar ul { margin-top: 4px; padding-left: 0; white-space: nowrap; } .swh-top-bar li { display: inline-block; margin-left: 10px; margin-right: 10px; } .swh-top-bar a, .swh-top-bar a:visited { color: white; } .swh-top-bar a.swh-current-site, .swh-top-bar a.swh-current-site:visited { color: #fecd1b; } .swh-position-left { position: absolute; left: 0; } .swh-position-right { position: absolute; right: 0; } .swh-background-gray { background: #efeff2; } .swh-donate-link { border: 1px solid #fecd1b; background-color: #e20026; color: white !important; padding: 3px; border-radius: 3px; } .swh-navbar-content h4 { padding-top: 7px; } .swh-navbar-content .bread-crumbs { display: block; margin-left: -40px; } .swh-navbar-content .bread-crumbs li.bc-no-root { padding-top: 7px; } .main-sidebar { margin-top: 30px; } .content-wrapper { background: none; } .brand-image { max-height: 40px; } .brand-link { padding-top: 18.5px; padding-bottom: 18px; padding-left: 4px; border-bottom: 5px solid #e20026 !important; } .navbar-header a, ul.dropdown-menu a, ul.navbar-nav a, ul.nav-sidebar a { border-bottom-style: none; color: #323232; } .swh-sidebar .nav-link.active { color: #323232 !important; background-color: #e7e7e7 !important; } .nav-tabs .nav-link.active { border-top: 3px solid #e20026; } .swh-image-error { width: 80px; height: auto; } @media (max-width: 600px) { .card { min-width: 80%; } .swh-image-error { width: 40px; height: auto; } .swh-donate-link { display: none; } } .form-check-label { padding-top: 4px; } .swhid { white-space: pre-wrap; } .swhid .swhid-option { display: inline-block; margin-right: 5px; line-height: 1rem; } .nav-pills .nav-link:not(.active):hover { color: rgba(0, 0, 0, 0.55); } .swh-heading-color { color: #e20026 !important; } .sidebar-mini.sidebar-collapse .main-sidebar:hover { width: 4.6rem; } .sidebar-mini.sidebar-collapse .main-sidebar:hover .user-panel > .info, .sidebar-mini.sidebar-collapse .main-sidebar:hover .nav-sidebar .nav-link p, .sidebar-mini.sidebar-collapse .main-sidebar:hover .brand-text { visibility: hidden !important; } .sidebar .nav-link p, .main-sidebar .brand-text, .sidebar .user-panel .info { transition: none; } .sidebar-mini.sidebar-mini.sidebar-collapse .sidebar { padding-right: 0; } .swh-words-logo { position: absolute; top: 0; left: 0; width: 73px; height: 73px; text-align: center; font-size: 10pt; color: rgba(0, 0, 0, 0.75); } .swh-words-logo:hover { text-decoration: none; } .swh-words-logo-swh { line-height: 1; padding-top: 13px; visibility: hidden; } hr.swh-faded-line { border: 0; height: 1px; background-image: linear-gradient(to left, #f0f0f0, #8c8b8b, #f0f0f0); } /* Ensure that section title with link is colored like standard section title */ .swh-readme h1 a, .swh-readme h2 a, .swh-readme h3 a, .swh-readme h4 a, .swh-readme h5 a, .swh-readme h6 a { color: #e20026; } /* Make list compact in reStructuredText rendering */ .swh-rst li p { margin-bottom: 0; } .swh-readme-txt pre { background: none; border: none; } -.swh-coverage-col { - padding-left: 10px; - padding-right: 10px; -} - .swh-coverage { - height: calc(65px + 1em); padding-top: 0.3rem; border: none; + overflow: visible; } .swh-coverage a { text-decoration: none; } +.swh-coverage-col { + padding-left: 10px; + padding-right: 10px; +} + +.swh-coverage-header { + padding-top: 0; + padding-bottom: 0; +} + .swh-coverage-logo { display: block; width: 100%; height: 50px; margin-left: auto; margin-right: auto; object-fit: contain; /* polyfill for old browsers, see https://github.com/bfred-it/object-fit-images */ - font-family: 'object-fit: contain;'; + font-family: "object-fit: contain;"; } .swh-coverage-list { width: 100%; height: 320px; border: none; } +.swh-coverage-chevron { + position: absolute; + right: 0; +} + +.swh-coverage .card-header .mdi { + transition: 0.3s transform ease-in-out; +} + +.swh-coverage .card-header .collapsed .mdi { + transform: rotate(90deg); +} + +.swh-coverage-info-body { + max-height: 150px; + overflow-y: auto; + overflow-x: hidden; + scrollbar-width: thin; /* Firefox only */ + padding: 0; +} + +/* Thin scrollbar for chromium based browsers */ + +.swh-coverage-info-body::-webkit-scrollbar { + width: 4px; +} + +.swh-coverage-info-body::-webkit-scrollbar-track { + background: #eff0f1; +} + +.swh-coverage-info-body::-webkit-scrollbar-thumb { + background: #909396; +} + tr.swh-tr-hover-highlight:hover td { background: #ededed; } tr.swh-api-doc-route a { text-decoration: none; } .swh-apidoc .col { margin: 10px; } .swh-apidoc .swh-rst blockquote { border: 0; margin: 0; padding: 0; } a.toggle-col { text-decoration: none; } a.toggle-col.col-hidden { text-decoration: line-through; } .admonition.warning { background: #fcf8e3; border: 1px solid #faebcc; padding: 15px; border-radius: 4px; } .admonition.warning p { margin-bottom: 0; } .admonition.warning .first { font-size: 1.5rem; } .swh-popover { max-height: 50vh; overflow-y: auto; overflow-x: auto; padding: 0; } @media screen and (min-width: 768px) { .swh-popover { max-width: 50vw; } } .swh-popover pre { white-space: pre-wrap; margin-bottom: 0; } .d3-wrapper { position: relative; height: 0; width: 100%; padding: 0; /* padding-bottom will be overwritten by JavaScript later */ padding-bottom: 100%; } .d3-wrapper > svg { position: absolute; height: 100%; width: 100%; left: 0; top: 0; } div.d3-tooltip { position: absolute; text-align: center; width: auto; height: auto; padding: 2px; font: 12px sans-serif; background: white; border: 1px solid black; border-radius: 4px; pointer-events: none; } .page-link { cursor: pointer; } .wrapper { overflow: hidden; } .swh-badge { padding-bottom: 1rem; cursor: pointer; } .swh-badge-html, .swh-badge-md, .swh-badge-rst { white-space: pre-wrap; } /* Material Design icons alignment tweaks */ .mdi { display: inline-block; } .mdi-camera { transform: translateY(1px); } .mdi-source-commit { transform: translateY(2px); } /* To set icons at a fixed width. Great to use when different icon widths throw off alignment. Courtesy of Font Awesome. */ .mdi-fw { text-align: center; width: 1.25em; } .main-header .nav-link { height: inherit; } .nav-sidebar .nav-header:not(:first-of-type) { padding-top: 1rem; } .nav-sidebar .nav-link { padding-top: 0; padding-bottom: 0; } .nav-sidebar > .nav-item .nav-icon { vertical-align: sub; } .swh-search-icon { line-height: 1rem; vertical-align: middle; } .swh-search-navbar { position: absolute; top: 0.7rem; right: 15rem; z-index: 50000; width: 500px; } .sidebar-collapse .swh-search-navbar { right: 4rem; } .swh-corner-ribbon { width: 200px; background: #fecd1b; color: #e20026; position: absolute; text-align: center; letter-spacing: 1px; box-shadow: 0 0 3px rgba(0, 0, 0, 0.3); top: 55px; right: -50px; left: auto; transform: rotate(45deg); z-index: 2000; } @media screen and (max-width: 600px) { .swh-corner-ribbon { top: 53px; right: -65px; } } .invalid-feedback { font-size: 100%; } diff --git a/static/img/logos/cgit.png b/static/img/logos/cgit.png new file mode 100644 index 00000000..425528ee Binary files /dev/null and b/static/img/logos/cgit.png differ diff --git a/static/img/logos/cran.png b/static/img/logos/cran.png new file mode 100644 index 00000000..16f1734a Binary files /dev/null and b/static/img/logos/cran.png differ diff --git a/static/img/logos/cran.svg b/static/img/logos/cran.svg deleted file mode 100644 index 7b162d43..00000000 --- a/static/img/logos/cran.svg +++ /dev/null @@ -1,33 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/static/img/logos/elife.png b/static/img/logos/elife.png new file mode 100644 index 00000000..9a5cb7cb Binary files /dev/null and b/static/img/logos/elife.png differ diff --git a/static/img/logos/framagit.png b/static/img/logos/framagit.png deleted file mode 100644 index 0b78ed4f..00000000 Binary files a/static/img/logos/framagit.png and /dev/null differ diff --git a/static/img/logos/gitea.png b/static/img/logos/gitea.png new file mode 100644 index 00000000..b624d2d5 Binary files /dev/null and b/static/img/logos/gitea.png differ diff --git a/static/img/logos/gitlab.png b/static/img/logos/gitlab.png new file mode 100644 index 00000000..1eb3c77d Binary files /dev/null and b/static/img/logos/gitlab.png differ diff --git a/static/img/logos/gitlab.svg b/static/img/logos/gitlab.svg deleted file mode 100644 index 74505975..00000000 --- a/static/img/logos/gitlab.svg +++ /dev/null @@ -1,32 +0,0 @@ - - - - wm_no_bg - Created with Sketch. - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/static/img/logos/guix.png b/static/img/logos/guix.png new file mode 100644 index 00000000..7693a131 Binary files /dev/null and b/static/img/logos/guix.png differ diff --git a/static/img/logos/guix.svg b/static/img/logos/guix.svg deleted file mode 100644 index ecd4bc42..00000000 --- a/static/img/logos/guix.svg +++ /dev/null @@ -1,177 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - diff --git a/static/img/logos/launchpad.png b/static/img/logos/launchpad.png new file mode 100644 index 00000000..8bc38f2f Binary files /dev/null and b/static/img/logos/launchpad.png differ diff --git a/static/img/logos/phabricator.png b/static/img/logos/phabricator.png new file mode 100644 index 00000000..bcae2ea5 Binary files /dev/null and b/static/img/logos/phabricator.png differ diff --git a/static/img/logos/pypi.png b/static/img/logos/pypi.png new file mode 100644 index 00000000..4d5942ca Binary files /dev/null and b/static/img/logos/pypi.png differ diff --git a/static/img/logos/pypi.svg b/static/img/logos/pypi.svg deleted file mode 100644 index e53853c5..00000000 --- a/static/img/logos/pypi.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/static/img/logos/sourceforge.png b/static/img/logos/sourceforge.png new file mode 100644 index 00000000..c307f271 Binary files /dev/null and b/static/img/logos/sourceforge.png differ diff --git a/swh/web/config.py b/swh/web/config.py index 169b317b..dba7378f 100644 --- a/swh/web/config.py +++ b/swh/web/config.py @@ -1,220 +1,219 @@ # Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict from swh.core import config from swh.counters import get_counters from swh.indexer.storage import get_indexer_storage from swh.scheduler import get_scheduler from swh.search import get_search from swh.storage import get_storage from swh.vault import get_vault from swh.web import settings SWH_WEB_INTERNAL_SERVER_NAME = "archive.internal.softwareheritage.org" STAGING_SERVER_NAMES = [ "webapp.staging.swh.network", "webapp.internal.staging.swh.network", ] ORIGIN_VISIT_TYPES = [ "cran", "deb", "deposit", "ftp", "hg", "git", "nixguix", "npm", "pypi", "svn", "tar", ] SETTINGS_DIR = os.path.dirname(settings.__file__) DEFAULT_CONFIG = { "allowed_hosts": ("list", []), "storage": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5002/", "timeout": 10,}, ), "indexer_storage": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5007/", "timeout": 1,}, ), "counters": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5011/", "timeout": 1,}, ), "search": ( "dict", {"cls": "remote", "url": "http://127.0.0.1:5010/", "timeout": 10,}, ), "search_config": ( "dict", {"backend": "swh-indexer-storage", "enable_ql": False}, # or "swh-search" ), "log_dir": ("string", "/tmp/swh/log"), "debug": ("bool", False), "serve_assets": ("bool", False), "host": ("string", "127.0.0.1"), "port": ("int", 5004), "secret_key": ("string", "development key"), # do not display code highlighting for content > 1MB "content_display_max_size": ("int", 5 * 1024 * 1024), "snapshot_content_max_size": ("int", 1000), "throttling": ( "dict", { "cache_uri": None, # production: memcached as cache (127.0.0.1:11211) # development: in-memory cache so None "scopes": { "swh_api": { "limiter_rate": {"default": "120/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_search": { "limiter_rate": {"default": "10/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_vault_cooking": { "limiter_rate": {"default": "120/h", "GET": "60/m"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_save_origin": { "limiter_rate": {"default": "120/h", "POST": "10/h"}, "exempted_networks": ["127.0.0.0/8"], }, "swh_api_origin_visit_latest": { "limiter_rate": {"default": "700/m"}, "exempted_networks": ["127.0.0.0/8"], }, }, }, ), "vault": ("dict", {"cls": "remote", "args": {"url": "http://127.0.0.1:5005/",}}), "scheduler": ("dict", {"cls": "remote", "url": "http://127.0.0.1:5008/"}), "development_db": ("string", os.path.join(SETTINGS_DIR, "db.sqlite3")), "test_db": ("string", os.path.join(SETTINGS_DIR, "testdb.sqlite3")), "production_db": ("dict", {"name": "swh-web"}), "deposit": ( "dict", { "private_api_url": "https://deposit.softwareheritage.org/1/private/", "private_api_user": "swhworker", "private_api_password": "some-password", }, ), - "coverage_count_origins": ("bool", False), "e2e_tests_mode": ("bool", False), "es_workers_index_url": ("string", ""), "history_counters_url": ( "string", ( "http://counters1.internal.softwareheritage.org:5011" "/counters_history/history.json" ), ), "client_config": ("dict", {}), "keycloak": ("dict", {"server_url": "", "realm_name": ""}), "graph": ( "dict", {"server_url": "http://graph.internal.softwareheritage.org:5009/graph/"}, ), "status": ( "dict", { "server_url": "https://status.softwareheritage.org/", "json_path": "1.0/status/578e5eddcdc0cc7951000520", }, ), "counters_backend": ("string", "swh-storage"), # or "swh-counters" "staging_server_names": ("list", STAGING_SERVER_NAMES), "instance_name": ("str", "archive-test.softwareheritage.org"), } swhweb_config: Dict[str, Any] = {} def get_config(config_file="web/web"): """Read the configuration file `config_file`. If an environment variable SWH_CONFIG_FILENAME is defined, this takes precedence over the config_file parameter. In any case, update the app with parameters (secret_key, conf) and return the parsed configuration as a dict. If no configuration file is provided, return a default configuration. """ if not swhweb_config: config_filename = os.environ.get("SWH_CONFIG_FILENAME") if config_filename: config_file = config_filename cfg = config.load_named_config(config_file, DEFAULT_CONFIG) swhweb_config.update(cfg) config.prepare_folders(swhweb_config, "log_dir") if swhweb_config.get("search"): swhweb_config["search"] = get_search(**swhweb_config["search"]) else: swhweb_config["search"] = None swhweb_config["storage"] = get_storage(**swhweb_config["storage"]) swhweb_config["vault"] = get_vault(**swhweb_config["vault"]) swhweb_config["indexer_storage"] = get_indexer_storage( **swhweb_config["indexer_storage"] ) swhweb_config["scheduler"] = get_scheduler(**swhweb_config["scheduler"]) swhweb_config["counters"] = get_counters(**swhweb_config["counters"]) return swhweb_config def search(): """Return the current application's search. """ return get_config()["search"] def storage(): """Return the current application's storage. """ return get_config()["storage"] def vault(): """Return the current application's vault. """ return get_config()["vault"] def indexer_storage(): """Return the current application's indexer storage. """ return get_config()["indexer_storage"] def scheduler(): """Return the current application's scheduler. """ return get_config()["scheduler"] def counters(): """Return the current application's counters. """ return get_config()["counters"] diff --git a/swh/web/misc/coverage.py b/swh/web/misc/coverage.py index c42c4aae..b4b84234 100644 --- a/swh/web/misc/coverage.py +++ b/swh/web/misc/coverage.py @@ -1,170 +1,376 @@ -# Copyright (C) 2018-2019 The Software Heritage developers +# Copyright (C) 2018-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information +from collections import Counter, defaultdict +from functools import lru_cache +from typing import Dict, List, Tuple +from urllib.parse import urlparse + +import sentry_sdk + from django.conf.urls import url from django.shortcuts import render from django.views.decorators.clickjacking import xframe_options_exempt -from swh.web.config import get_config - -# Current coverage list of the archive -# TODO: Retrieve that list dynamically instead of hardcoding it -_code_providers = [ - { - "provider_id": "bitbucket", - "provider_url": "https://bitbucket.org/", - "provider_logo": "img/logos/bitbucket.png", - "provider_info": "public repositories from Bitbucket " - "(continuously archived)", - "origin_url_regexp": "^https://bitbucket.org/", - "origin_types": "repositories", - }, - { - "provider_id": "cran", - "provider_url": "https://cran.r-project.org/", - "provider_logo": "img/logos/cran.svg", - "provider_info": "source packages from The Comprehensive R Archive " - "Network (continuously archived)", - "origin_url_regexp": "^https://cran.r-project.org/", - "origin_types": "packages", - }, - { - "provider_id": "debian", - "provider_url": "https://www.debian.org/", - "provider_logo": "img/logos/debian.png", - "provider_info": "source packages from the Debian distribution " - "(continuously archived)", - "origin_url_regexp": "^deb://", - "origin_types": "packages", - }, - { - "provider_id": "framagit", - "provider_url": "https://framagit.org/", - "provider_logo": "img/logos/framagit.png", - "provider_info": "public repositories from Framagit " "(continuously archived)", - "origin_url_regexp": "^https://framagit.org/", - "origin_types": "repositories", - }, - { - "provider_id": "github", - "provider_url": "https://github.com", - "provider_logo": "img/logos/github.png", - "provider_info": "public repositories from GitHub " "(continuously archived)", - "origin_url_regexp": "^https://github.com/", - "origin_types": "repositories", - }, - { - "provider_id": "gitlab", - "provider_url": "https://gitlab.com", - "provider_logo": "img/logos/gitlab.svg", - "provider_info": "public repositories from GitLab " "(continuously archived)", - "origin_url_regexp": "^https://gitlab.com/", - "origin_types": "repositories", - }, - { - "provider_id": "gitorious", - "provider_url": "https://gitorious.org/", - "provider_logo": "img/logos/gitorious.png", - "provider_info": "public repositories from the former Gitorious code " - "hosting service", - "origin_url_regexp": "^https://gitorious.org/", - "origin_types": "repositories", - }, - { - "provider_id": "googlecode", - "provider_url": "https://code.google.com/archive/", - "provider_logo": "img/logos/googlecode.png", - "provider_info": "public repositories from the former Google Code " - "project hosting service", - "origin_url_regexp": "^http.*.googlecode.com/", - "origin_types": "repositories", - }, - { - "provider_id": "gnu", - "provider_url": "https://www.gnu.org", - "provider_logo": "img/logos/gnu.png", - "provider_info": "releases from the GNU project (as of August 2015)", - "origin_url_regexp": "^rsync://ftp.gnu.org/", - "origin_types": "releases", - }, - { - "provider_id": "guix", - "provider_url": "https://guix.gnu.org/", - "provider_logo": "img/logos/guix.svg", - "provider_info": "source code tarballs used to build the Guix package " - "collection", - "origin_url_regexp": "^https://guix.gnu.org/", - "origin_types": "tarballs", - }, - { - "provider_id": "hal", - "provider_url": "https://hal.archives-ouvertes.fr/", - "provider_logo": "img/logos/hal.png", - "provider_info": "scientific software source code deposited in the " - "open archive HAL", - "origin_url_regexp": "^https://hal.archives-ouvertes.fr/", - "origin_types": "deposits", - }, - { - "provider_id": "inria", - "provider_url": "https://gitlab.inria.fr", - "provider_logo": "img/logos/inria.jpg", - "provider_info": "public repositories from Inria GitLab " - "(continuously archived)", - "origin_url_regexp": "^https://gitlab.inria.fr/", - "origin_types": "repositories", - }, - { - "provider_id": "ipol", - "provider_url": "https://www.ipol.im/", - "provider_logo": "img/logos/ipol.png", - "provider_info": "software artifacts associated to the articles " - "IPOL publishes", - "origin_url_regexp": "^https://doi.org/10.5201/ipol", - "origin_types": "tarballs", - }, - { - "provider_id": "npm", - "provider_url": "https://www.npmjs.com/", - "provider_logo": "img/logos/npm.png", - "provider_info": "public packages from the package registry for " - "javascript (continuously archived)", - "origin_url_regexp": "^https://www.npmjs.com/", - "origin_types": "packages", - }, - { - "provider_id": "nixos", - "provider_url": "https://nixos.org/", - "provider_logo": "img/logos/nixos.png", - "provider_info": "source code tarballs used to build the Nix package " - "collection", - "origin_url_regexp": "^https://nix-community.github.io/nixpkgs-swh", - "origin_types": "tarballs", - }, - { - "provider_id": "pypi", - "provider_url": "https://pypi.org", - "provider_logo": "img/logos/pypi.svg", - "provider_info": "source packages from the Python Packaging Index " - "(continuously archived)", - "origin_url_regexp": "^https://pypi.org/", - "origin_types": "packages", - }, -] +from swh.scheduler.model import SchedulerMetrics +from swh.web.common import archive +from swh.web.common.utils import get_deposits_list, reverse +from swh.web.config import scheduler + +_swh_arch_overview_doc = ( + "https://docs.softwareheritage.org/devel/architecture/overview.html" +) + +# Current coverage list of the archive in a high level overview fashion, +# categorized as follow: +# - listed origins: origins discovered using a swh lister +# - legacy: origins where public hosting service has closed +# - deposited: origins coming from swh-deposit +# +# TODO: Store that list in a database table somewhere (swh-scheduler, swh-storage ?) +# and retrieve it dynamically +listed_origins = { + "info": ( + "These software origins get continuously discovered and archived using " + f'the listers implemented by Software Heritage.' + ), + "origins": [ + { + "type": "bitbucket", + "info_url": "https://bitbucket.org", + "info": "public repositories from Bitbucket", + "search_pattern": "https://bitbucket.org/", + }, + { + "type": "cgit", + "info_url": "https://git.zx2c4.com/cgit/about", + "info": "public repositories from cgit instances", + "search_pattern": "cgit", + }, + { + "type": "CRAN", + "info_url": "https://cran.r-project.org", + "info": "source packages from The Comprehensive R Archive Network", + "search_pattern": "https://cran.r-project.org/", + }, + { + "type": "debian", + "info_url": "https://www.debian.org", + "info": "source packages from the Debian distribution", + "search_pattern": "deb://", + }, + { + "type": "gitea", + "info_url": "https://gitea.io", + "info": "public repositories from Gitea instances", + "search_pattern": "gitea", + }, + { + "type": "github", + "info_url": "https://github.com", + "info": "public repositories from GitHub", + "search_pattern": "https://github.com/", + }, + { + "type": "gitlab", + "info_url": "https://gitlab.com", + "info": "public repositories from multiple GitLab instances", + "search_pattern": "gitlab", + }, + { + "type": "guix", + "info_url": "https://guix.gnu.org", + "info": "source code tarballs used to build the Guix package collection", + "visit_types": ["nixguix"], + "search_pattern": "https://guix.gnu.org/sources.json", + }, + { + "type": "GNU", + "info_url": "https://www.gnu.org", + "info": "releases from the GNU project (as of August 2015)", + "search_pattern": "gnu", + }, + { + "type": "launchpad", + "info_url": "https://launchpad.net", + "logo": "img/logos/launchpad.png", + "info": "public repositories from Launchpad", + "search_pattern": "https://git.launchpad.net/", + }, + { + "type": "nixos", + "info_url": "https://nixos.org", + "info": "source code tarballs used to build the Nix package collection", + "visit_types": ["nixguix"], + "search_pattern": ( + "https://nix-community.github.io/nixpkgs-swh/sources-unstable.json" + ), + }, + { + "type": "npm", + "info_url": "https://www.npmjs.com", + "info": "public packages from the package registry for javascript", + "search_pattern": "https://www.npmjs.com", + }, + # apart our forge, most phabricator origins have not been archived + # while they have been listed so do not display those type of origins + # until new listing processes have been executed and origins loaded + # + # { + # "type": "phabricator", + # "info_url": "https://www.phacility.com/phabricator", + # "info": "public repositories from multiple Phabricator instances", + # "search_pattern": "phabricator", + # }, + { + "type": "pypi", + "info_url": "https://pypi.org", + "info": "source packages from the Python Package Index", + "search_pattern": "https://pypi.org", + }, + { + "type": "sourceforge", + "info_url": "https://sourceforge.net", + "info": "public repositories from SourceForge", + "search_pattern": "code.sf.net", + }, + ], +} + +legacy_origins = { + "info": ( + "Discontinued hosting services. Those origins have been archived " + "by Software Heritage." + ), + "origins": [ + { + "type": "gitorious", + "info_url": "https://en.wikipedia.org/wiki/Gitorious", + "info": ( + "public repositories from the former Gitorious code hosting service" + ), + "visit_types": ["git"], + "search_pattern": "https://gitorious.org", + "count": "122,014", + }, + { + "type": "googlecode", + "info_url": "https://code.google.com/archive", + "info": ( + "public repositories from the former Google Code project " + "hosting service" + ), + "visit_types": ["git", "hg", "svn"], + "search_pattern": "googlecode.com", + "count": "790,026", + }, + ], +} + +deposited_origins = { + "info": ( + "These origins are directly pushed into the archive by trusted partners " + f'using the deposit service of Software Heritage.' + ), + "origins": [ + { + "type": "elife", + "info_url": "https://elifesciences.org", + "info": ( + "research software source code associated to the articles " + "eLife publishes" + ), + "search_pattern": "elife.stencila.io", + "visit_types": ["deposit"], + }, + { + "type": "hal", + "info_url": "https://hal.archives-ouvertes.fr", + "info": "scientific software source code deposited in the open archive HAL", + "visit_types": ["deposit"], + "search_pattern": "hal.archives-ouvertes.fr", + }, + { + "type": "ipol", + "info_url": "https://www.ipol.im", + "info": "software artifacts associated to the articles IPOL publishes", + "visit_types": ["deposit"], + "search_pattern": "doi.org/10.5201", + }, + ], +} + + +@lru_cache() +def _get_listers_metrics() -> Dict[str, List[Tuple[str, SchedulerMetrics]]]: + """Returns scheduler metrics in the following mapping: + Dict[lister_name, List[Tuple[instance_name, SchedulerMetrics]]] + as a lister instance has one SchedulerMetrics object per visit type. + """ + listers_metrics = defaultdict(list) + try: + listers = scheduler().get_listers() + scheduler_metrics = scheduler().get_metrics() + for lister in listers: + for metrics in filter( + lambda m: m.lister_id == lister.id, scheduler_metrics + ): + listers_metrics[lister.name].append((lister.instance_name, metrics)) + except Exception as e: + sentry_sdk.capture_exception(e) + return listers_metrics + + +@lru_cache() +def _get_deposits_netloc_counts() -> Counter: + """Return deposit counts per origin url network location. + """ + + def _process_origin_url(origin_url): + parsed_url = urlparse(origin_url) + netloc = parsed_url.netloc + # special treatment for doi.org netloc as it is not specific enough + # for origins mapping + if parsed_url.netloc == "doi.org": + netloc += "/" + parsed_url.path.split("/")[1] + return netloc + + netlocs = [] + try: + deposits = get_deposits_list() + netlocs = [ + _process_origin_url(d["origin_url"]) + for d in deposits + if d["status"] == "done" + ] + except Exception as e: + sentry_sdk.capture_exception(e) + return Counter(netlocs) + + +@lru_cache() +def _get_nixguix_origins_count(origin_url: str) -> int: + """Returns number of archived tarballs for NixOS, aka the number + of branches in a dedicated origin in the archive. + """ + snapshot = archive.lookup_latest_origin_snapshot(origin_url) + if snapshot: + snapshot_sizes = archive.lookup_snapshot_sizes(snapshot["id"]) + return snapshot_sizes["revision"] + else: + return 0 + + +def _search_url(query: str, visit_type: str) -> str: + return reverse( + "browse-search", + query_params={ + "q": query, + "visit_type": visit_type, + "with_visit": "true", + "with_content": "true", + }, + ) @xframe_options_exempt def _swh_coverage(request): - count_origins = get_config()["coverage_count_origins"] + listers_metrics = _get_listers_metrics() + for origins in listed_origins["origins"]: + origins["instances"] = {} + origins_type = origins["type"] + + # special processing for nixos/guix origins as there is no + # scheduler metrics for those + if origins_type in ("nixos", "guix"): + count = _get_nixguix_origins_count(origins["search_pattern"]) + origins["count"] = count + origins["instances"][origins_type] = {"nixguix": {"count": count}} + + if origins_type not in listers_metrics: + continue + + count = sum( + [metrics.origins_known for _, metrics in listers_metrics[origins_type]] + ) + count_never_visited = sum( + [ + metrics.origins_never_visited + for _, metrics in listers_metrics[origins_type] + ] + ) + # CRAN origins are currently marked as not visited while they have been + if origins_type != "CRAN": + count -= count_never_visited + + origins["count"] = f"{count:,}" + origins["instances"] = defaultdict(dict) + for instance, metrics in listers_metrics[origins_type]: + # not yet in production + if metrics.visit_type in ("bzr", "cvs"): + continue + origins["instances"][instance].update( + { + metrics.visit_type: { + "count": metrics.origins_known - metrics.origins_never_visited + } + } + ) + origins["visit_types"] = list( + set(origins["instances"][instance].keys()) + | set(origins.get("visit_types", [])) + ) + + if origins_type == "CRAN": + origins["instances"]["cran"]["cran"] = {"count": origins["count"]} + + # defaultdict cannot be iterated in django template + origins["instances"] = dict(origins["instances"]) + + for origins in listed_origins["origins"]: + instances = origins["instances"] + nb_instances = len(instances) + for instance_name, visit_types in instances.items(): + for visit_type in visit_types: + if nb_instances > 1: + search_pattern = instance_name + else: + search_pattern = origins["search_pattern"] + search_url = _search_url(search_pattern, visit_type) + visit_types[visit_type]["search_url"] = search_url + + for origins in legacy_origins["origins"]: + origins["search_urls"] = {} + for visit_type in origins["visit_types"]: + origins["search_urls"][visit_type] = _search_url( + origins["search_pattern"], visit_type + ) + + deposits_counts = _get_deposits_netloc_counts() + for origins in deposited_origins["origins"]: + if origins["search_pattern"] in deposits_counts: + origins["count"] = f"{deposits_counts[origins['search_pattern']]:,}" + origins["search_urls"] = { + "deposit": _search_url(origins["search_pattern"], "deposit") + } + return render( request, "misc/coverage.html", - {"providers": _code_providers, "count_origins": count_origins}, + { + "origins": { + "Regular crawling": listed_origins, + "Discontinued hosting": legacy_origins, + "On demand archival": deposited_origins, + } + }, ) urlpatterns = [ url(r"^coverage/$", _swh_coverage, name="swh-coverage"), ] diff --git a/swh/web/templates/homepage.html b/swh/web/templates/homepage.html index caa259a6..d49f8411 100644 --- a/swh/web/templates/homepage.html +++ b/swh/web/templates/homepage.html @@ -1,117 +1,113 @@ {% extends "layout.html" %} {% comment %} Copyright (C) 2017-2020 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load static %} {% load render_bundle from webpack_loader %} {% block header %} {% render_bundle 'browse' %} {% endblock %} {% block title %}Welcome to the Software Heritage archive{% endblock %} {% block navbar-content %}

Welcome to the Software Heritage archive

{% endblock %} {% block content %}

... or check our Web API

Overview

The long term goal of the Software Heritage initiative is to collect all publicly available software in source code form together with its development history, replicate it massively to ensure its preservation, and share it with everyone who needs it. The Software Heritage archive is growing over time as we crawl new source code from software projects and development forges.

Content

-

- A significant amount of source code has already been ingested in the Software Heritage - archive. It currently includes: -

Size

As of today the archive already contains and keeps safe for you the following amount of objects:

Source files
0
Commits
0
Projects
0
Directories
0
Authors
0
Releases
0

Note: the counters and graphs above are based on heuristics that might not reflect the exact size of the archive. While the long-term trends shown and ballpark figures are reliable, individual point-in-time values might not be.

{% endblock %} diff --git a/swh/web/templates/misc/coverage.html b/swh/web/templates/misc/coverage.html index d6171e13..5a6c9658 100644 --- a/swh/web/templates/misc/coverage.html +++ b/swh/web/templates/misc/coverage.html @@ -1,91 +1,151 @@ {% comment %} -Copyright (C) 2015-2019 The Software Heritage developers +Copyright (C) 2015-2021 The Software Heritage developers See the AUTHORS file at the top-level directory of this distribution License: GNU Affero General Public License version 3, or any later version See top-level LICENSE file for more information {% endcomment %} {% load js_reverse %} {% load static %} {% load render_bundle from webpack_loader %} Software Heritage archive coverage {% render_bundle 'vendors' %} {% render_bundle 'webapp' %}
-
- {% for provider in providers %} -
-
- - - -
+

+ A significant amount of source code has already been ingested in the Software Heritage + archive. It notably includes the following software origins. +

+ {% for origins_type, origins_data in origins.items %} +
{{ origins_type }}
+

{{ origins_data.info | safe }}

+
+ {% for origins in origins_data.origins %} +
+
+ + {% with 'img/logos/'|add:origins.type.lower|add:'.png' as png_logo %} + + {% endwith %} + + +
+
+ + {% if "instances" in origins %} + + + + + + + + + + {% for instance, visit_types in origins.instances.items %} + {% for visit_type, data in visit_types.items %} + {% if data.count %} + + + + + + + {% endif %} + {% endfor %} + {% endfor %} + + {% else %} + + + + + + + + + {% for visit_type, search_url in origins.search_urls.items %} + + + + + + {% endfor %} + + {% endif %} +
instancetypecountsearch
{{ instance }}{{ visit_type }}{{ data.count }} + + + +
instancetypesearch
{{ origins.type }}{{ visit_type }} + + + +
+
+
+
-
- {% endfor %} -
+ {% endfor %} +
+ {% endfor %}
JavaScript license information - {% if count_origins %} - - {% endif %} diff --git a/swh/web/tests/misc/test_coverage.py b/swh/web/tests/misc/test_coverage.py new file mode 100644 index 00000000..1de2b2c7 --- /dev/null +++ b/swh/web/tests/misc/test_coverage.py @@ -0,0 +1,133 @@ +# Copyright (C) 2021 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU Affero General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from datetime import datetime, timezone +from itertools import chain +import os +from random import choice, randint +import uuid + +import pytest + +from django.conf import settings +from django.utils.html import escape + +from swh.scheduler.model import LastVisitStatus, ListedOrigin, OriginVisitStats +from swh.web.common.utils import reverse +from swh.web.misc.coverage import ( + _get_deposits_netloc_counts, + _get_listers_metrics, + deposited_origins, + legacy_origins, + listed_origins, +) +from swh.web.tests.django_asserts import assert_contains +from swh.web.tests.utils import check_html_get_response + + +@pytest.fixture(autouse=True) +def clear_lru_caches(): + _get_listers_metrics.cache_clear() + _get_deposits_netloc_counts.cache_clear() + + +def test_coverage_view_no_metrics(client): + """ + Check coverage view can be rendered when scheduler metrics and deposits + data are not available. + """ + url = reverse("swh-coverage") + check_html_get_response( + client, url, status_code=200, template_used="misc/coverage.html" + ) + + +def test_coverage_view_with_metrics(client, swh_scheduler, mocker): + """ + Generate some sample scheduler metrics and some sample deposits + that will be consumed by the archive coverage view, then check + the HTML page gets rendered without errors. + """ + mocker.patch( + "swh.web.misc.coverage._get_nixguix_origins_count" + ).return_value = 30095 + listers = [] + for origins in listed_origins["origins"]: + # create some instances for each lister + for instance in range(randint(1, 5)): + lister = swh_scheduler.get_or_create_lister( + origins["type"], f"instance-{instance}" + ) + listers.append(lister) + # record some sample listed origins + _origins = [] + origin_visit_stats = [] + for i in range(randint(3, 10)): + url = str(uuid.uuid4()) + visit_type = choice(["git", "hg", "svn"]) + _origins.append( + ListedOrigin( + lister_id=lister.id, + url=url, + visit_type=visit_type, + extra_loader_arguments={}, + ) + ) + # set origin visit stats to some origins + if i % 2 == 0: + now = datetime.now(tz=timezone.utc) + origin_visit_stats.append( + OriginVisitStats( + url=url, + visit_type=visit_type, + last_successful=now, + last_visit=now, + last_visit_status=LastVisitStatus.successful, + last_snapshot=os.urandom(20), + ) + ) + # send origins data to scheduler + swh_scheduler.record_listed_origins(_origins) + swh_scheduler.origin_visit_stats_upsert(origin_visit_stats) + + # compute scheduler metrics + swh_scheduler.update_metrics() + + # add some sample deposits + deposits = [] + for origins in deposited_origins["origins"]: + for _ in range(randint(2, 10)): + deposits.append( + { + "origin_url": f"https://{origins['search_pattern']}/{uuid.uuid4()}", + "status": "done", + } + ) + get_deposits_list = mocker.patch("swh.web.misc.coverage.get_deposits_list") + get_deposits_list.return_value = deposits + + # check view gets rendered without errors + url = reverse("swh-coverage") + resp = check_html_get_response( + client, url, status_code=200, template_used="misc/coverage.html" + ) + + # check logos and origins search links are present in the rendered page + for origins in chain( + listed_origins["origins"], + legacy_origins["origins"], + deposited_origins["origins"], + ): + logo_url = f'{settings.STATIC_URL}img/logos/{origins["type"].lower()}.png' + assert_contains(resp, f'src="{logo_url}"') + + if "instances" in origins: + for visit_types in origins["instances"].values(): + for data in visit_types.values(): + if data["count"]: + assert_contains(resp, f'