Changeset View
Standalone View
swh/web/misc/coverage.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | |||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | |||||||||||
# License: GNU Affero General Public License version 3, or any later version | # License: GNU Affero General Public License version 3, or any later version | |||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | |||||||||||
from collections import defaultdict | ||||||||||||
from typing import Dict, List, Tuple | ||||||||||||
from django.conf.urls import url | from django.conf.urls import url | |||||||||||
from django.shortcuts import render | from django.shortcuts import render | |||||||||||
from django.views.decorators.clickjacking import xframe_options_exempt | from django.views.decorators.clickjacking import xframe_options_exempt | |||||||||||
from swh.web.config import get_config | from swh.scheduler.model import SchedulerMetrics | |||||||||||
from swh.web.config import scheduler | ||||||||||||
# Current coverage list of the archive | # Current coverage list of the archive, categorized as follows: | |||||||||||
# - listed origins: origins discovered using a swh lister | ||||||||||||
# - legacy: origins whose public hosting service has closed | ||||||||||||
# - deposited: origins coming from swh-deposit | ||||||||||||
# - miscellaneous: other origin types | ||||||||||||
# TODO: Retrieve that list dynamically instead of hardcoding it | # TODO: Retrieve that list dynamically instead of hardcoding it | |||||||||||
_code_providers = [ | _listed_origins = [ | |||||||||||
{ | ||||||||||||
"type": "bitbucket", | ||||||||||||
"info_url": "https://bitbucket.org", | ||||||||||||
"logo": "img/logos/bitbucket.png", | ||||||||||||
"info": "public repositories from Bitbucket (continuously archived)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "cgit", | ||||||||||||
"info_url": "https://git.zx2c4.com/cgit/about", | ||||||||||||
"logo": "img/logos/cgit.png", | ||||||||||||
"info": "public repositories from cgit instances (continuously archived)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "CRAN", | ||||||||||||
"info_url": "https://cran.r-project.org", | ||||||||||||
"logo": "img/logos/cran.svg", | ||||||||||||
"info": ( | ||||||||||||
"source packages from The Comprehensive R Archive " | ||||||||||||
"Network (continuously archived)" | ||||||||||||
), | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "debian", | ||||||||||||
"info_url": "https://www.debian.org", | ||||||||||||
"logo": "img/logos/debian.png", | ||||||||||||
"info": ( | ||||||||||||
"source packages from the Debian distribution (continuously archived)" | ||||||||||||
), | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "gitea", | ||||||||||||
"info_url": "https://gitea.io", | ||||||||||||
"logo": "img/logos/gitea.png", | ||||||||||||
"info": "public repositories from Gitea instances (continuously archived)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "github", | ||||||||||||
"info_url": "https://github.com", | ||||||||||||
"logo": "img/logos/github.png", | ||||||||||||
"info": "public repositories from GitHub (continuously archived)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "gitlab", | ||||||||||||
"info_url": "https://gitlab.com", | ||||||||||||
"logo": "img/logos/gitlab.svg", | ||||||||||||
"info": ( | ||||||||||||
"public repositories from multiple GitLab instances (continuously archived)" | ||||||||||||
), | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "GNU", | ||||||||||||
"info_url": "https://www.gnu.org", | ||||||||||||
"logo": "img/logos/gnu.png", | ||||||||||||
"info": "releases from the GNU project (as of August 2015)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "launchpad", | ||||||||||||
"info_url": "https://launchpad.net", | ||||||||||||
"logo": "img/logos/launchpad.png", | ||||||||||||
"info": "public repositories from Launchpad (continuously archived)", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "npm", | ||||||||||||
"info_url": "https://www.npmjs.com", | ||||||||||||
"logo": "img/logos/npm.png", | ||||||||||||
"info": ( | ||||||||||||
"public packages from the package registry for" | ||||||||||||
"javascript (continuously archived)" | ||||||||||||
), | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"type": "pypi", | ||||||||||||
"info_url": "https://pypi.org", | ||||||||||||
"logo": "img/logos/pypi.svg", | ||||||||||||
"info": ( | ||||||||||||
"source packages from the Python Packaging Index (continuously archived)" | ||||||||||||
), | ||||||||||||
}, | ||||||||||||
{ | { | |||||||||||
"provider_id": "bitbucket", | "type": "sourceforge", | |||||||||||
"provider_url": "https://bitbucket.org/", | "info_url": "https://sourceforge.net", | |||||||||||
"provider_logo": "img/logos/bitbucket.png", | "logo": "img/logos/sourceforge.png", | |||||||||||
"provider_info": "public repositories from Bitbucket " | "info": ("public repositories from SourceForge (continuously archived)"), | |||||||||||
"(continuously archived)", | }, | |||||||||||
"origin_url_regexp": "^https://bitbucket.org/", | ] | |||||||||||
"origin_types": "repositories", | ||||||||||||
}, | _legacy_origins = [ | |||||||||||
{ | { | |||||||||||
"provider_id": "cran", | "type": "gitorious", | |||||||||||
"provider_url": "https://cran.r-project.org/", | "info_url": "https://gitorious.org/", | |||||||||||
"provider_logo": "img/logos/cran.svg", | "logo": "img/logos/gitorious.png", | |||||||||||
"provider_info": "source packages from The Comprehensive R Archive " | "info": "public repositories from the former Gitorious code hosting service", | |||||||||||
"Network (continuously archived)", | "count": "122,014", | |||||||||||
"origin_url_regexp": "^https://cran.r-project.org/", | }, | |||||||||||
"origin_types": "packages", | { | |||||||||||
}, | "type": "googlecode", | |||||||||||
{ | "info_url": "https://code.google.com/archive/", | |||||||||||
"provider_id": "debian", | "logo": "img/logos/googlecode.png", | |||||||||||
"provider_url": "https://www.debian.org/", | "info": ( | |||||||||||
"provider_logo": "img/logos/debian.png", | "public repositories from the former Google Code project hosting service" | |||||||||||
"provider_info": "source packages from the Debian distribution " | ), | |||||||||||
"(continuously archived)", | "count": "790,026", | |||||||||||
"origin_url_regexp": "^deb://", | }, | |||||||||||
"origin_types": "packages", | ] | |||||||||||
}, | ||||||||||||
{ | _deposited_origins = [ | |||||||||||
"provider_id": "framagit", | { | |||||||||||
"provider_url": "https://framagit.org/", | "type": "hal", | |||||||||||
"provider_logo": "img/logos/framagit.png", | "info_url": "https://hal.archives-ouvertes.fr/", | |||||||||||
"provider_info": "public repositories from Framagit " "(continuously archived)", | "logo": "img/logos/hal.png", | |||||||||||
"origin_url_regexp": "^https://framagit.org/", | "info": "scientific software source code deposited in the open archive HAL", | |||||||||||
"origin_types": "repositories", | }, | |||||||||||
}, | { | |||||||||||
{ | "type": "ipol", | |||||||||||
"provider_id": "github", | "info_url": "https://www.ipol.im/", | |||||||||||
"provider_url": "https://github.com", | "logo": "img/logos/ipol.png", | |||||||||||
"provider_logo": "img/logos/github.png", | "info": "software artifacts associated to the articles IPOL publishes", | |||||||||||
"provider_info": "public repositories from GitHub " "(continuously archived)", | }, | |||||||||||
"origin_url_regexp": "^https://github.com/", | ] | |||||||||||
"origin_types": "repositories", | ||||||||||||
}, | _miscellaneous_origins = [ | |||||||||||
{ | { | |||||||||||
"provider_id": "gitlab", | "type": "guix", | |||||||||||
"provider_url": "https://gitlab.com", | "info_url": "https://guix.gnu.org/", | |||||||||||
"provider_logo": "img/logos/gitlab.svg", | "logo": "img/logos/guix.svg", | |||||||||||
"provider_info": "public repositories from GitLab " "(continuously archived)", | "info": "source code tarballs used to build the Guix package collection", | |||||||||||
"origin_url_regexp": "^https://gitlab.com/", | }, | |||||||||||
"origin_types": "repositories", | { | |||||||||||
}, | "type": "nixos", | |||||||||||
{ | "info_url": "https://nixos.org/", | |||||||||||
"provider_id": "gitorious", | "logo": "img/logos/nixos.png", | |||||||||||
"provider_url": "https://gitorious.org/", | "info": "source code tarballs used to build the Nix package collection", | |||||||||||
"provider_logo": "img/logos/gitorious.png", | ||||||||||||
"provider_info": "public repositories from the former Gitorious code " | ||||||||||||
"hosting service", | ||||||||||||
"origin_url_regexp": "^https://gitorious.org/", | ||||||||||||
"origin_types": "repositories", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "googlecode", | ||||||||||||
"provider_url": "https://code.google.com/archive/", | ||||||||||||
"provider_logo": "img/logos/googlecode.png", | ||||||||||||
"provider_info": "public repositories from the former Google Code " | ||||||||||||
"project hosting service", | ||||||||||||
"origin_url_regexp": "^http.*.googlecode.com/", | ||||||||||||
"origin_types": "repositories", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "gnu", | ||||||||||||
"provider_url": "https://www.gnu.org", | ||||||||||||
"provider_logo": "img/logos/gnu.png", | ||||||||||||
"provider_info": "releases from the GNU project (as of August 2015)", | ||||||||||||
"origin_url_regexp": "^rsync://ftp.gnu.org/", | ||||||||||||
"origin_types": "releases", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "guix", | ||||||||||||
"provider_url": "https://guix.gnu.org/", | ||||||||||||
"provider_logo": "img/logos/guix.svg", | ||||||||||||
"provider_info": "source code tarballs used to build the Guix package " | ||||||||||||
"collection", | ||||||||||||
"origin_url_regexp": "^https://guix.gnu.org/", | ||||||||||||
"origin_types": "tarballs", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "hal", | ||||||||||||
"provider_url": "https://hal.archives-ouvertes.fr/", | ||||||||||||
"provider_logo": "img/logos/hal.png", | ||||||||||||
"provider_info": "scientific software source code deposited in the " | ||||||||||||
"open archive HAL", | ||||||||||||
"origin_url_regexp": "^https://hal.archives-ouvertes.fr/", | ||||||||||||
"origin_types": "deposits", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "inria", | ||||||||||||
"provider_url": "https://gitlab.inria.fr", | ||||||||||||
"provider_logo": "img/logos/inria.jpg", | ||||||||||||
"provider_info": "public repositories from Inria GitLab " | ||||||||||||
"(continuously archived)", | ||||||||||||
"origin_url_regexp": "^https://gitlab.inria.fr/", | ||||||||||||
"origin_types": "repositories", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "ipol", | ||||||||||||
"provider_url": "https://www.ipol.im/", | ||||||||||||
"provider_logo": "img/logos/ipol.png", | ||||||||||||
"provider_info": "software artifacts associated to the articles " | ||||||||||||
"IPOL publishes", | ||||||||||||
"origin_url_regexp": "^https://doi.org/10.5201/ipol", | ||||||||||||
"origin_types": "tarballs", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "npm", | ||||||||||||
"provider_url": "https://www.npmjs.com/", | ||||||||||||
"provider_logo": "img/logos/npm.png", | ||||||||||||
"provider_info": "public packages from the package registry for " | ||||||||||||
"javascript (continuously archived)", | ||||||||||||
"origin_url_regexp": "^https://www.npmjs.com/", | ||||||||||||
"origin_types": "packages", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "nixos", | ||||||||||||
"provider_url": "https://nixos.org/", | ||||||||||||
"provider_logo": "img/logos/nixos.png", | ||||||||||||
"provider_info": "source code tarballs used to build the Nix package " | ||||||||||||
"collection", | ||||||||||||
"origin_url_regexp": "^https://nix-community.github.io/nixpkgs-swh", | ||||||||||||
"origin_types": "tarballs", | ||||||||||||
}, | ||||||||||||
{ | ||||||||||||
"provider_id": "pypi", | ||||||||||||
"provider_url": "https://pypi.org", | ||||||||||||
"provider_logo": "img/logos/pypi.svg", | ||||||||||||
"provider_info": "source packages from the Python Packaging Index " | ||||||||||||
"(continuously archived)", | ||||||||||||
"origin_url_regexp": "^https://pypi.org/", | ||||||||||||
"origin_types": "packages", | ||||||||||||
}, | }, | |||||||||||
ardumont: or swh-web's? | ||||||||||||
Done Inline ActionsThat info could be needed in other swh components so I would not put that in swh-web db. anlambert: That info could be needed in other swh components so I would not put that in swh-web db. | ||||||||||||
Not Done Inline Actionssounds fair. ardumont: sounds fair. | ||||||||||||
] | ] | |||||||||||
def _get_listers_metrics() -> Dict[str, List[Tuple[str, SchedulerMetrics]]]: | ||||||||||||
listers_metrics = defaultdict(list) | ||||||||||||
listers = scheduler().get_listers() | ||||||||||||
scheduler_metrics = {m.lister_id: m for m in scheduler().get_metrics()} | ||||||||||||
for lister in listers: | ||||||||||||
if lister.id in scheduler_metrics: | ||||||||||||
Not Done Inline Actions
That might pose an issue because long term, other deposit clients could also reference their origin through the doi site... The ipol specific doi is https://doi.org/10.5201/ so maybe change the netloc entry to what the changes suggest? Or maybe it's yagni, let's consider this when the time comes? At least now you know ^ ;) ardumont: That might pose an issue because long term, other deposit clients could also reference their… | ||||||||||||
Done Inline ActionsI think the best way is to add a special processing for doi.org netloc and add the first path to it. anlambert: I think the best way is to add a special processing for doi.org netloc and add the first path… | ||||||||||||
listers_metrics[lister.name].append( | ||||||||||||
(lister.instance_name, scheduler_metrics[lister.id]) | ||||||||||||
) | ||||||||||||
return listers_metrics | ||||||||||||
@xframe_options_exempt | @xframe_options_exempt | |||||||||||
Not Done Inline Actionsardumont: til [1]
[1] https://docs.djangoproject.com/en/3.2/ref/clickjacking/#setting-x-frame-options… | ||||||||||||
Done Inline ActionsOriginally that view was also integrated in an iframe on softwareheritage.org but this is no longer the case. anlambert: Originally that view was also integrated in an iframe on softwareheritage.org but this is no… | ||||||||||||
def _swh_coverage(request): | def _swh_coverage(request): | |||||||||||
count_origins = get_config()["coverage_count_origins"] | listers_metrics = _get_listers_metrics() | |||||||||||
for origins in _listed_origins: | ||||||||||||
origins_type = origins["type"] | ||||||||||||
if origins_type not in listers_metrics: | ||||||||||||
continue | ||||||||||||
count = sum([d[1].origins_known for d in listers_metrics[origins_type]]) | ||||||||||||
count_never_visited = sum( | ||||||||||||
[d[1].origins_never_visited for d in listers_metrics[origins_type]] | ||||||||||||
) | ||||||||||||
Not Done Inline Actionshow weird!? ardumont: how weird!? | ||||||||||||
Done Inline ActionsYes, do not know what is wrong here but this is what we currently have in scheduler_metrics table: softwareheritage-scheduler=> select name, instance_name, last_update, origins_known, origins_enabled, origins_never_visited, origins_with_pending_changes from listers inner join scheduler_metrics on id = lister_id order by name; name | instance_name | last_update | origins_known | origins_enabled | origins_never_visited | origins_with_pending_changes ---------------+------------------------------+-------------------------------+---------------+-----------------+-----------------------+------------------------------ CRAN | cran | 2021-07-13 12:14:07.919027+00 | 18292 | 18292 | 18292 | 0 anlambert: Yes, do not know what is wrong here but this is what we currently have in scheduler_metrics table… | ||||||||||||
if origins_type != "CRAN": | ||||||||||||
count -= count_never_visited | ||||||||||||
origins["count"] = f"{count:,}" | ||||||||||||
return render( | return render( | |||||||||||
request, | request, | |||||||||||
"misc/coverage.html", | "misc/coverage.html", | |||||||||||
{"providers": _code_providers, "count_origins": count_origins}, | { | |||||||||||
"origins": { | ||||||||||||
"Listed origins": _listed_origins, | ||||||||||||
"Legacy origins": _legacy_origins, | ||||||||||||
"Deposited origins": _deposited_origins, | ||||||||||||
"Miscellaneous origins": _miscellaneous_origins, | ||||||||||||
} | ||||||||||||
}, | ||||||||||||
) | ) | |||||||||||
urlpatterns = [ | urlpatterns = [ | |||||||||||
url(r"^coverage/$", _swh_coverage, name="swh-coverage"), | url(r"^coverage/$", _swh_coverage, name="swh-coverage"), | |||||||||||
] | ] | |||||||||||
Not Done Inline Actions
(proposal) ardumont: (proposal) | ||||||||||||
Done Inline ActionsThere should be a fallback when this fails (eg. because the scheduler isn't available, or not configured in the testing env) vlorentz: There should be a fallback when this fails (eg. because the scheduler isn't available, or not… | ||||||||||||
Done Inline ActionsI ensured that the widget will still be displayed when metrics are not available, only counters info will be missing in that case. anlambert: I ensured that the widget will still be displayed when metrics are not available, only counters… | ||||||||||||
Not Done Inline Actionscrashes with KeyError: 'instances' in an empty docker instance. vlorentz: crashes with `KeyError: 'instances'` in an empty docker instance. | ||||||||||||
Not Done Inline Actionsshould be like the other nixguix loader visit type is nixguix [1] That probably explains why we got so few in your screenshot (26) is too few to my taste ;) ardumont: should be like the other nixguix loader visit type is nixguix [1]
But those are complicated… | ||||||||||||
Done Inline ActionsI missed the nixguix loader configuration for guix, the correct origin count is the number of branches in that origin, will adapt to retrieve it. anlambert: I missed the nixguix loader configuration for guix, the correct origin count is the number of… | ||||||||||||
Not Done Inline Actionsright! ardumont: right! | ||||||||||||
Not Done Inline Actionsi'd say nixpkgs here. ardumont: i'd say nixpkgs here. | ||||||||||||
Done Inline ActionsI use the type value to get associated png logo in static assets so I will keep nixos value here. anlambert: I use the type value to get associated png logo in static assets so I will keep nixos value… | ||||||||||||
Not Done Inline Actions
ack ardumont: > I use the type value to get associated png logo in static assets so I will keep nixos value… | ||||||||||||
Not Done Inline ActionsNixOS is the linux distribution (that uses nix and nix's dsl for everything). I understood nixpkgs as the superset of all packages (including the one we can install for nixos). ardumont: NixOS is the linux distribution (that uses nix and nix's dsl for everything).
Nixpkgs is the… | ||||||||||||
Not Done Inline Actions
ardumont: | ||||||||||||
Not Done Inline Actions
Scratch my previous suggestion change then ;) ardumont: Scratch my previous suggestion change then ;)
Given one of your last comment, then this method… | ||||||||||||
Done Inline Actionsalready done ;-) update incoming anlambert: already done ;-) update incoming | ||||||||||||
Not Done Inline Actions*thumbs up* ardumont: *thumbs up* |
or swh-web's?