diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ rev: v1.16.0 hooks: - id: codespell - args: [-L cas] + args: [-L cas, -L cmak ] - repo: https://github.com/PyCQA/isort rev: 5.5.2 diff --git a/bin/debpkg-bootstrap-branches b/bin/debpkg-bootstrap-branches --- a/bin/debpkg-bootstrap-branches +++ b/bin/debpkg-bootstrap-branches @@ -7,48 +7,70 @@ package=$(basename $(pwd)) module=${package//-/.} -if [ $# -ne 0 ]; then - last_debian_rev=$1 -else - last_debian_rev=master +if [ $# -lt 1 ]; then + echo "Usage: $0 " + exit 1 fi -git branch -D pristine-tar debian/upstream debian/unstable-swh debian/stretch-swh || true +initial_upstream_tag=$1 -for tag in `git tag -l debian/*`; do - git tag -d $tag -done +shift -for tag in `git tag -l --sort=v:refname v\*`; do - ver=${tag/v/} - firstver=${firstver:-${ver}} - if [ -f ../packages/${package}_${ver}.orig.tar.gz ]; then - continue - fi +read -p "This script will remove local changes and delete debian branches without confirmation. Use it on a clean checkout. Proceed? " -n 1 -r +echo # (optional) move to a new line +if [[ ! $REPLY =~ ^[Yy]$ ]] +then + exit 1 +fi - git checkout $tag - if [ -d swh ]; then - git clean -dfx swh - fi - if [ -f yarn.lock ]; then - (yarn install --frozen-lockfile && yarn build) || true - fi - find . 
-maxdepth 1 -type d -name '*.egg-info' -exec rm -r '{}' \+ - python3 setup.py egg_info - pname=$(awk '/^Name:/{print $2}' *.egg-info/PKG-INFO) - pver=$(awk '/^Version:/{print $2}' *.egg-info/PKG-INFO) - python3 setup.py sdist -d ../packages - mv ../packages/${pname}-${pver}.tar.gz ../packages/${package}_${ver}.orig.tar.gz +build_depends=() +extra_build_depends=("$@") +extra_binary_depends=() +pkg_architecture=all + +# Clean up existing debian branches and tags + +git branch -D pristine-tar debian/upstream debian/unstable-swh || true + +for tag in $(git tag -l debian/*); do + git tag -d "$tag" done -upstream_tag=$(git describe --abbrev=0) -ver=${upstream_tag/v/} +# create sdist for the initial tag + +firstver=${initial_upstream_tag/v/} +git checkout $initial_upstream_tag -author_name=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggername)%(else)%(authorname)%(end)" "${upstream_tag}") -author_email=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggeremail)%(else)%(authoremail)%(end)" "${upstream_tag}") +if [ -d swh ]; then + git clean -dfx swh +fi +if [ -f yarn.lock ]; then + (yarn install --frozen-lockfile && yarn build) || true +fi + +destdir=$(mktemp -d) + +find . 
-maxdepth 1 -type d -name '*.egg-info' -exec rm -r '{}' \+ +python3 setup.py egg_info +pname=${module} +pver=$(grep-dctrl -n -s Version -FName $module 2>/dev/null < *.egg-info/PKG-INFO || true) +python3 setup.py sdist -d $destdir +python3 setup.py bdist_wheel -d $destdir +mv ${destdir}/${pname}-${pver}.tar.gz ${destdir}/${package}_${firstver}.orig.tar.gz + +# Check if package is going to be architecture-dependent +wheels=(${destdir}/${pname}-${pver}*.whl) +if [[ "${wheels[0]}" = *$(uname -m)* ]]; then + pkg_architecture=any + extra_build_depends+=('python3-all-dev') + extra_binary_depends+=('${shlibs:Depends}') +fi + +author_name=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggername)%(else)%(authorname)%(end)" "${initial_upstream_tag}") +author_email=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggeremail)%(else)%(authoremail)%(end)" "${initial_upstream_tag}") # Strip <> author_email=${author_email:1:-1} -author_date=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggerdate:iso)%(else)%(authordate:iso)%(end)" "${upstream_tag}") +author_date=$(git tag -l --format="%(if)%(*objecttype)%(then)%(taggerdate:iso)%(else)%(authordate:iso)%(end)" "${initial_upstream_tag}") export DEBEMAIL="${author_email}" export DEBFULLNAME="${author_name}" @@ -59,52 +81,57 @@ export GIT_COMMITTER_EMAIL="${author_email}" export GIT_COMMITTER_DATE="${author_date}" -git checkout $upstream_tag +git checkout $initial_upstream_tag git clean -dfx git checkout -b debian/upstream git ls-tree --name-only HEAD | xargs rm -r -tar -x --strip-components 1 -f ../packages/${package}_${firstver}.orig.tar.gz +tar -x --strip-components 1 -f ${destdir}/${package}_${firstver}.orig.tar.gz git add . 
git commit --no-verify -m "Import upstream version ${firstver}" git tag debian/upstream/${firstver} git checkout --orphan pristine-tar -pristine-tar commit ../packages/${package}_${firstver}.orig.tar.gz +pristine-tar commit ${destdir}/${package}_${firstver}.orig.tar.gz git checkout debian/upstream git checkout -b debian/unstable-swh -git checkout ${last_debian_rev} -- debian -cat > debian/gbp.conf << EOF -[DEFAULT] -upstream-branch=debian/upstream -upstream-tag=debian/upstream/%(version)s -upstream-vcs-tag=v%(version)s -debian-branch=debian/unstable-swh -pristine-tar=True -EOF - -rm debian/changelog +cp -r $(dirname $0)/../debian-template debian/ + +summary=$(grep-dctrl -n -s Summary -FName ${module} 2>/dev/null < *.egg-info/PKG-INFO || true) +summary_escaped="${summary//\//\\\/}" + +extra_build_depends_sub="$(printf "\\\\n %s," "${extra_build_depends[@]}")" +extra_binary_depends_sub="$(printf "\\\\n %s," "${extra_binary_depends[@]}")" + +for file in debian/*; do + if [ -f "$file" ]; then + sed -i \ + -e "s/@DOTTED_PKG_NAME@/$module/g" \ + -e "s/@DASHED_PKG_NAME@/$package/g" \ + -e "s/@YEAR@/$(date +%Y)/g" \ + -e "s/@PKG_DESCRIPTION@/${summary_escaped}/g" \ + -e "s/@EXTRA_BUILD_DEPENDS@/${extra_build_depends_sub:2}/" \ + -e "s/@EXTRA_BINARY_DEPENDS@/${extra_binary_depends_sub:2}/" \ + -e "s/@PKG_ARCHITECTURE@/${pkg_architecture}/g" \ + "$file" + fi +done + +#wrap-and-sort -an + faketime "${author_date}" dch --create --package ${package} -v ${firstver}-1~swh1 'Initial release' + -git tag -l --format="%(contents:subject)%(if)%(contents:body)%(then)%0a%(contents:body)%(end)" "${upstream_tag}" | sed -E -e '/^$/d' -e 's/^ *(- *)?//' | while read line; do +git tag -l --format="%(contents:subject)%(if)%(contents:body)%(then)%0a%(contents:body)%(end)" "${initial_upstream_tag}" | sed -E -e '/^$/d' -e 's/^ *(- *)?//' | while read line; do faketime "${author_date}" dch "${line}" done faketime "${author_date}" dch -D unstable-swh --force-distribution '' +$(dirname $0)/debpkg-update-automatic-dependencies | sponge debian/control + git add debian git commit --no-verify -m "Updated debian directory for version ${firstver}" -git checkout -b debian/buster-swh 
-faketime "${author_date}" dch --bpo -D buster-swh --force-distribution '' -git add debian/changelog -sed -i s/unstable/buster/ debian/gbp.conf -git add debian/gbp.conf -git commit --no-verify -m "Updated debian buster backport directory for version ${firstver}" - -for tag in `git tag -l --sort=v:refname v\* | tail -n +2`; do - version=${tag/v/} - echo $tag: $version - ../bin/debpkg-bump-version $version ../packages/${package}_${version}.orig.tar.gz -done +rm -r ${destdir} diff --git a/bin/debpkg-get-dependencies b/bin/debpkg-get-dependencies new file mode 100755 --- /dev/null +++ b/bin/debpkg-get-dependencies @@ -0,0 +1,104 @@ +#!/usr/bin/env python3 + +import glob +import logging +import os +import sys + +sys.path.append("/usr/share/dh-python") + +from dhpython.pydist import REQUIRES_RE, guess_dependency, sensible_pname +from dhpython.version import Version + +log = logging.getLogger(__name__) + + +def parse_requirements(filename): + requirements = [] + if not os.path.exists(filename): + return requirements + + with open(filename) as f: + for line in f.readlines(): + if "#" in line: + line = line[: line.index("#")] + line = line.strip() + if not line: + continue + requirements.append(line) + return requirements + + +def dependencies_from_requirements(directory): + requirements = [] + for filename in glob.glob(os.path.join(directory, "requirements*.txt")): + requirements.extend(parse_requirements(filename)) + + for req in set(requirements): + dep = guess_dependency("cpython3", req, accept_upstream_versions=True) + if dep: + yield dep + else: + # fallback to sensible_pname + req_d = REQUIRES_RE.match(req) + if not req_d: + log.info( + "please ask dh_python3 author to fix REQUIRES_RE " + "or your upstream author to fix requires.txt" + ) + raise Exception("requirement is not valid: %s" % req) + req_d = req_d.groupdict() + name = req_d["name"] + pname = sensible_pname("cpython3", name) + if req_d.get("operator") == ">=": + yield f"{pname} (>= {req_d['version']})" + 
else: + yield pname + + if name == "swh.core" and "db" in req_d["enabled_extras"]: + yield "python3-swh.core.db.pytestplugin" + + +def get_all_dependencies(directory): + dependencies = ( + [ + "debhelper-compat (= 13)", + "python3-all", + "python3-setuptools", + "python3-setuptools-scm", + "dh-python (>= 3)", + ] + + sys.argv[1:] + + list(dependencies_from_requirements(directory)) + ) + + deduped_dependencies = set() + versions_dict = {} + for dep in dependencies: + if " " not in dep: + deduped_dependencies.add(dep) + continue + pkg, constraint = dep.split(" ", 1) + if not constraint.startswith("(>= "): + deduped_dependencies.add(dep) + continue + + version = Version(constraint[4:-1]) + if pkg in versions_dict: + versions_dict[pkg] = max(versions_dict[pkg], version) + else: + versions_dict[pkg] = version + + for pkg, minver in versions_dict.items(): + if minver.micro is not None: + minver_str = f"{minver.major}.{minver.minor}.{minver.micro}" + else: + minver_str = str(minver) + deduped_dependencies.add(f"{pkg} (>= {minver_str})") + + return sorted(deduped_dependencies) + + +if __name__ == "__main__": + for dep in get_all_dependencies("."): + print(dep) diff --git a/bin/debpkg-update-automatic-dependencies b/bin/debpkg-update-automatic-dependencies new file mode 100755 --- /dev/null +++ b/bin/debpkg-update-automatic-dependencies @@ -0,0 +1,31 @@ +#!/usr/bin/perl + +use strict; +use v5.26; + +use File::Basename; + +open( my $control, "<", "debian/control" ) or die $!; + +my $dir = dirname $0; +open( my $deps, "-|", "$dir/debpkg-get-dependencies" ) or die $!; + +my $in_build_depends = 0; + +while (<$control>) { + if ( $in_build_depends && /^[^ ]/ ) { + $in_build_depends = 0; + } + + print unless $in_build_depends; + + if (/^Build-Depends:/) { + $in_build_depends = 1; + while (<$deps>) { + chomp; + print " $_,\n"; + } + } +} + +0; diff --git a/bin/debpkg-update-metadata b/bin/debpkg-update-metadata deleted file mode 100755 --- a/bin/debpkg-update-metadata +++ /dev/null 
@@ -1,47 +0,0 @@ -#!/usr/bin/env python3 - -import glob -import os -import sys - -sys.path.append("/usr/share/dh-python") - -from dhpython.pydist import guess_dependency # noqa - - -def parse_requirements(filename): - requirements = [] - if not os.path.exists(filename): - return requirements - - with open(filename) as f: - for line in f.readlines(): - line = line.strip() - if not line or line.startswith("#"): - continue - requirements.append(line) - return requirements - - -def dependencies_from_requirements(directory): - requirements = [] - for filename in glob.glob(os.path.join(directory, "requirements*.txt")): - requirements.extend(parse_requirements(filename)) - - for req in set(requirements): - yield guess_dependency("cpython3", req, accept_upstream_versions=True) - - -def get_all_dependencies(directory): - dependencies = ["debhelper (>= 11)", "python3-all", "dh-python (>= 3)"] - yield from dependencies - yield from dependencies_from_requirements(directory) - - -if __name__ == "__main__": - if len(sys.argv) != 2: - dir = "." - else: - dir = sys.argv[1] - for dep in get_all_dependencies(dir): - print(dep) diff --git a/bin/ls-all-repos b/bin/ls-all-repos --- a/bin/ls-all-repos +++ b/bin/ls-all-repos @@ -30,6 +30,7 @@ dir=${dir#./} # strip "./" prefix if [ "$dir" == ".git" \ -o "$dir" == "bin" \ + -o "$dir" == "debian-template" \ -o "$dir" == "doc" \ -o "$dir" == "docker" \ -o "$dir" == "packages" \ diff --git a/debian-template/control b/debian-template/control new file mode 100644 --- /dev/null +++ b/debian-template/control @@ -0,0 +1,16 @@ +Source: @DASHED_PKG_NAME@ +Maintainer: Software Heritage developers +Section: python +Priority: optional +Build-Depends: +# The above dependencies are automatically generated. 
Add extra dependencies below: +@EXTRA_BUILD_DEPENDS@ +Rules-Requires-Root: no +Standards-Version: 4.6.0 +Homepage: https://forge.softwareheritage.org/source/@DASHED_PKG_NAME@ + +Package: python3-@DOTTED_PKG_NAME@ +Architecture: @PKG_ARCHITECTURE@ +Depends: ${misc:Depends}, ${python3:Depends}, +@EXTRA_BINARY_DEPENDS@ +Description: @PKG_DESCRIPTION@ diff --git a/debian-template/copyright b/debian-template/copyright new file mode 100644 --- /dev/null +++ b/debian-template/copyright @@ -0,0 +1,22 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ + +Files: * +Copyright: 2015-@YEAR@ The Software Heritage developers +License: GPL-3+ + +License: GPL-3+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + . + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + . + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + . + On Debian systems, the complete text of the GNU General Public + License version 3 can be found in `/usr/share/common-licenses/GPL-3'. 
diff --git a/debian-template/gbp.conf b/debian-template/gbp.conf new file mode 100644 --- /dev/null +++ b/debian-template/gbp.conf @@ -0,0 +1,6 @@ +[DEFAULT] +upstream-branch=debian/upstream +upstream-tag=debian/upstream/%(version)s +upstream-vcs-tag=v%(version)s +debian-branch=debian/unstable-swh +pristine-tar=True diff --git a/debian-template/rules b/debian-template/rules new file mode 100755 --- /dev/null +++ b/debian-template/rules @@ -0,0 +1,11 @@ +#!/usr/bin/make -f + +export PYBUILD_NAME=@DOTTED_PKG_NAME@ +export PYBUILD_TEST_ARGS=-vv + +%: + dh $@ --with python3 --buildsystem=pybuild + +override_dh_install: + dh_install + rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py diff --git a/debian-template/source/format b/debian-template/source/format new file mode 100644 --- /dev/null +++ b/debian-template/source/format @@ -0,0 +1 @@ +3.0 (quilt) diff --git a/docker/Dockerfile b/docker/Dockerfile --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -12,6 +12,7 @@ apt-get install -y \ libapr1-dev \ libaprutil1-dev \ + libcmph-dev \ libpq-dev \ libsvn-dev \ libsystemd-dev \ diff --git a/docker/README.rst b/docker/README.rst --- a/docker/README.rst +++ b/docker/README.rst @@ -389,14 +389,12 @@ If you hack the code of one or more archive components with a virtual env based setup as described in the -[[https://docs.softwareheritage.org/devel/developer-setup.html|developer -setup guide]], you may want to test your modifications in a working +`developer setup guide <https://docs.softwareheritage.org/devel/developer-setup.html>`__, you may want to test your modifications in a working Software Heritage instance. The simplest way to achieve this is to use this docker-based environment. 
If you haven’t followed the -[[https://docs.softwareheritage.org/devel/developer-setup.html|developer -setup guide]], you must clone the the [swh-environment] repo in your +`developer setup guide <https://docs.softwareheritage.org/devel/developer-setup.html>`__, you must clone the [swh-environment] repo in your ``swh-environment`` directory:: ~/swh-environment$ git clone https://forge.softwareheritage.org/source/swh-environment.git . @@ -406,8 +404,7 @@ directory. Also note that if you haven’t done it yet and you want to hack the source code of one or more Software Heritage packages, you really should read the -[[https://docs.softwareheritage.org/devel/developer-setup.html|developer -setup guide]]. +`developer setup guide <https://docs.softwareheritage.org/devel/developer-setup.html>`__. From there, we will checkout or update all the swh packages:: diff --git a/docker/conf/loader.yml b/docker/conf/loader.yml --- a/docker/conf/loader.yml +++ b/docker/conf/loader.yml @@ -23,6 +23,8 @@ - swh.loader.package.archive.tasks.LoadArchive - swh.loader.package.cran.tasks.LoadCRAN - swh.loader.package.debian.tasks.LoadDebian + - swh.loader.package.maven.tasks.LoadMaven + - swh.loader.package.nixguix.tasks.LoadNixguix - swh.loader.package.npm.tasks.LoadNpm - swh.loader.package.pypi.tasks.LoadPyPI - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository diff --git a/docker/conf/nginx.conf b/docker/conf/nginx.conf --- a/docker/conf/nginx.conf +++ b/docker/conf/nginx.conf @@ -19,6 +19,11 @@ # upstreams. 
resolver 127.0.0.11 valid=30s; + log_format combined_with_duration '$remote_addr - $remote_user [$time_local] ' + '"$request" $status $body_bytes_sent ' + '"$http_referer" "$http_user_agent" $request_time'; + access_log /dev/stdout combined_with_duration; + server { listen 5080 default_server; diff --git a/docker/conf/web.yml b/docker/conf/web.yml --- a/docker/conf/web.yml +++ b/docker/conf/web.yml @@ -1,7 +1,7 @@ storage: cls: remote url: http://swh-storage:5002/ - timeout: 1 + timeout: 5 indexer_storage: cls: remote diff --git a/docker/docker-compose.search.yml b/docker/docker-compose.search.yml --- a/docker/docker-compose.search.yml +++ b/docker/docker-compose.search.yml @@ -2,9 +2,12 @@ services: elasticsearch: - image: elasticsearch:7.9.3 env_file: - ./env/elasticsearch.env + image: elastic/elasticsearch:7.15.2 + environment: + - ingest.geoip.downloader.enabled=false + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" ports: - 9200:9200 volumes: diff --git a/docker/services/swh-storage/entrypoint.sh b/docker/services/swh-storage/entrypoint.sh --- a/docker/services/swh-storage/entrypoint.sh +++ b/docker/services/swh-storage/entrypoint.sh @@ -40,9 +40,11 @@ echo Starting the swh-storage API server exec gunicorn --bind 0.0.0.0:5002 \ --reload \ + --access-logfile /dev/stdout \ + --access-logformat "%(t)s %(r)s %(s)s %(b)s %(M)s" \ --threads 4 \ --workers 2 \ - --log-level DEBUG \ + --log-level INFO \ --timeout 3600 \ --config 'python:swh.core.api.gunicorn_config' \ 'swh.storage.api.server:make_app_from_configfile()' diff --git a/docker/services/swh-web/entrypoint.sh b/docker/services/swh-web/entrypoint.sh --- a/docker/services/swh-web/entrypoint.sh +++ b/docker/services/swh-web/entrypoint.sh @@ -23,6 +23,8 @@ exec bash -i ;; "cron") + wait-for-it swh-web:5004 -s --timeout=0 + echo "Start periodic save code now refresh statuses routine (in background)" exec sh -c 'trap exit TERM INT; while :; do (date && django-admin refresh_savecodenow_statuses \ diff --git 
a/docker/tests/run_tests.sh b/docker/tests/run_tests.sh --- a/docker/tests/run_tests.sh +++ b/docker/tests/run_tests.sh @@ -28,7 +28,7 @@ RED='\033[0;31m' NC='\033[0m' else - DOCO_OPTIONS='--no-ansi' + DOCO_OPTIONS='--ansi never' fi # Remove previously dumped service logs file if any