diff --git a/docker/Dockerfile b/docker/Dockerfile index ec80cf5..bd6db86 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,56 +1,57 @@ FROM python:3.7 RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && \ apt-get install -y \ libapr1-dev \ libaprutil1-dev \ libpq-dev \ libsvn-dev \ libsystemd-dev \ + memcached \ postgresql-client \ wait-for-it \ ngrep && \ apt-get install -y --no-install-recommends \ r-base-core \ r-cran-jsonlite && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* RUN useradd -md /srv/softwareheritage -s /bin/bash swh USER swh RUN python3 -m venv /srv/softwareheritage/venv ENV PATH="/srv/softwareheritage/venv/bin:${PATH}" RUN pip install --upgrade pip setuptools wheel RUN pip install gunicorn httpie ARG CASS_DRIVER_NO_CYTHON ENV CASS_DRIVER_NO_CYTHON ${CASS_DRIVER_NO_CYTHON} ARG CASS_DRIVER_BUILD_CONCURRENCY ENV CASS_DRIVER_BUILD_CONCURRENCY ${CASS_DRIVER_BUILD_CONCURRENCY:-1} RUN pip install cassandra-driver # Enforce installation of django 1 otherwise pip will choose django 2 when # installing the swh stack due to poor version dependency support in pip RUN pip install 'Django<2' RUN pip install \ swh-core[db,http] \ swh-deposit[server] \ swh-indexer \ swh-journal \ swh-lister \ swh-loader-core \ swh-loader-git \ swh-loader-mercurial \ swh-loader-svn \ swh-storage \ swh-objstorage \ swh-scheduler \ swh-vault \ swh-web COPY utils/*.sh /srv/softwareheritage/utils/ RUN mkdir -p /srv/softwareheritage/objects RUN rm -rd /srv/softwareheritage/.cache diff --git a/docker/conf/web.yml b/docker/conf/web.yml index 1a07ba0..fa0e6e2 100644 --- a/docker/conf/web.yml +++ b/docker/conf/web.yml @@ -1,69 +1,71 @@ storage: cls: remote args: url: http://swh-storage:5002/ timeout: 1 objstorage: cls: remote args: url: http://swh-objstorage:5003/ indexer_storage: cls: remote args: url: http://swh-idx-storage:5007/ scheduler: cls: remote args: url: http://swh-scheduler:5008/ vault: cls: remote args: url: http://swh-vault:5005/ deposit: private_api_url: https://swh-deposit:5006/1/private/ private_api_user: swhworker private_api_password: '' allowed_hosts: - "*" debug: yes serve_assets: yes development_db: /tmp/db.sqlite3 +production_db: /tmp/db.sqlite3 throttling: + cache_uri: 127.0.0.1:11211 scopes: swh_api: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_api_origin_search: limiter_rate: default: 70/m exempted_networks: - 0.0.0.0/0 swh_api_origin_visit_latest: limiter_rate: default: 700/m exempted_networks: - 0.0.0.0/0 swh_vault_cooking: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_save_origin: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 search: {} diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 93390cb..bd30576 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,415 +1,415 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 # flower: # image: mher/flower # command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower # ports: # - 5055:5555 # depends_on: # - amqp zookeeper: image: wurstmeister/zookeeper restart: always kafka: image: wurstmeister/kafka ports: - "5092:9092" env_file: ./env/kafka.env depends_on: - zookeeper kafka-manager: image: hlebalbau/kafka-manager:stable ports: - "5093:9000" environment: ZK_HOSTS: zookeeper:2181 APPLICATION_SECRET: random-secret command: -Dpidfile.path=/dev/null prometheus: image: prom/prometheus depends_on: - prometheus-statsd-exporter command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped prometheus-rabbitmq-exporter: image: kbudde/rabbitmq-exporter restart: unless-stopped environment: SKIP_QUEUES: "RPC_.*" MAX_QUEUES: 5000 RABBIT_URL: http://amqp:15672 grafana: image: grafana/grafana restart: unless-stopped depends_on: - prometheus environment: GF_SERVER_ROOT_URL: http://localhost:5080/grafana volumes: - "./conf/grafana/provisioning:/etc/grafana/provisioning:ro" - "./conf/grafana/dashboards:/var/lib/grafana/dashboards" nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:11 env_file: - ./env/common_python.env - ./env/scheduler-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-scheduler: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-listener: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-listener depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-runner -p 10 depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" # Graph storage swh-storage-db: image: postgres:11 env_file: - ./env/storage-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-storage: image: swh/stack build: ./ ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage - kafka env_file: - ./env/common_python.env - ./env/storage-db.env environment: SWH_CONFIG_FILENAME: /storage.yml STORAGE_BACKEND: postgresql entrypoint: /entrypoint.sh volumes: - "./conf/storage.yml:/storage.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # Object storage swh-objstorage: build: ./ image: swh/stack ports: - 5003:5003 env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /objstorage.yml entrypoint: /entrypoint.sh volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" - "./services/swh-objstorage/entrypoint.sh:/entrypoint.sh:ro" # Indexer storage swh-idx-storage-db: image: postgres:11 env_file: - ./env/indexers-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-idx-storage: image: swh/stack build: ./ ports: - 5007:5007 depends_on: - swh-idx-storage-db env_file: - ./env/common_python.env - ./env/indexers-db.env environment: SWH_CONFIG_FILENAME: /indexer_storage.yml entrypoint: /entrypoint.sh volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" - "./services/swh-indexer-storage/entrypoint.sh:/entrypoint.sh:ro" # Web interface swh-web: build: ./ image: swh/stack ports: - 5004:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage env_file: - ./env/common_python.env environment: VERBOSITY: 3 - DJANGO_SETTINGS_MODULE: swh.web.settings.development + DJANGO_SETTINGS_MODULE: swh.web.settings.production SWH_CONFIG_FILENAME: /web.yml entrypoint: /entrypoint.sh volumes: - "./conf/web.yml:/web.yml:ro" - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" swh-deposit-db: image: postgres:11 env_file: - ./env/deposit-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-deposit: image: swh/stack build: ./ ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler env_file: - ./env/common_python.env - ./env/deposit-db.env environment: VERBOSITY: 3 SWH_CONFIG_FILENAME: /deposit.yml DJANGO_SETTINGS_MODULE: swh.deposit.settings.production entrypoint: /entrypoint.sh volumes: - "./conf/deposit.yml:/deposit.yml:ro" - "./services/swh-deposit/entrypoint.sh:/entrypoint.sh:ro" swh-vault-db: image: postgres:11 env_file: - ./env/vault-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-vault: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/vault-db.env environment: SWH_CONFIG_FILENAME: /vault.yml command: server ports: - 5005:5005 depends_on: - swh-vault-db - swh-objstorage - swh-storage - swh-scheduler entrypoint: /entrypoint.sh volumes: - "./conf/vault.yml:/vault.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" swh-vault-worker: image: swh/stack build: ./ command: worker env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault - swh-storage entrypoint: /entrypoint.sh volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" # Lister Celery workers swh-listers-db: image: postgres:11 env_file: - ./env/listers-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-lister: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/listers-db.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: listers SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-listers-db - swh-scheduler - swh-scheduler-runner - swh-storage - amqp entrypoint: /entrypoint.sh volumes: - "./conf/lister.yml:/lister.yml:ro" - "./services/swh-listers-worker/entrypoint.sh:/entrypoint.sh:ro" # Loader + deposit checker Celery workers swh-loader: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - swh-deposit - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" # Indexer Celery workers swh-indexer: image: swh/stack build: ./ user: swh env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/workers.env environment: SWH_WORKER_INSTANCE: indexer SWH_CONFIG_FILENAME: /indexer.yml CONCURRENCY: 4 entrypoint: /entrypoint.sh depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" - "./services/swh-indexer-worker/entrypoint.sh:/entrypoint.sh:ro" # Journal related swh-indexer-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: - kafka - swh-storage - swh-scheduler volumes: - "./conf/indexer_journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" - "./services/swh-indexer-journal-client/entrypoint.sh:/entrypoint.sh:ro" diff --git a/docker/services/swh-web/entrypoint.sh b/docker/services/swh-web/entrypoint.sh index 7affc60..0e9102a 100755 --- a/docker/services/swh-web/entrypoint.sh +++ b/docker/services/swh-web/entrypoint.sh @@ -1,39 +1,42 @@ #!/bin/bash set -e create_admin_script=" from django.contrib.auth import get_user_model; username = 'admin'; password = 'admin'; email = 'admin@swh-web.org'; User = get_user_model(); if not User.objects.filter(username = username).exists(): User.objects.create_superuser(username, email, password); " source /srv/softwareheritage/utils/pyutils.sh setup_pip case "$1" in "shell") exec bash -i ;; *) + echo "Starting memcached" + memcached& + echo "Migrating db using ${DJANGO_SETTINGS_MODULE}" django-admin migrate --settings=${DJANGO_SETTINGS_MODULE} echo "Creating admin user" echo "$create_admin_script" | python3 -m swh.web.manage shell echo "starting the swh-web server" exec gunicorn --bind 0.0.0.0:5004 \ --threads 2 \ --workers 2 \ --timeout 3600 \ --config 'python:swh.web.gunicorn_config' \ 'django.core.wsgi:get_wsgi_application()' esac