diff --git a/Dockerfile.indexers b/Dockerfile.indexers new file mode 100644 index 0000000..a3defb9 --- /dev/null +++ b/Dockerfile.indexers @@ -0,0 +1,8 @@ +ARG BASE + +FROM $BASE + +COPY swh-storage /app/swh-storage +COPY swh-indexer /app/swh-indexer + +RUN pip install /app/swh-storage && pip install /app/swh-indexer diff --git a/docker/utils/pgsql.sh b/docker/utils/pgsql.sh index 2053b6c..9e779f1 100644 --- a/docker/utils/pgsql.sh +++ b/docker/utils/pgsql.sh @@ -1,53 +1,57 @@ #!/bin/bash setup_pgsql () { : > ~/.pgpass : > ~/.pg_service.conf - echo "${PGHOST}:5432:template1:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass - echo "${PGHOST}:5432:${PGUSER}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass - echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass + # Configure the standard PG env variables (out of the container expected env + # variables) + [ -z "$PGDATABASE" ] && export PGDATABASE=$POSTGRES_DB + [ -z "$PGPASSWORD" ] && export PGPASSWORD=$POSTGRES_PASSWORD + + echo "${PGHOST}:5432:${PGUSER}:${PGUSER}:${PGPASSWORD}" >> ~/.pgpass + echo "${PGHOST}:5432:${PGDATABASE}:${PGUSER}:${PGPASSWORD}" >> ~/.pgpass cat >> ~/.pg_service.conf <> ~/.pgpass cat >> ~/.pg_service.conf </dev/null 2>/dev/null } diff --git a/kubernetes/71-idx-storage.yml b/kubernetes/71-idx-storage.yml new file mode 100644 index 0000000..387770a --- /dev/null +++ b/kubernetes/71-idx-storage.yml @@ -0,0 +1,224 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: idx-storage +data: + config.yml: | + indexer_storage: + cls: local + db: postgresql:///?service=swh + journal_writer: + cls: kafka + args: + brokers: + - kafka + prefix: swh.journal.indexed + client_id: swh.idx_storage.master + producer_config: + message.max.bytes: 1000000000 + entrypoint-init-db.sh: | + #!/bin/bash + + set -e + + source /srv/softwareheritage/utils/pgsql.sh + + echo -n "waiting for database availability" + wait_pgsql ${PGDATABASE} + + echo Database setup + echo Creating extensions... 
+ swh db init-admin --db-name ${PGDATABASE} indexer + + echo Initializing the database... + swh db init --db-name ${PGDATABASE} indexer + + entrypoint.sh: | + #!/bin/bash + + set -e + + source /srv/softwareheritage/utils/pgsql.sh + setup_pgsql + + echo Starting the swh-indexer-storage RPC server + exec gunicorn --bind 0.0.0.0:5007 \ + --reload \ + --threads 4 \ + --workers 2 \ + --log-level ${LOGLEVEL} \ + --timeout 3600 \ + --config 'python:swh.core.api.gunicorn_config' \ + 'swh.indexer.storage.api.server:make_app_from_configfile()' +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: idx-storage + labels: + app: idx-storage +spec: + replicas: 1 + selector: + matchLabels: + app: idx-storage + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + template: + metadata: + labels: + app: idx-storage + spec: + initContainers: + - name: idx-storage-init + image: swh/indexers:latest + imagePullPolicy: Always + command: + - /entrypoint.sh + env: + - name: PGHOST + value: "idx-storage-db" + - name: PGUSER + valueFrom: + configMapKeyRef: + name: idx-storage-db + key: POSTGRES_USER + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: idx-storage-db + key: POSTGRES_PASSWORD + - name: PGDATABASE + valueFrom: + configMapKeyRef: + name: idx-storage-db + key: POSTGRES_DB + - name: SWH_CONFIG_FILENAME + value: /etc/softwareheritage/config.yml + volumeMounts: + - name: db-password + mountPath: /run/secrets/postgres-password + subPath: POSTGRES_PASSWORD + readOnly: true + - name: config + mountPath: /etc/softwareheritage/config.yml + subPath: config.yml + readOnly: true + - name: config + mountPath: /entrypoint.sh + subPath: entrypoint-init-db.sh + readOnly: true + containers: + - name: idx-storage + image: swh/indexers:latest + imagePullPolicy: Always + command: + - /entrypoint.sh + ports: + - containerPort: 5007 + readinessProbe: + httpGet: + path: / + port: 5007 + scheme: "HTTP" + initialDelaySeconds: 0 + failureThreshold: 2 + periodSeconds: 10 + 
startupProbe: + httpGet: + path: / + port: 5007 + scheme: "HTTP" + initialDelaySeconds: 5 + failureThreshold: 30 + periodSeconds: 1 + env: + - name: PORT + value: "5007" + - name: STATSD_HOST + value: "prometheus-statsd-exporter" + - name: STATSD_PORT + value: "9125" + - name: POSTGRES_PASSWORD_FILE + value: "/run/secrets/postgres-password" + - name: PGHOST + value: "idx-storage-db" + - name: PGUSER + valueFrom: + configMapKeyRef: + name: idx-storage-db + key: POSTGRES_USER + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: idx-storage-db + key: POSTGRES_PASSWORD + - name: PGDATABASE + valueFrom: + configMapKeyRef: + name: idx-storage-db + key: POSTGRES_DB + - name: SWH_CONFIG_FILENAME + value: /etc/softwareheritage/config.yml + - name: LOGLEVEL + value: INFO + volumeMounts: + - name: db-password + mountPath: /run/secrets/postgres-password + subPath: POSTGRES_PASSWORD + readOnly: true + - name: config + mountPath: /etc/softwareheritage/config.yml + subPath: config.yml + readOnly: true + - name: config + mountPath: /entrypoint.sh + subPath: entrypoint.sh + readOnly: true + resources: + requests: + memory: "512Mi" + cpu: "200m" + limits: + memory: "1536Mi" + cpu: "300m" + volumes: + - name: config + configMap: + name: idx-storage + defaultMode: 0777 + - name: db-password + secret: + secretName: idx-storage-db +--- +apiVersion: v1 +kind: Service +metadata: + name: idx-storage +spec: + type: ClusterIP + selector: + app: idx-storage + ports: + - port: 5007 + targetPort: 5007 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: idx-storage +spec: + rules: + - host: idx-storage.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: idx-storage + port: + number: 5007 diff --git a/kubernetes/Readme.md b/kubernetes/Readme.md index fa7e9cd..b5ec1b1 100644 --- a/kubernetes/Readme.md +++ b/kubernetes/Readme.md @@ -1,113 +1,113 @@ ## Prerequisite ### Directories ``` sudo mkdir -p 
/srv/softwareheritage-kube/dev/{objects,storage-db,scheduler-db,kafka,web-db,prometheus,zookeeper/data,zookeeper/datalog,grafana,elasticsearch,redis,registry,idx-storage-db} sudo chown 1000:1000 /srv/softwareheritage-kube/dev/{objects,elasticsearch} sudo chown -R 999:999 /srv/softwareheritage-kube/dev/*-db sudo chown 472:0 /srv/softwareheritage-kube/dev/grafana sudo chown nobody:nogroup /srv/softwareheritage-kube/dev/prometheus ``` ### Registry - Add the following line on your `/etc/hosts` file. It's needed to be able to push the image to it from docker ``` 127.0.0.1 registry.default ``` - Start the registry in kubernetes ``` kubectl apply -f kubernetes/registry/00-registry.yml ``` If you are using k3s, the registry must be declared on the `/etc/rancher/k3s/registries.yaml` as it's insecure: ``` mirrors: registry.default: endpoint: - "http://registry.default/v2/" ``` ## Build the base image ``` cd docker docker build --no-cache -t swh/stack . docker tag swh/stack:latest registry.default/swh/stack:latest docker push registry.default/swh/stack:latest ``` ## Development To access the services, they must be declared on the `/etc/hosts` file: ``` -127.0.0.1 objstorage.default storage.default webapp.default scheduler.default rabbitmq.default grafana.default prometheus.default counters.default registry-ui +127.0.0.1 objstorage.default storage.default webapp.default scheduler.default rabbitmq.default grafana.default prometheus.default counters.default registry-ui idx-storage.default ``` ### Skaffold To start the development environment using skaffold, use the following command: ``` skaffold --default-repo registry.default dev ``` It will build the images, deploy them on the local registry and start the services. 
It will monitor the projects to detect the changes and restart the containers when needed. ## Basic commands Hint: Use tab completion to discover the available commands - List pods: ``` $ kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 46m objstorage-8587d58b68-76jbn 1/1 Running 0 12m ``` - List services: ``` $ kubectl get services objstorage NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE objstorage ClusterIP 10.43.185.191 5003/TCP 17m ``` - Check service is responding: ``` $ curl http://$(kubectl get services objstorage -o jsonpath='{.spec.clusterIP}'):5003 SWH Objstorage API server% $ curl http://$(kubectl get services scheduler -o jsonpath='{.spec.clusterIP}'):5008 Software Heritage scheduler RPC server

You have reached the Software Heritage scheduler RPC server.
See its documentation and API for more information

% ``` - Force a pod to redeploy itself ``` kubectl delete pod storage-db-- ``` - Clean up registry due to too much disk space used ``` kubectl exec -ti $(kubectl get pods --no-headers -l app=registry | grep -i running | awk '{print $1}') -- /bin/registry garbage-collect -m /etc/docker/registry/config.yml ``` diff --git a/skaffold.yaml b/skaffold.yaml index ccbd3ad..79b99c9 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -1,89 +1,96 @@ apiVersion: skaffold/v2beta13 kind: Config metadata: name: swh-environment build: local: useBuildkit: true concurrency: 2 artifacts: - image: swh/stack-base context: docker docker: dockerfile: Dockerfile - image: swh/objstorage docker: dockerfile: Dockerfile.objstorage requires: - image: swh/stack-base alias: BASE - image: swh/storage docker: dockerfile: Dockerfile.storage requires: - image: swh/stack-base alias: BASE - image: swh/scheduler docker: dockerfile: Dockerfile.scheduler requires: - image: swh/stack-base alias: BASE - image: swh/webapp docker: dockerfile: Dockerfile.webapp requires: - image: swh/stack-base alias: BASE - image: swh/loaders docker: dockerfile: Dockerfile.loaders requires: - image: swh/stack-base alias: BASE - image: swh/listers docker: dockerfile: Dockerfile.listers requires: - image: swh/stack-base alias: BASE - image: swh/grafana docker: dockerfile: Dockerfile.grafana - image: swh/search docker: dockerfile: Dockerfile.search requires: - image: swh/stack-base alias: BASE - image: swh/counters docker: dockerfile: Dockerfile.counters requires: - image: swh/stack-base alias: BASE + - image: swh/indexers + docker: + dockerfile: Dockerfile.indexers + requires: + - image: swh/stack-base + alias: BASE deploy: kubectl: manifests: - kubernetes/01-journal.yml - kubernetes/02-monitoring.yml - kubernetes/05-storage-db.yml - kubernetes/10-objstorage.yml - kubernetes/11-storage.yml - kubernetes/15-scheduler-db.yml - kubernetes/16-rabbitmq.yml - kubernetes/20-scheduler.yml - kubernetes/21-scheduler-runner.yml - 
kubernetes/22-scheduler-listener.yml - kubernetes/23-scheduler-journal-client.yml - kubernetes/29-web-db.yml - kubernetes/30-webapp.yml - kubernetes/40-loaders.yml - kubernetes/45-listers.yml - kubernetes/50-elasticsearch.yml - kubernetes/55-search.yml - kubernetes/56-search-journal-client.yml - kubernetes/60-redis.yml - kubernetes/61-counters.yml - kubernetes/62-counters-journal-client.yml - kubernetes/70-idx-storage-db.yml + - kubernetes/71-idx-storage.yml