diff --git a/kubernetes/01-journal.yml b/kubernetes/01-journal.yml
index 1e7d024..946594f 100644
--- a/kubernetes/01-journal.yml
+++ b/kubernetes/01-journal.yml
@@ -1,66 +1,166 @@
-  # zookeeper:
-  #   image: wurstmeister/zookeeper
-  #   restart: always
-
-  # kafka:
-  #   image: wurstmeister/kafka
-  #   ports:
-  #     - "5092:9092"
-  #   env_file: ./env/kafka.env
-  #   environment:
-  #     KAFKA_CREATE_TOPICS: swh.journal.objects.content:1:1,
-  #       swh.journal.objects.origin:1:1,
-  #       swh.journal.objects.origin_visit:1:1,
-  #       swh.journal.objects.origin_visit_status:1:1,
-  #       swh.journal.objects.skipped_content:1:1,
-  #       swh.journal.objects.revision:1:1,
-  #       swh.journal.indexed.origin_intrinsic_metadata:1:1
-  #   depends_on:
-  #     - zookeeper
-  #   healthcheck:
-  #     test: "[ `JMX_PORT= kafka-topics.sh --list --zookeeper zookeeper:2181 | wc -l` -ge 6 ]"
-  #     interval: 10s
-  #     timeout: 5s
-  #     retries: 10
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: zookeeper
   labels:
     app: zookeeper
 spec:
   replicas: 1
   selector:
     matchLabels:
       app: zookeeper
   template:
     metadata:
       labels:
         app: zookeeper
     spec:
       containers:
-      - name: zookeeper
-        image: wurstmeister/zookeeper
-        imagePullPolicy: Always
-        ports:
-        - containerPort: 2181
-        resources:
-          requests:
-            memory: "512Mi"
-            cpu: "500m"
-          limits:
-            memory: "1024Mi"
-            cpu: "500m"
+        - name: zookeeper
+          image: wurstmeister/zookeeper
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 2181
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "1024Mi"
+              cpu: "500m"
 ---
 apiVersion: v1
 kind: Service
 metadata:
   name: zookeeper
 spec:
   type: ClusterIP
   selector:
     app: zookeeper
   ports:
     - port: 2181
       targetPort: 2181
+---
+# Environment for the kafka container; the kafka Deployment in this file
+# loads every key below through `envFrom`.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kafka
+data:
+  KAFKA_ADVERTISED_HOST_NAME: kafka
+  KAFKA_ADVERTISED_PORT: "9092"
+  KAFKA_PORT: "9092"
+  KAFKA_LISTENERS: PLAINTEXT://:9092
+  KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
+  KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+  KAFKA_MESSAGE_MAX_BYTES: "104857600"
+  KAFKA_LOG_DIRS: /kafka/logs
+  KAFKA_JMX_OPTS: -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=kafka -Dcom.sun.management.jmxremote.rmi.port=1099
+  JMX_PORT: "1099"
+  LOG4J_LOGGER_KAFKA_AUTHORIZER_LOGGER: DEBUG, authorizerAppender
+---
+# Local volume backed by a directory on the node; that directory must exist
+# before the kafka pod can start (see `local.path`).
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: kafka-pv
+spec:
+  capacity:
+    storage: 10Gi
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Delete
+  storageClassName: kafka-pv
+  local:
+    path: /srv/softwareheritage-kube/dev/kafka
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            # TODO adapt for your needs
+            - key: kubernetes.io/os
+              operator: In
+              values:
+                - linux
+---
+# Claim on the `kafka-pv` storage class; mounted at /kafka by the kafka pod.
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: kafka-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: kafka-pv
+  resources:
+    requests:
+      storage: 10Gi
+---
+# In-cluster endpoint `kafka:9092`, matching the advertised listener above.
+apiVersion: v1
+kind: Service
+metadata:
+  name: kafka
+spec:
+  selector:
+    app: kafka
+  ports:
+    - port: 9092
+      targetPort: 9092
+---
+# Single Kafka broker storing its data on the `kafka-pvc` claim.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: kafka
+  labels:
+    app: kafka
+spec:
+  replicas: 1
+  # Stop the old pod before starting the new one: a rolling update would
+  # briefly run two brokers against the same ReadWriteOnce volume.
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: kafka
+  template:
+    metadata:
+      labels:
+        app: kafka
+    spec:
+      containers:
+        - name: kafka
+          image: wurstmeister/kafka
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 9092
+          env:
+            - name: KAFKA_CREATE_TOPICS
+              value: swh.journal.objects.content:10:1:compact,
+                swh.journal.objects.origin:10:1:compact,
+                swh.journal.objects.origin_visit:10:1:compact,
+                swh.journal.objects.origin_visit_status:10:1:compact,
+                swh.journal.objects.skipped_content:10:1:compact,
+                swh.journal.objects.revision:10:1:compact,
+                swh.journal.indexed.origin_intrinsic_metadata:10:1:compact
+          envFrom:
+            - configMapRef:
+                name: kafka
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "1024Mi"
+              cpu: "500m"
+          volumeMounts:
+            - mountPath: "/kafka"
+              name: kafka-pvc
+      volumes:
+        - name: kafka-pvc
+          persistentVolumeClaim:
+            claimName: kafka-pvc
diff --git a/kubernetes/Readme.md b/kubernetes/Readme.md index 7d2a180..1a8570e 100644 --- a/kubernetes/Readme.md +++ b/kubernetes/Readme.md @@ -1,196 +1,196 @@ ## Prerequisite ### Directories ``` -# sudo mkdir -p /srv/softwareheritage-kube/dev/{objects,storage-db,scheduler-db} +# sudo mkdir -p /srv/softwareheritage-kube/dev/{objects,storage-db,scheduler-db,kafka} ``` Must match the content of `05-storage-db.yaml` ### Registry - Add the following line on your `/etc/hosts` file. It's needed to be able to push the image to it from docker ``` 127.0.0.1 registry.default ``` - Start the registry in kubernetes ``` # cd kubernetes # kubectl apply -f registry/00-registry.yml ``` ## Build the base image ``` # cd docker # docker build --no-cache -t swh/stack . # docker tag swh/stack:latest registry.default/swh/stack:latest # docker push registry.default/swh/stack:latest ``` ## start the objstorage - build image ``` # docker build -f Dockerfile.objstorage -t swh/objstorage --build-arg BASE=swh/stack . 
# docker tag swh/objstorage:latest registry.default/swh/objstorage:latest # docker push registry.default/swh/objstorage:latest ``` - start the service ``` # cd kubernetes # kubectl apply -f 10-objstorage.yml configmap/objstorage created persistentvolume/objstorage-pv created persistentvolumeclaim/objstorage-pvc created deployment.apps/objstorage created service/objstorage created ``` - test it ``` # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 46m objstorage-8587d58b68-76jbn 1/1 Running 0 12m # kubectl get services objstorage NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE objstorage ClusterIP 10.43.185.191 5003/TCP 17m # curl http://$(kubectl get services objstorage -o jsonpath='{.spec.clusterIP}'):5003 SWH Objstorage API server% ``` ## Start the storage - Start the db ``` # cd kubernetes # kubectl apply -f 05-storage-db.yml persistentvolume/storage-db-pv created persistentvolumeclaim/storage-db-pvc created secret/storage-db created configmap/storage-db created deployment.apps/storage-db created service/storage-db created # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 46m objstorage-8587d58b68-76jbn 1/1 Running 0 15m storage-db-64b7f8b684-48n7w 1/1 Running 0 4m52s # kubectl get services storage-db NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE storage-db ClusterIP 10.43.213.178 5432/TCP 8m19s ``` - Start the storage ``` # cd kubernetes # kubectl apply -f 11-storage.yml configmap/storage created deployment.apps/storage created service/storage created ``` - Test the service ``` # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 49m storage-db-64b7f8b684-48n7w 1/1 Running 0 7m40s storage-6b759fb974-w9rzj 1/1 Running 0 66s # kubectl get services storage NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE storage ClusterIP 10.43.212.116 5002/TCP 2m24s # curl http://$(kubectl get services storage -o 
jsonpath='{.spec.clusterIP}'):5002 Software Heritage storage server

You have reached the Software Heritage storage server.
See its documentation and API for more information

``` ## Start the scheduler - Start the db ``` # cd kubernetes # kubectl apply -f 15-scheduler-db.yml persistentvolume/scheduler-db-pv unchanged persistentvolumeclaim/scheduler-db-pvc created secret/scheduler-db configured configmap/scheduler-db unchanged deployment.apps/scheduler-db unchanged service/scheduler-db unchanged # kubectl get services scheduler-db NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE scheduler-db ClusterIP 10.43.115.249 5433/TCP 110s ``` - Test the service ``` # kubectl apply -f 20-scheduler.yml configmap/scheduler created deployment.apps/scheduler created service/scheduler created ingress.networking.k8s.io/scheduler created # kubectl get services scheduler NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE scheduler ClusterIP 10.43.218.74 5008/TCP 23s # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-5f6894c5b-9wkmr 1/1 Running 0 28m objstorage-5b87c549b6-f6jvc 1/1 Running 0 12m storage-db-79bfbff68-mg7fr 1/1 Running 0 107s storage-6bfcb87b6-7s7t8 1/1 Running 0 87s scheduler-db-666c8dc8b4-qxm9d 1/1 Running 0 73s scheduler-595f944854-hbsj4 1/1 Running 0 62s # curl http://$(kubectl get services scheduler -o jsonpath='{.spec.clusterIP}'):5008 Software Heritage scheduler RPC server

You have reached the Software Heritage scheduler RPC server.
See its documentation and API for more information

% ``` ## Development ### Skaffold To start the development environment using skaffold, use the following command: ``` skaffold --default-repo registry.default dev ``` It will build the images, deploy them on the local registry and start the services. It will monitor the projects to detect the changes and restart the containers when needed