diff --git a/.gitignore b/.gitignore
new file mode 100644
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+docker-compose.override.yml
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,6 +7,19 @@
     ports:
       - 5018:15672
 
+  zookeeper:
+    image: wurstmeister/zookeeper
+    ports:
+      - "2181:2181"
+
+  kafka:
+    image: wurstmeister/kafka
+    ports:
+      - "9092:9092"
+    env_file: ./kafka.env
+    depends_on:
+      - zookeeper
+
 # Scheduler
 
   swh-scheduler-db:
@@ -58,11 +71,7 @@
     depends_on:
       - swh-storage-db
       - swh-objstorage
-    environment:
-      POSTGRES_PASSWORD: testpassword
-      POSTGRES_DB: swh-storage
-      PGHOST: swh-storage-db
-      PGUSER: postgres
+    env_file: ./storage.env
 
 # Object storage
 
@@ -186,3 +195,27 @@
       - swh-storage
       - swh-objstorage
       - amqp
+
+# Journal related
+
+  swh-storage-listener:
+    image: swh/storage-listener
+    build: ./dockerfiles/swh-storage-listener
+    env_file: ./storage.env
+    depends_on:
+      - swh-storage-db
+      - kafka
+
+  swh-journal-publisher:
+    image: swh/journal-publisher
+    build: ./dockerfiles/swh-journal-publisher
+    depends_on:
+      - kafka
+      - swh-storage-listener
+
+  swh-journal-client:
+    image: swh/journal-client
+    build: ./dockerfiles/swh-journal-client
+    depends_on:
+      - swh-journal-publisher
+
diff --git a/dockerfiles/swh-journal-client/Dockerfile b/dockerfiles/swh-journal-client/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3
+
+# Drop the apt package lists in the same layer to keep the image small
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y libsystemd-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir swh-journal
+COPY logger.yml /etc/softwareheritage/journal/
+COPY client.py /
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-journal-client/client.py b/dockerfiles/swh-journal-client/client.py
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/client.py
@@ -0,0 +1,56 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+from swh.journal.client import JournalClient
+
+
+class JournalClientLogger(JournalClient):
+    """Journal client that logs the origins and origin_visits it receives
+    from the swh journal.
+
+    """
+    # NOTE(review): presumably resolved to
+    # /etc/softwareheritage/journal/logger.yml, which is where the
+    # Dockerfile copies logger.yml -- confirm against JournalClient.
+    CONFIG_BASE_FILENAME = 'journal/logger'
+
+    def __init__(self):
+        # Only interested in origins and origin visits here, so override
+        # the object types coming from the configuration
+        super().__init__(extra_configuration={'object_types':
+                                              ['origin', 'origin_visit']})
+
+    def process_objects(self, messages):
+        """Log the messages received.
+
+        Args:
+            messages: payload passed to this hook; only its string
+                representation is used here.
+
+        """
+        # Pass the payload as a logging argument rather than pre-formatting
+        # with '%': eager formatting raises TypeError when the payload is a
+        # tuple, and it is wasted work when INFO is disabled.
+        logging.info('client received the following messages: %s', messages)
+
+
+if __name__ == '__main__':
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s %(process)d %(levelname)s %(message)s'
+    )
+
+    import click
+
+    @click.command()
+    def main():
+        """Log the new received origin and origin_visits.
+
+        """
+        JournalClientLogger().process()
+
+    main()
diff --git a/dockerfiles/swh-journal-client/entrypoint.sh b/dockerfiles/swh-journal-client/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-journal-client/entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"
+        pip install -e .
+        popd
+    done
+fi
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo "Starting an swh-journal client"
+        exec python3 -m client
+        ;;
+esac
diff --git a/dockerfiles/swh-journal-client/logger.yml b/dockerfiles/swh-journal-client/logger.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/logger.yml
@@ -0,0 +1,4 @@
+brokers:
+  - kafka
+# keep batches small
+max_messages: 1
diff --git a/dockerfiles/swh-journal-publisher/Dockerfile b/dockerfiles/swh-journal-publisher/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/Dockerfile
@@ -0,0 +1,13 @@
+FROM python:3
+
+# Drop the apt package lists in the same layer to keep the image small
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y libsystemd-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir swh-journal
+COPY publisher.yml /etc/softwareheritage/journal/
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-journal-publisher/entrypoint.sh b/dockerfiles/swh-journal-publisher/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"
+        pip install -e .
+        popd
+    done
+fi
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo "Starting swh-journal publisher"
+        exec python3 -m swh.journal.publisher --verbose
+        ;;
+esac
diff --git a/dockerfiles/swh-journal-publisher/publisher.yml b/dockerfiles/swh-journal-publisher/publisher.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/publisher.yml
@@ -0,0 +1,19 @@
+brokers:
+  - kafka
+
+temporary_prefix: swh.tmp.journal.new
+
+storage:
+  cls: remote
+  args:
+    url: http://swh-storage:5002/
+
+max_messages: 1
+
+object_types:
+  - content
+  - revision
+  - release
+  - origin
+  - origin_visit
+  - snapshot
diff --git a/dockerfiles/swh-storage-listener/Dockerfile b/dockerfiles/swh-storage-listener/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3
+
+# Drop the apt package lists in the same layer to keep the image small
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y \
+        libsystemd-dev postgresql-client && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir swh-storage kafka_python
+COPY listener.yml /etc/softwareheritage/storage/
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-storage-listener/entrypoint.sh b/dockerfiles/swh-storage-listener/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/entrypoint.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"
+        pip install -e .
+        popd
+    done
+fi
+
+echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass
+cat > ~/.pg_service.conf <<EOF
+[swh]
+dbname=${POSTGRES_DB}
+host=${PGHOST}
+port=5432
+user=${PGUSER}
+EOF
+# libpq ignores ~/.pgpass unless it is private to its owner
+chmod 0600 ~/.pgpass
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo Waiting for postgresql to start
+        until psql service=swh -c "select 1" >&2 >/dev/null; do sleep 0.1; done
+
+        echo "Starting swh-storage's listener"
+        exec python3 -m swh.storage.listener --verbose
+        ;;
+esac
diff --git a/dockerfiles/swh-storage-listener/listener.yml b/dockerfiles/swh-storage-listener/listener.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/listener.yml
@@ -0,0 +1,4 @@
+database: service=swh
+brokers:
+  - kafka
+topic_prefix: swh.tmp.journal.new
diff --git a/dockerfiles/swh-storage/entrypoint.sh b/dockerfiles/swh-storage/entrypoint.sh
--- a/dockerfiles/swh-storage/entrypoint.sh
+++ b/dockerfiles/swh-storage/entrypoint.sh
@@ -4,9 +4,9 @@
 
 if [[ -d /src ]] ; then
     for srcrepo in /src/swh-* ; do
-	pushd $srcrepo
-	pip install -e .
-	popd
+        pushd $srcrepo
+        pip install -e .
+        popd
     done
 fi
 
@@ -23,18 +23,17 @@
 
 case "$1" in
     "shell")
-	exec bash -i
-	;;
+        exec bash -i
+        ;;
     *)
+        echo Waiting for postgresql to start
+        until psql service=swh -c "select 1" > /dev/null 2> /dev/null; do sleep 0.1; done
 
-	echo Waiting for postgresql to start
-	until psql service=swh -c "select 1" > /dev/null 2> /dev/null; do sleep 0.1; done
+        echo Setup the database
+        PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
+            --db-name ${POSTGRES_DB}
 
-	echo Setup the database
-	PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
-	    --db-name ${POSTGRES_DB}
-
-	echo Starting the swh-storage API server
-	exec python -m swh.storage.api.server /storage.yml
-	;;
+        echo Starting the swh-storage API server
+        exec python -m swh.storage.api.server /storage.yml
+        ;;
 esac
diff --git a/kafka.env b/kafka.env
new file mode 100644
--- /dev/null
+++ b/kafka.env
@@ -0,0 +1,5 @@
+KAFKA_LISTENERS=PLAINTEXT://:9092
+KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
+# zookeeper setup
+KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+LOG4J_LOGGER_KAFKA_AUTHORIZER_LOGGER=DEBUG, authorizerAppender
diff --git a/storage.env b/storage.env
new file mode 100644
--- /dev/null
+++ b/storage.env
@@ -0,0 +1,4 @@
+POSTGRES_PASSWORD=testpassword
+POSTGRES_DB=swh-storage
+PGHOST=swh-storage-db
+PGUSER=postgres