Page MenuHomeSoftware Heritage

D860.id2744.diff
No OneTemporary

D860.id2744.diff

diff --git a/docker-compose.yml b/docker-compose.yml
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -7,6 +7,21 @@
ports:
- 5018:15672
+ zookeeper:
+ image: wurstmeister/zookeeper
+ ports:
+ - "2181:2181"
+
+ kafka:
+ image: wurstmeister/kafka
+ ports:
+ - "9092:9092"
+ env_file: ./kafka.env
+ volumes:
+ - /var/run/docker.sock:/var/run/docker.sock
+ depends_on:
+ - zookeeper
+
# Scheduler
swh-scheduler-db:
@@ -58,11 +73,7 @@
depends_on:
- swh-storage-db
- swh-objstorage
- environment:
- POSTGRES_PASSWORD: testpassword
- POSTGRES_DB: swh-storage
- PGHOST: swh-storage-db
- PGUSER: postgres
+ env_file: ./storage.env
# Object storage
@@ -186,3 +197,27 @@
- swh-storage
- swh-objstorage
- amqp
+
+# Journal related
+
+ swh-storage-listener:
+ image: swh/storage-listener
+ build: ./dockerfiles/swh-storage-listener
+ env_file: ./storage.env
+ depends_on:
+ - swh-storage-db
+ - kafka
+
+ swh-journal-publisher:
+ image: swh/journal-publisher
+ build: ./dockerfiles/swh-journal-publisher
+ depends_on:
+ - kafka
+ - swh-storage-listener
+
+ swh-journal-client:
+ image: swh/journal-client
+ build: ./dockerfiles/swh-journal-client
+ depends_on:
+ - swh-journal-publisher
+
diff --git a/dockerfiles/swh-journal-client/Dockerfile b/dockerfiles/swh-journal-client/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+ apt-get update && \
+ apt-get install -y libsystemd-dev
+
+RUN pip install swh-journal
+COPY logger.yml /etc/softwareheritage/journal/
+COPY client.py /
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-journal-client/client.py b/dockerfiles/swh-journal-client/client.py
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/client.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+from swh.journal.client import JournalClient
+
+
+class JournalClientLogger(JournalClient):
+    """Journal client that logs the origin and origin_visit objects it
+    receives from the swh journal.
+
+    """
+    CONFIG_BASE_FILENAME = 'journal/logger'
+
+    def __init__(self):
+        # Only interested in origin and origin_visit objects here, so
+        # override the configuration
+        super().__init__(extra_configuration={
+            'object_types': ['origin', 'origin_visit']})
+
+    def process_objects(self, messages):
+        """Log every message of the iterable `messages` at INFO level."""
+        for msg in messages:
+            # msg is a logging argument: a tuple cannot break the format
+            logging.info('msg: %s', msg)
+
+
+if __name__ == '__main__':
+ logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s %(process)d %(levelname)s %(message)s'
+ )
+
+ import click
+
+ @click.command()
+ def main():
+ """Log the new received origin and origin_visits.
+
+ """
+ JournalClientLogger().process()
+
+ main()
diff --git a/dockerfiles/swh-journal-client/entrypoint.sh b/dockerfiles/swh-journal-client/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-journal-client/entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then  # if /src exists, editable-install every /src/swh-* directory
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"  # quoted: no word splitting or re-globbing of the path
+        pip install -e .
+        popd
+    done
+fi
+
+case "$1" in
+ "shell")
+ exec bash -i
+ ;;
+ *)
+ echo "Starting an swh-journal client"
+ exec python3 -m client
+ ;;
+esac
diff --git a/dockerfiles/swh-journal-client/logger.yml b/dockerfiles/swh-journal-client/logger.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-client/logger.yml
@@ -0,0 +1,2 @@
+brokers:
+ - kafka
diff --git a/dockerfiles/swh-journal-publisher/Dockerfile b/dockerfiles/swh-journal-publisher/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/Dockerfile
@@ -0,0 +1,11 @@
+FROM python:3
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+ apt-get update && \
+ apt-get install -y libsystemd-dev
+
+RUN pip install swh-journal
+COPY publisher.yml /etc/softwareheritage/journal/
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-journal-publisher/entrypoint.sh b/dockerfiles/swh-journal-publisher/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then  # if /src exists, editable-install every /src/swh-* directory
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"  # quoted: no word splitting or re-globbing of the path
+        pip install -e .
+        popd
+    done
+fi
+
+case "$1" in
+ "shell")
+ exec bash -i
+ ;;
+ *)
+ echo "Starting swh-journal publisher"
+ exec python3 -m swh.journal.publisher --verbose
+ ;;
+esac
diff --git a/dockerfiles/swh-journal-publisher/publisher.yml b/dockerfiles/swh-journal-publisher/publisher.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-journal-publisher/publisher.yml
@@ -0,0 +1,20 @@
+brokers:
+ - kafka
+
+temporary_prefix: swh.tmp.journal.new
+
+storage:
+ cls: remote
+ args:
+ url: http://swh-storage:5002/
+
+max_messages: 1
+
+objects:
+ # - content
+ # - directory
+ - revision
+ # - release
+ - origin
+ - origin_visit
+ # - snapshot
diff --git a/dockerfiles/swh-storage-listener/Dockerfile b/dockerfiles/swh-storage-listener/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3
+
+RUN export DEBIAN_FRONTEND=noninteractive && \
+ apt-get update && \
+ apt-get install -y \
+ libsystemd-dev postgresql-client
+
+RUN pip install swh-storage kafka_python
+COPY listener.yml /etc/softwareheritage/storage/
+COPY entrypoint.sh /
+
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-storage-listener/entrypoint.sh b/dockerfiles/swh-storage-listener/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/entrypoint.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then  # if /src exists, editable-install every /src/swh-* directory
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"  # quoted: no word splitting or re-globbing of the path
+        pip install -e .
+        popd
+    done
+fi
+
+echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass
+cat > ~/.pg_service.conf <<EOF
+[swh]
+dbname=${POSTGRES_DB}
+host=${PGHOST}
+port=5432
+user=${PGUSER}
+EOF
+
+chmod 0400 ~/.pgpass
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo "Waiting for postgresql to start"
+        # Discard stdout and stderr of the probe; only its exit status matters
+        until psql service=swh -c "select 1" >/dev/null 2>&1; do sleep 0.1; done
+        echo "Starting swh-storage's listener"
+        exec python3 -m swh.storage.listener --verbose
+        ;;
+esac
diff --git a/dockerfiles/swh-storage-listener/listener.yml b/dockerfiles/swh-storage-listener/listener.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-storage-listener/listener.yml
@@ -0,0 +1,4 @@
+database: service=swh
+brokers:
+ - kafka
+topic_prefix: swh.tmp.journal.new
diff --git a/dockerfiles/swh-storage/entrypoint.sh b/dockerfiles/swh-storage/entrypoint.sh
--- a/dockerfiles/swh-storage/entrypoint.sh
+++ b/dockerfiles/swh-storage/entrypoint.sh
@@ -4,9 +4,9 @@
if [[ -d /src ]] ; then
for srcrepo in /src/swh-* ; do
- pushd $srcrepo
- pip install -e .
- popd
+ pushd $srcrepo
+ pip install -e .
+ popd
done
fi
@@ -23,18 +23,17 @@
case "$1" in
"shell")
- exec bash -i
- ;;
+ exec bash -i
+ ;;
*)
+ echo Waiting for postgresql to start
+ until psql service=swh -c "select 1" > /dev/null 2> /dev/null; do sleep 0.1; done
- echo Waiting for postgresql to start
- until psql service=swh -c "select 1" > /dev/null 2> /dev/null; do sleep 0.1; done
+ echo Setup the database
+ PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
+ --db-name ${POSTGRES_DB}
- echo Setup the database
- PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
- --db-name ${POSTGRES_DB}
-
- echo Starting the swh-storage API server
- exec python -m swh.storage.api.server /storage.yml
- ;;
+ echo Starting the swh-storage API server
+ exec python -m swh.storage.api.server /storage.yml
+ ;;
esac
diff --git a/kafka.env b/kafka.env
new file mode 100644
--- /dev/null
+++ b/kafka.env
@@ -0,0 +1,5 @@
+KAFKA_LISTENERS=PLAINTEXT://:9092
+KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
+# zookeeper setup
+KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+LOG4J_LOGGER_KAFKA_AUTHORIZER_LOGGER=DEBUG, authorizerAppender
diff --git a/storage.env b/storage.env
new file mode 100644
--- /dev/null
+++ b/storage.env
@@ -0,0 +1,4 @@
+POSTGRES_PASSWORD=testpassword
+POSTGRES_DB=swh-storage
+PGHOST=swh-storage-db
+PGUSER=postgres

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 1:02 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3230488

Event Timeline