Page Menu | Home | Software Heritage

No One | Temporary

diff --git a/docker-compose.yml b/docker-compose.yml
index 07f21cb..ad88a4e 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,450 +1,391 @@
version: '2'
services:
amqp:
image: rabbitmq:3.6-management
ports:
- 5072:5672
flower:
image: mher/flower
command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower
ports:
- 5055:5555
depends_on:
- amqp
zookeeper:
image: wurstmeister/zookeeper
kafka:
image: wurstmeister/kafka
ports:
- 5092:9092
env_file: ./kafka.env
depends_on:
- zookeeper
prometheus:
image: prom/prometheus
command:
# Needed for the reverse-proxy
- "--web.external-url=/prometheus"
volumes:
- "./prometheus.yml:/etc/prometheus/prometheus.yml:ro"
restart: unless-stopped
prometheus-statsd-exporter:
image: prom/statsd-exporter
command:
- "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml"
volumes:
- "./prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro"
restart: unless-stopped
nginx:
image: nginx
volumes:
- "./nginx.conf:/etc/nginx/nginx.conf:ro"
ports:
- 5080:5080
# Scheduler
swh-scheduler-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-scheduler
swh-scheduler-api:
image: swh/scheduler-api
build: ./dockerfiles/swh-scheduler-api
env_file: ./scheduler.env
depends_on:
- swh-scheduler-db
ports:
- 5008:5008
swh-scheduler-listener:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
command: listener
depends_on:
- swh-scheduler-api
- amqp
swh-scheduler-runner:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
command: runner -p 10
depends_on:
- swh-scheduler-api
- amqp
# Graph storage
swh-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-storage
swh-storage:
build: ./dockerfiles/swh-storage
image: swh/storage
ports:
- 5002:5002
depends_on:
- swh-storage-db
- swh-objstorage
env_file: ./storage.env
# Object storage
swh-objstorage:
build: ./dockerfiles/swh-objstorage
image: swh/objstorage
ports:
- 5003:5003
# Indexer storage
swh-idx-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
swh-idx-storage:
build: ./dockerfiles/swh-indexer-storage
image: swh/indexer-storage
ports:
- 5007:5007
depends_on:
- swh-idx-storage-db
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
PGHOST: swh-idx-storage-db
PGUSER: postgres
# Web interface
swh-web:
build: ./dockerfiles/swh-web
image: swh/web
ports:
- 5004:5004
depends_on:
- swh-objstorage
- swh-storage
- swh-idx-storage
swh-deposit-db:
image: postgres:10
env_file: ./deposit.env
swh-deposit:
build: ./dockerfiles/swh-deposit
image: swh/deposit
ports:
- 5006:5006
depends_on:
- swh-deposit-db
- swh-scheduler-api
env_file: ./deposit.env
environment:
PGHOST: swh-deposit-db
# Lister Celery workers
swh-listers-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
swh-lister-debian:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: debian
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
swh-lister-bitbucket:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: bitbucket
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
swh-lister-github:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: github
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
swh-lister-gitlab:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: gitlab
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
swh-lister-npm:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: npm
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
swh-lister-pypi:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: pypi
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-scheduler-runner
- swh-storage
- amqp
# Indexer Celery workers
- swh-indexer-mimetype:
+ swh-indexer:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
- SWH_WORKER_INSTANCE: content_mimetype
depends_on:
- swh-scheduler-runner
- swh-idx-storage
- swh-storage
- swh-objstorage
- amqp
- swh-indexer-license:
- image: swh/indexer-worker
- build: ./dockerfiles/swh-indexer-worker
- env_file: ./indexers.env
- environment:
- STATSD_HOST: prometheus-statsd-exporter
- STATSD_PORT: 9125
- SWH_WORKER_INSTANCE: content_fossology_license
- depends_on:
- - swh-scheduler-runner
- - swh-idx-storage
- - swh-storage
- - swh-objstorage
- - amqp
-
- swh-indexer-origin-head:
- image: swh/indexer-worker
- build: ./dockerfiles/swh-indexer-worker
- env_file: ./indexers.env
- environment:
- STATSD_HOST: prometheus-statsd-exporter
- STATSD_PORT: 9125
- SWH_WORKER_INSTANCE: origin_head
- depends_on:
- - swh-scheduler-api
- - swh-scheduler-runner
- - swh-idx-storage
- - swh-storage
- - amqp
-
- swh-indexer-revision-metadata:
- image: swh/indexer-worker
- build: ./dockerfiles/swh-indexer-worker
- env_file: ./indexers.env
- environment:
- STATSD_HOST: prometheus-statsd-exporter
- STATSD_PORT: 9125
- SWH_WORKER_INSTANCE: revision_metadata
- depends_on:
- - swh-scheduler-runner
- - swh-idx-storage
- - swh-storage
- - swh-objstorage
- - amqp
- swh-indexer-origin-intrinsic-metadata:
- image: swh/indexer-worker
- build: ./dockerfiles/swh-indexer-worker
- env_file: ./indexers.env
- environment:
- STATSD_HOST: prometheus-statsd-exporter
- STATSD_PORT: 9125
- SWH_WORKER_INSTANCE: origin_intrinsic_metadata
- depends_on:
- - swh-scheduler-runner
- - swh-idx-storage
- - swh-storage
- - amqp
-
swh-indexer-journal-client:
image: swh/indexer-journal-client
build: ./dockerfiles/swh-indexer-journal-client
depends_on:
- swh-journal-publisher
- swh-scheduler-api
# Journal related
swh-storage-listener:
image: swh/storage-listener
build: ./dockerfiles/swh-storage-listener
env_file: ./storage.env
depends_on:
- swh-storage-db
- kafka
swh-journal-publisher:
image: swh/journal-publisher
build: ./dockerfiles/swh-journal-publisher
depends_on:
- kafka
- swh-storage-listener
swh-journal-client:
image: swh/journal-client
build: ./dockerfiles/swh-journal-client
depends_on:
- swh-journal-publisher
# Loader Celery workers
swh-loader-debian:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: debian
depends_on:
- swh-storage
- amqp
swh-loader-dir:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: dir
depends_on:
- swh-storage
- amqp
swh-loader-git:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: git
depends_on:
- swh-storage
- amqp
swh-loader-mercurial:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: mercurial
depends_on:
- swh-storage
- amqp
swh-loader-pypi:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: pypi
depends_on:
- swh-storage
- amqp
swh-loader-svn:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: svn
depends_on:
- swh-storage
- amqp
swh-loader-tar:
image: swh/loaders-worker
build: ./dockerfiles/swh-loaders-worker
env_file: ./listers.env
environment:
STATSD_HOST: prometheus-statsd-exporter
STATSD_PORT: 9125
SWH_WORKER_INSTANCE: tar
depends_on:
- swh-storage
- amqp
diff --git a/dockerfiles/swh-indexer-worker/Dockerfile b/dockerfiles/swh-indexer-worker/Dockerfile
index 254f725..77a07b3 100644
--- a/dockerfiles/swh-indexer-worker/Dockerfile
+++ b/dockerfiles/swh-indexer-worker/Dockerfile
@@ -1,20 +1,16 @@
FROM python:3.6
RUN export DEBIAN_FRONTEND=noninteractive && \
apt-get update && \
apt-get install -y \
libsystemd-dev postgresql-client
+RUN pip install --upgrade pip
RUN pip install swh-indexer
RUN useradd -ms /bin/bash swh
-COPY entrypoint.sh /
-
-COPY mimetype.yml /home/swh/.config/swh/indexer/
-COPY fossology_license.yml /home/swh/.config/swh/indexer/
-COPY origin_head.yml /home/swh/.config/swh/indexer/
-COPY revision_metadata.yml /home/swh/.config/swh/indexer/
-COPY origin_intrinsic_metadata.yml /home/swh/.config/swh/indexer/
-RUN chown -R swh: /home/swh/.config/
USER swh
-ENTRYPOINT ["/entrypoint.sh"]
+COPY entrypoint.sh /home/swh/
+COPY indexer.yml /home/swh/
+# entrypoint.sh is copied into /home/swh/ above, so it is launched from that path
+ENTRYPOINT ["/home/swh/entrypoint.sh"]
diff --git a/dockerfiles/swh-indexer-worker/fossology_license.yml b/dockerfiles/swh-indexer-worker/fossology_license.yml
deleted file mode 100644
index 55ab356..0000000
--- a/dockerfiles/swh-indexer-worker/fossology_license.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-storage:
- cls: remote
- args:
- url: http://swh-storage:5002/
-objstorage:
- cls: remote
- args:
- url: http://swh-objstorage:5003/
-indexer_storage:
- cls: remote
- args:
- url: http://swh-idx-storage:5007/
diff --git a/dockerfiles/swh-indexer-worker/revision_metadata.yml b/dockerfiles/swh-indexer-worker/indexer.yml
similarity index 98%
rename from dockerfiles/swh-indexer-worker/revision_metadata.yml
rename to dockerfiles/swh-indexer-worker/indexer.yml
index 78654ed..22303d4 100644
--- a/dockerfiles/swh-indexer-worker/revision_metadata.yml
+++ b/dockerfiles/swh-indexer-worker/indexer.yml
@@ -1,19 +1,16 @@
storage:
cls: remote
args:
url: http://swh-storage:5002/
-
objstorage:
cls: remote
args:
url: http://swh-objstorage:5003/
-
indexer_storage:
cls: remote
args:
url: http://swh-idx-storage:5007/
-
scheduler:
cls: remote
args:
url: http://swh-scheduler-api:5008/
diff --git a/dockerfiles/swh-indexer-worker/mimetype.yml b/dockerfiles/swh-indexer-worker/mimetype.yml
deleted file mode 100644
index 55ab356..0000000
--- a/dockerfiles/swh-indexer-worker/mimetype.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-storage:
- cls: remote
- args:
- url: http://swh-storage:5002/
-objstorage:
- cls: remote
- args:
- url: http://swh-objstorage:5003/
-indexer_storage:
- cls: remote
- args:
- url: http://swh-idx-storage:5007/
diff --git a/dockerfiles/swh-indexer-worker/origin_head.yml b/dockerfiles/swh-indexer-worker/origin_head.yml
deleted file mode 100644
index 72724af..0000000
--- a/dockerfiles/swh-indexer-worker/origin_head.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-storage:
- cls: remote
- args:
- url: http://swh-storage:5002/
-
-indexer_storage:
- cls: remote
- args:
- url: http://swh-idx-storage:5007/
-
-scheduler:
- cls: remote
- args:
- url: http://swh-scheduler-api:5008/
-
-tasks:
- revision_metadata: indexer_revision_metadata
- origin_intrinsic_metadata: indexer_origin_metadata
-
diff --git a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml b/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml
deleted file mode 100644
index 33e933c..0000000
--- a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-storage:
- cls: remote
- args:
- url: http://swh-storage:5002/
-
-indexer_storage:
- cls: remote
- args:
- url: http://swh-idx-storage:5007/
diff --git a/indexers.env b/indexers.env
index 0f02e61..3e15cd5 100644
--- a/indexers.env
+++ b/indexers.env
@@ -1,3 +1,5 @@
-CONCURRENCY=1
+CONCURRENCY=4
MAX_TASKS_PER_CHILD=10
LOGLEVEL=DEBUG
+SWH_WORKER_INSTANCE=indexer
+SWH_CONFIG_FILENAME=/home/swh/indexer.yml

File Metadata

Mime Type
text/x-diff
Expires
Tue, Jun 3, 7:44 AM (12 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3213086

Event Timeline