diff --git a/docker/README.rst b/docker/README.rst --- a/docker/README.rst +++ b/docker/README.rst @@ -185,7 +185,7 @@ Several services have their listening ports exposed on the host: - amqp: 5072 -- kafka: 5092 +- kafka: 29092 - nginx: 5080 And for SWH services: @@ -213,6 +213,22 @@ loader@61704103668c: OK [...] + +To consume ``kafka`` topics from the host, for example to run the ``swh +dataset graph export`` command, a configuration file could be:: + + ~/swh-environment/docker$ cat dataset_config.yml + journal: + brokers: + - 127.0.0.1:29092 + + ~/swh-environment/docker$ swh dataset -C dataset_config.yml graph export output + Exporting release: + - Partition offsets: 100%|███████████████████████████████| 16/16 [00:00<00:00, 1863.62it/s] + - Export (release): 100%|████████████████| 3650/3650 [00:08<00:00, 437.89it/s, workers=1/1] + [...] + + .. _docker-manage-tasks: Managing tasks diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -17,20 +17,24 @@ kafka: image: wurstmeister/kafka ports: - - "9092:9092" + - "29092:29092" env_file: ./env/kafka.env environment: KAFKA_BROKER_ID: 1 - KAFKA_CREATE_TOPICS: swh.journal.objects.content:16:1, - swh.journal.objects.directory:16:1, - swh.journal.objects.origin:16:1, - swh.journal.objects.origin_visit:16:1, - swh.journal.objects.origin_visit_status:16:1, - swh.journal.objects.revision:16:1, - swh.journal.objects.release:16:1, - swh.journal.objects.skipped_content:16:1, - swh.journal.objects.snapshot:16:1, - swh.journal.indexed.origin_intrinsic_metadata:16:1 + KAFKA_CREATE_TOPICS: swh.journal.objects.content:16:1:compact, + swh.journal.objects.directory:16:1:compact, + swh.journal.objects.origin:16:1:compact, + swh.journal.objects.origin_visit:16:1:compact, + swh.journal.objects.origin_visit_status:16:1:compact, + swh.journal.objects.revision:16:1:compact, + swh.journal.objects.release:16:1:compact, + 
swh.journal.objects.skipped_content:16:1:compact, + swh.journal.objects.snapshot:16:1:compact, + swh.journal.indexed.origin_intrinsic_metadata:16:1:compact + KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://127.0.0.1:29092 + KAFKA_LISTENERS: PLAINTEXT://:9092,PLAINTEXT_HOST://:29092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT depends_on: - zookeeper healthcheck: