diff --git a/.gitignore b/.gitignore --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -docker-compose.override.yml \ No newline at end of file +docker-compose.override.yml +docker-compose.storage-replica.override.yml \ No newline at end of file diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -659,3 +659,21 @@ Note that the object storage is not replicated here, only the graph storage. + +## Starting the backfiller + +The backfiller reads the objects within the range [start-object, +end-object] from the storage and sends them to the kafka topics. + +``` +(swh) $ docker-compose \ + -f docker-compose.yml \ + -f docker-compose.storage-replica.yml \ + -f docker-compose.storage-replica.override.yml \ + run \ + swh-journal-backfiller \ + snapshot \ + --start-object 000000 \ + --end-object 000001 \ + --dry-run +``` diff --git a/conf/journal_backfiller.yml b/conf/journal_backfiller.yml new file mode 100644 --- /dev/null +++ b/conf/journal_backfiller.yml @@ -0,0 +1,9 @@ +brokers: + - kafka + +final_prefix: swh.journal.objects +client_id: swh.journal.backfiller +object_types: + - content + +storage_dbconn: postgresql:///?service=swh-storage diff --git a/docker-compose.storage-replica.yml b/docker-compose.storage-replica.yml --- a/docker-compose.storage-replica.yml +++ b/docker-compose.storage-replica.yml @@ -49,3 +49,18 @@ volumes: - "./conf/storage-replica.yml:/storage-replica.yml:ro" - "./services/swh-storage-replayer/entrypoint.sh:/entrypoint.sh:ro" + + swh-journal-backfiller: + image: swh/stack + build: ./ + entrypoint: /entrypoint.sh + environment: + SWH_CONFIG_FILENAME: /journal_backfiller.yml + env_file: + - ./env/storage-db.env + depends_on: + - swh-storage-db + - kafka + volumes: + - "./conf/journal_backfiller.yml:/journal_backfiller.yml:ro" + - "./services/swh-journal-backfiller/entrypoint.sh:/entrypoint.sh:ro" diff --git a/services/swh-journal-backfiller/entrypoint.sh b/services/swh-journal-backfiller/entrypoint.sh new file mode 100755 --- /dev/null +++ 
b/services/swh-journal-backfiller/entrypoint.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Entrypoint of the swh-journal-backfiller service.
+#
+# Usage:
+#   entrypoint.sh shell      open an interactive bash session
+#   entrypoint.sh ARGS...    run "swh-journal backfiller ARGS..."
+
+set -e
+
+# setup_pip, setup_pgsql and wait_pgsql are not defined in this script;
+# presumably they come from the two helper files sourced below.
+source /srv/softwareheritage/utils/pyutils.sh
+setup_pip
+
+source /srv/softwareheritage/utils/pgsql.sh
+setup_pgsql
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        wait_pgsql
+        echo "Starting swh-journal-backfiller"
+        # "$@" is quoted so each argument is forwarded exactly as given,
+        # without word splitting or glob expansion by the shell.
+        exec wait-for-it kafka:9092 -s --timeout=0 -- swh-journal backfiller "$@"
+        ;;
+esac