diff --git a/Dockerfile b/Dockerfile
--- a/Dockerfile
+++ b/Dockerfile
@@ -24,6 +24,7 @@
 
 RUN pip install \
     swh-core[db,http] \
+    cassandra-driver \
     swh-deposit \
     swh-indexer \
     swh-journal \
diff --git a/conf/storage_cassandra.yml b/conf/storage_cassandra.yml
new file mode 100644
--- /dev/null
+++ b/conf/storage_cassandra.yml
@@ -0,0 +1,11 @@
+storage:
+  cls: cassandra
+  args:
+    hosts:
+      - cassandra-seed
+    keyspace: swh
+    objstorage:
+      cls: remote
+      args:
+        url: http://swh-objstorage:5003/
+
diff --git a/docker-compose.cassandra.yml b/docker-compose.cassandra.yml
--- a/docker-compose.cassandra.yml
+++ b/docker-compose.cassandra.yml
@@ -14,6 +14,7 @@
     volumes:
       - "./services/cassandra/swh_entrypoint.sh:/swh_entrypoint.sh:ro"
       - "./conf/cassandra.yaml:/cassandra.yaml:ro"
+
   cassandra:
     # Additional Cassandra instance(s), which may be scaled up, but not
     # down. They will automatically connect to 'cassandra-seed', and
@@ -26,3 +27,22 @@
       - "./conf/cassandra.yaml:/cassandra.yaml:ro"
     env_file:
       - ./env/cassandra.env
+
+  swh-storage:
+    volumes:
+      # note: the two /home/dev/... source mounts below are absolute host
+      # paths from one development machine; adjust them to your own
+      # checkout. You need to be on the cassandra-backend2 branch.
+      - "/home/dev/swh-environment/swh-storage:/src/swh-storage"
+      - "/home/dev/swh-environment/swh-model:/src/swh-model"
+      - "./conf/storage_cassandra.yml:/storage.yml:ro"
+      - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro"
+    depends_on:
+      - swh-storage-db
+      - cassandra-seed
+      - swh-objstorage
+      - kafka
+    environment:
+      CASSANDRA_SEED: cassandra-seed
+      STORAGE_BACKEND: cassandra
+      PYTHONUNBUFFERED: 1
diff --git a/docker-compose.yml b/docker-compose.yml
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -157,6 +157,7 @@
       - ./env/storage-db.env
     environment:
       SWH_CONFIG_FILENAME: /storage.yml
+      STORAGE_BACKEND: postgresql
     entrypoint: /entrypoint.sh
     volumes:
       - "./conf/storage.yml:/storage.yml:ro"
diff --git a/services/swh-storage/entrypoint.sh b/services/swh-storage/entrypoint.sh
--- a/services/swh-storage/entrypoint.sh
+++ b/services/swh-storage/entrypoint.sh
@@ -5,19 +5,41 @@
 source /srv/softwareheritage/utils/pyutils.sh
 setup_pip
 
-source /srv/softwareheritage/utils/pgsql.sh
-setup_pgsql
+# Fall back to PostgreSQL when STORAGE_BACKEND is unset, as before this switch existed.
+STORAGE_BACKEND="${STORAGE_BACKEND:-postgresql}"
+
+if [ "$STORAGE_BACKEND" = "postgresql" ]; then
+    source /srv/softwareheritage/utils/pgsql.sh
+    setup_pgsql
+
+elif [ "$STORAGE_BACKEND" = "cassandra" ]; then
+    # Use one seed host for both the readiness probe and keyspace creation.
+    CASSANDRA_SEED="${CASSANDRA_SEED:-cassandra-seed}"
+    echo Waiting for Cassandra to start
+    wait-for-it "${CASSANDRA_SEED}:9042" -s --timeout=0
+    echo Creating keyspace
+    # Unquoted heredoc: the shell expands ${CASSANDRA_SEED} before Python runs.
+    python3 << EOF
+from swh.storage.cassandra import create_keyspace
+create_keyspace(['${CASSANDRA_SEED}'], 'swh')
+EOF
+
+else
+    echo "Warning: unknown STORAGE_BACKEND '$STORAGE_BACKEND', skipping backend setup" >&2
+fi
 
 case "$1" in
     "shell")
       exec bash -i
       ;;
     *)
-      wait_pgsql
+      if [ "$STORAGE_BACKEND" = "postgresql" ]; then
+        wait_pgsql
 
-      echo Setup the database
-      PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
-          --db-name ${POSTGRES_DB}
+        echo Setup the database
+        PGPASSWORD=${POSTGRES_PASSWORD} swh-db-init storage \
+            --db-name ${POSTGRES_DB}
+      fi
 
       echo Starting the swh-storage API server
       exec gunicorn --bind 0.0.0.0:5002 \