# Patch summary (preamble text placed ahead of the "diff --git" header):
#
# This is a git unified diff covering two files.
#
# 1. docker/Dockerfile
#    Adds one line, "swh-loader-bzr", to the last "pip install" list,
#    between "swh-loader-core" and "swh-loader-git". Every other
#    Dockerfile line in the hunk is unchanged context.
#
# 2. docker/conf/loader.yml
#    Adds two entries under "celery: task_queues":
#      - swh.loader.bzr.tasks.LoadBazaar
#      - save_code_now:swh.loader.bzr.tasks.LoadBazaar
#    Each is inserted at the head of its group, i.e. before the
#    "swh.loader.dir" entry and before the first existing
#    "save_code_now:" entry respectively.
#
# NOTE(review): the hunk headers declare multi-line hunks (68 -> 69 and
# 38 -> 40 lines), yet the patch text below is held on two physical lines,
# so its original line breaks and indentation appear to have been
# collapsed. Presumably it has to be restored from the upstream commit
# before it can be applied -- TODO confirm.
diff --git a/docker/Dockerfile b/docker/Dockerfile index 366b3b6..9f9753d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,68 +1,69 @@ FROM python:3.7 RUN . /etc/os-release && \ echo "deb [signed-by=/usr/share/keyrings/pgdg-archive-keyring.gpg] http://apt.postgresql.org/pub/repos/apt ${VERSION_CODENAME}-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \ curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor > /usr/share/keyrings/pgdg-archive-keyring.gpg && \ echo "deb [signed-by=/usr/share/keyrings/nodejs-archive-keyring.gpg] https://deb.nodesource.com/node_12.x ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/nodejs.list && \ curl -fsSL https://deb.nodesource.com/gpgkey/nodesource.gpg.key | gpg --dearmor > /usr/share/keyrings/nodejs-archive-keyring.gpg && \ echo "deb [signed-by=/usr/share/keyrings/yarnpkg-archive-keyring.gpg] https://dl.yarnpkg.com/debian/ stable main" > /etc/apt/sources.list.d/yarnpkg.list && \ curl -fsSL https://dl.yarnpkg.com/debian/pubkey.gpg | gpg --dearmor > /usr/share/keyrings/yarnpkg-archive-keyring.gpg RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && \ apt-get install -y \ libapr1-dev \ libaprutil1-dev \ libcmph-dev \ libpq-dev \ libsvn-dev \ libsystemd-dev \ gcc \ openjdk-11-jre \ pkg-config \ pv \ postgresql-client-12 \ wait-for-it \ ngrep \ rsync \ nodejs \ yarn \ zstd && \ apt-get install -y --no-install-recommends \ opam \ r-base-core \ r-cran-jsonlite && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* RUN useradd -md /srv/softwareheritage -s /bin/bash swh USER swh RUN python3 -m venv /srv/softwareheritage/venv ENV PATH="/srv/softwareheritage/venv/bin:${PATH}" # Avoid 21.3 release which is preventing override to work # https://github.com/pypa/pip/issues/10573 RUN pip install --upgrade 'pip!=21.3' setuptools wheel RUN pip install gunicorn httpie RUN pip install \ swh-core[db,http] \ swh-counters \ swh-deposit[server] \ swh-indexer \ swh-journal \ swh-lister 
\ swh-loader-core \ + swh-loader-bzr \ swh-loader-git \ swh-loader-mercurial \ swh-loader-svn \ swh-storage \ swh-objstorage \ swh-scheduler \ swh-vault \ swh-web COPY utils/*.sh /srv/softwareheritage/utils/ RUN mkdir -p /srv/softwareheritage/objects RUN rm -rd /srv/softwareheritage/.cache diff --git a/docker/conf/loader.yml b/docker/conf/loader.yml index 45a6918..6202873 100644 --- a/docker/conf/loader.yml +++ b/docker/conf/loader.yml @@ -1,38 +1,40 @@ storage: cls: pipeline steps: - cls: buffer min_batch_size: content: 10000 content_bytes: 104857600 directory: 1000 revision: 1000 - cls: filter - cls: remote url: http://nginx:5080/storage celery: task_broker: amqp://guest:guest@amqp// task_queues: + - swh.loader.bzr.tasks.LoadBazaar - swh.loader.dir.tasks.LoadDirRepository - swh.loader.git.tasks.LoadDiskGitRepository - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository - swh.loader.git.tasks.UpdateGitRepository - swh.loader.mercurial.tasks.LoadArchiveMercurial - swh.loader.mercurial.tasks.LoadMercurial - swh.loader.package.archive.tasks.LoadArchive - swh.loader.package.cran.tasks.LoadCRAN - swh.loader.package.debian.tasks.LoadDebian - swh.loader.package.maven.tasks.LoadMaven - swh.loader.package.nixguix.tasks.LoadNixguix - swh.loader.package.npm.tasks.LoadNpm - swh.loader.package.pypi.tasks.LoadPyPI - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - swh.loader.svn.tasks.LoadSvnRepository - swh.loader.svn.tasks.MountAndLoadSvnRepository + - save_code_now:swh.loader.bzr.tasks.LoadBazaar - save_code_now:swh.loader.git.tasks.UpdateGitRepository - save_code_now:swh.loader.mercurial.tasks.LoadArchiveMercurial - save_code_now:swh.loader.mercurial.tasks.LoadMercurial - save_code_now:swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - save_code_now:swh.loader.svn.tasks.MountAndLoadSvnRepository - save_code_now:swh.loader.package.archive.tasks.LoadArchive