diff --git a/docker/Dockerfile b/docker/Dockerfile index 4012fde..e09382c 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,84 +1,85 @@ FROM python:3.7 ARG PGDG_REPO=http://apt.postgresql.org/pub/repos/apt ARG PGDG_GPG_KEY=https://www.postgresql.org/media/keys/ACCC4CF8.asc ARG PGDG_KEYRING=/usr/share/keyrings/pgdg-archive-keyring.gpg ARG NODE_REPO=https://deb.nodesource.com/node_12.x ARG NODE_GPG_KEY=https://deb.nodesource.com/gpgkey/nodesource.gpg.key ARG NODE_KEYRING=/usr/share/keyrings/nodejs-archive-keyring.gpg ARG YARN_REPO=https://dl.yarnpkg.com/debian/ ARG YARN_GPG_KEY=https://dl.yarnpkg.com/debian/pubkey.gpg ARG YARN_KEYRING=/usr/share/keyrings/yarnpkg-archive-keyring.gpg RUN . /etc/os-release && \ echo "deb [signed-by=${PGDG_KEYRING}] ${PGDG_REPO} ${VERSION_CODENAME}-pgdg main" \ > /etc/apt/sources.list.d/pgdg.list && \ curl -fsSL ${PGDG_GPG_KEY} | gpg --dearmor > ${PGDG_KEYRING} && \ echo "deb [signed-by=${NODE_KEYRING}] ${NODE_REPO} ${VERSION_CODENAME} main" \ > /etc/apt/sources.list.d/nodejs.list && \ curl -fsSL ${NODE_GPG_KEY} | gpg --dearmor > ${NODE_KEYRING} && \ echo "deb [signed-by=${YARN_KEYRING}] ${YARN_REPO} stable main" \ > /etc/apt/sources.list.d/yarnpkg.list && \ curl -fsSL ${YARN_GPG_KEY} | gpg --dearmor > ${YARN_KEYRING} RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && \ apt-get install -y \ libapr1-dev \ libaprutil1-dev \ libcmph-dev \ libpq-dev \ libsvn-dev \ libsystemd-dev \ gcc \ openjdk-11-jre \ pkg-config \ pv \ postgresql-client-12 \ wait-for-it \ ngrep \ rsync \ nodejs \ yarn \ zstd && \ apt-get install -y --no-install-recommends \ opam \ r-base-core \ r-cran-jsonlite && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* RUN useradd -md /srv/softwareheritage -s /bin/bash swh USER swh RUN python3 -m venv /srv/softwareheritage/venv ENV PATH="/srv/softwareheritage/venv/bin:${PATH}" # Avoid 21.3 release which is preventing override to work # https://github.com/pypa/pip/issues/10573 RUN pip install --upgrade 'pip!=21.3' setuptools wheel RUN pip install gunicorn httpie RUN pip install \ swh-core[db,http] \ swh-counters \ swh-deposit[server] \ swh-indexer \ swh-journal \ swh-lister \ swh-loader-core \ swh-loader-bzr \ + swh-loader-cvs \ swh-loader-git \ swh-loader-mercurial \ swh-loader-svn \ swh-storage \ swh-objstorage \ swh-scheduler \ swh-vault \ swh-web COPY utils/*.sh /srv/softwareheritage/utils/ RUN mkdir -p /srv/softwareheritage/objects RUN rm -rd /srv/softwareheritage/.cache diff --git a/docker/conf/loader.yml b/docker/conf/loader.yml index 6202873..6de563b 100644 --- a/docker/conf/loader.yml +++ b/docker/conf/loader.yml @@ -1,40 +1,42 @@ storage: cls: pipeline steps: - cls: buffer min_batch_size: content: 10000 content_bytes: 104857600 directory: 1000 revision: 1000 - cls: filter - cls: remote url: http://nginx:5080/storage celery: task_broker: amqp://guest:guest@amqp// task_queues: - swh.loader.bzr.tasks.LoadBazaar + - swh.loader.cvs.tasks.LoadCvsRepository - swh.loader.dir.tasks.LoadDirRepository - swh.loader.git.tasks.LoadDiskGitRepository - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository - swh.loader.git.tasks.UpdateGitRepository - swh.loader.mercurial.tasks.LoadArchiveMercurial - swh.loader.mercurial.tasks.LoadMercurial - swh.loader.package.archive.tasks.LoadArchive - swh.loader.package.cran.tasks.LoadCRAN - swh.loader.package.debian.tasks.LoadDebian - swh.loader.package.maven.tasks.LoadMaven - swh.loader.package.nixguix.tasks.LoadNixguix - swh.loader.package.npm.tasks.LoadNpm - swh.loader.package.pypi.tasks.LoadPyPI - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - swh.loader.svn.tasks.LoadSvnRepository - swh.loader.svn.tasks.MountAndLoadSvnRepository - save_code_now:swh.loader.bzr.tasks.LoadBazaar + - save_code_now:swh.loader.cvs.tasks.LoadCvsRepository - save_code_now:swh.loader.git.tasks.UpdateGitRepository - save_code_now:swh.loader.mercurial.tasks.LoadArchiveMercurial - save_code_now:swh.loader.mercurial.tasks.LoadMercurial - save_code_now:swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - save_code_now:swh.loader.svn.tasks.MountAndLoadSvnRepository - save_code_now:swh.loader.package.archive.tasks.LoadArchive