Page Menu | Home | Software Heritage

D7508.diff
No One | Temporary

D7508.diff

diff --git a/docker/Dockerfile b/docker/Dockerfile
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,16 +1,30 @@
FROM adoptopenjdk/openjdk11:debian-jre
+RUN export DEBIAN_FRONTEND=noninteractive && \
+ apt-get update && apt-get upgrade -y && \
+ apt-get install -y python3 python3-click python3-requests wget && \
+ apt-get clean && \
+ rm -rf /var/lib/apt/lists/*
+
# Download and install jars
-ADD https://github.com/javasoze/clue/releases/download/release-6.2.0-1.0.0/clue-6.2.0-1.0.0.jar /opt/
-ADD https://repo1.maven.org/maven2/org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar /opt/
+RUN cd /opt && wget -q \
+ https://github.com/javasoze/clue/releases/download/release-6.2.0-1.0.0/clue-6.2.0-1.0.0.jar
+# coming out of https://github.com/javasoze/clue/releases/tag/release-6.2.0-1.0.0
+RUN cd /opt && wget -q \
+ https://repo1.maven.org/maven2/org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar
+# FIXME: Retrieve https://repo1.maven.org/maven2/org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar.sha1 and verify the downloaded jar against it
+# indexer-cli-6.0.0.jar.sha1: eeb98596b7fed4aa13fa13ecafcbb843ef8ab697
# Copy index extraction script
+
+COPY run_full_export.py /opt/
COPY extract_indexes.sh /opt/
+RUN mkdir /work/ && chmod +x /opt/extract_indexes.sh
WORKDIR /work/
-RUN ls /opt/
-RUN ls -R /work/
+ENV MVN_IDX_EXPORTER_BASE_URL=https://repo.maven.apache.org/maven2/
+ENV MVN_IDX_EXPORTER_WORK_DIR=/work
+ENV MVN_IDX_EXPORTER_PUBLISH_DIR=/publish
-# Parse default index file (will be overriden by cli parameters)
-CMD ["sh", "/opt/extract_indexes.sh"]
+CMD ["python3", "/opt/run_full_export.py"]
diff --git a/docker/extract_indexes.sh b/docker/extract_indexes.sh
--- a/docker/extract_indexes.sh
+++ b/docker/extract_indexes.sh
@@ -5,7 +5,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-WORKDIR=/work
+WORKDIR=${1-/work}
FILE_IN=$WORKDIR/nexus-maven-repository-index.gz
localtime=$(date +"%Y-%m-%d %H:%M:%S")
diff --git a/scripts/run_full_export.py b/docker/run_full_export.py
rename from scripts/run_full_export.py
rename to docker/run_full_export.py
--- a/scripts/run_full_export.py
+++ b/docker/run_full_export.py
@@ -9,6 +9,7 @@
import datetime
import glob
import re
+from subprocess import check_call
from os import chdir, makedirs
from os.path import getsize, isabs, isdir, isfile, join, basename
from pathlib import Path
@@ -17,10 +18,6 @@
import requests
-import docker
-from docker.client import DockerClient
-from docker.models.images import Image
-
logger = logging.getLogger(__name__)
@@ -28,51 +25,10 @@
MAVEN_INDEX_ARCHIVE = f"{MAVEN_INDEX_NAME}.gz"
-def _docker_image_get(
- client: DockerClient, docker_image_name: str, docker_image_update: bool = False
-) -> Image:
- """Retrieve docker image locally."""
- if docker_image_update:
- return client.images.pull(repository=docker_image_name)
- try:
- image = next(iter(client.images.list(name=docker_image_name)))
- except StopIteration:
- logger.info("Docker: Could not find %s. Pulling it.", docker_image_name)
- image = client.images.pull(repository=docker_image_name)
- else:
- logger.info(
- "Docker: Found image %s locally, ID is %s.", image, image.attrs["Id"]
- )
- return image
-
-
-def _extract_indexes(
- work_dir: str, docker_image_name: str, docker_image_update: bool = False
-) -> None:
- """Extract indexes out of the working directory.
-
- As an implementation details, this starts the maven indexer exporter container using
- a docker image. This will use the local image if present, otherwise pull it from
- docker hub first.
-
- """
- # Initialise the docker client.
- client = docker.from_env()
- image = _docker_image_get(client, docker_image_name, docker_image_update)
- # Run the extraction process through the docker image (which runs the extract index
- # script), see ../docker/Dockerfile.
- ret = client.containers.run(
- image,
- tty=True,
- volumes={work_dir: {"bind": "/work", "mode": "rw"}},
- )
-
- logger.info("Docker log:\n%s", ret.decode())
-
-
def _download_indexes(work_dir: str, instance_url: str) -> None:
- """ Download all required indexes from the .index/ directory
+ """Download all required indexes from the .index/ directory
of the specified instance.
+
"""
logger.info("Downloading all required indexes")
@@ -143,16 +99,7 @@
help="Absolute path to the final directory.",
default="/tmp/maven-index-exporter/publish/",
)
-@click.option(
- "--docker-image-name", help="Docker image", default="maven-index-exporter"
-)
-@click.option(
- "--docker-image-update",
- is_flag=True,
- help="Trigger a docker image update.",
- default=False,
-)
-def main(base_url, work_dir, publish_dir, docker_image_name, docker_image_update):
+def main(base_url, work_dir, publish_dir):
now = datetime.datetime.now()
logger.info("Script: run_full_export")
logger.info("Timestamp: %s", now.strftime("%Y-%m-%d %H:%M:%S"))
@@ -177,10 +124,8 @@
# Only fetch the new ones, existing files won't be re-downloaded.
_download_indexes(work_dir, base_url)
- # Run Docker on the downloaded indexes.
- _extract_indexes(
- work_dir, docker_image_name, docker_image_update=docker_image_update
- )
+ # Extract indexes into a .fld file
+ check_call(["/opt/extract_indexes.sh", work_dir])
logger.info("Export directory has the following files:")
export_dir = join(work_dir, "export")
@@ -218,4 +163,4 @@
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
- main()
+ main(auto_envvar_prefix='MVN_IDX_EXPORTER')

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 6:09 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225245

Event Timeline