diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4561a63 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +test_docker_image.log +tests/repository_test/indexes/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..dcdddf8 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.1.0 + hooks: + - id: trailing-whitespace + - id: check-json + - id: check-yaml + + - repo: https://gitlab.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 + + - repo: https://github.com/codespell-project/codespell + rev: v2.1.0 + hooks: + - id: codespell + name: Check source code spelling + args: [-L crate] + stages: [commit] + - id: codespell + name: Check commit message spelling + stages: [commit-msg] + + - repo: local + hooks: + - id: mypy + name: mypy + entry: mypy + args: [swh] + pass_filenames: false + language: system + types: [python] + + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + + - repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black diff --git a/README.md b/README.md index b154cde..b93216f 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,26 @@ - # Maven Index Exporter -This Docker image reads a Maven Indexer index and extract information about the indexed documents as a convenient text file. +This Docker image reads a Maven Indexer index and extracts information about the indexed +documents as a convenient text file. -It takes as input the full set of Maven indexes files, as can be seen in the central maven repository, and uses two Java tools ([maven-indexer-cli](https://maven.apache.org/maven-indexer/) and [clue](https://github.com/javasoze/clue)) to extract the indexes (in `indexes/`) and export them in the `export/` directory. +It takes as input the full set of Maven indexes files, as can be seen in the central +maven repository. 
It then uses two Java tools +([maven-indexer-cli](https://maven.apache.org/maven-indexer/) and +[clue](https://github.com/javasoze/clue)) to extract the indexes (in `indexes/`) and +export them in the `export/` directory. -* You can read more about the sequence of actions in the `docs/` directory, including: -* [more information about the process](docs/README.md). -* [instructions to run the exporter](docs/run_maven_index_exporter.md). -* [instructions to build and test](docs/build_and_test.md) the Docker image. +You can read more about the sequence of actions in the `docs/` directory, including: +* [more information about the process](docs/README.md). +* [instructions to run the exporter](docs/run_maven_index_exporter.md). +* [instructions to build and test](docs/build_and_test.md) the Docker image. -An official Docker image is provided for quick tests on [DockerHub](https://hub.docker.com/r/bbaldassari/maven-index-exporter). +An official Docker image is provided for quick tests on +[DockerHub](https://hub.docker.com/r/bbaldassari/maven-index-exporter). ## List of maven repositories -We also provide a curated list of maven repositories that can be used with the Docker images, i.e. they use the Maven indexer, make their indexes publicly available, and use the same version as the exporter. +We also provide a curated list of maven repositories that can be used with the Docker +images, i.e. they use the Maven indexer, make their indexes publicly available, and use +the same version as the exporter. See [docs/maven_repositories.md](docs/maven_repositories.md). - diff --git a/ThirdPartyLicenses.txt b/ThirdPartyLicenses.txt index 4ae9512..ac5acac 100644 --- a/ThirdPartyLicenses.txt +++ b/ThirdPartyLicenses.txt @@ -1,216 +1,216 @@ This repository includes third-party libraries or other resources that may be distributed under licenses different than the provided code. -The `repository_src` and `repository_test` directories are only used to test -the maven-index-exporter. 
They include the source jar and maven pom files from +The `repository_src` and `repository_test` directories are only used to test +the maven-index-exporter. They include the source jar and maven pom files from sprova4j [sprova4j], without any modification. [sprova4j] https://github.com/aldialimucaj/sprova4j/ ## License Notice for Sprova4j --------------------------------------- Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
diff --git a/docker/Dockerfile b/docker/Dockerfile index 21896e7..8e85ab5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,18 +1,16 @@ - FROM adoptopenjdk/openjdk11:alpine-jre # Download and install jars ADD https://github.com/javasoze/clue/releases/download/release-6.2.0-1.0.0/clue-6.2.0-1.0.0.jar /opt/ ADD https://repo1.maven.org/maven2/org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar /opt/ # Copy index extraction script COPY extract_indexes.sh /opt/ WORKDIR /work/ RUN ls /opt/ RUN ls -R /work/ # Parse default index file (will be overriden by cli parameters) CMD ["sh", "/opt/extract_indexes.sh", "/work/nexus-maven-repository-index.gz"] - diff --git a/docs/README.md b/docs/README.md index efed837..a0e771b 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,78 +1,78 @@ -# Documentation +# Documentation ## Sequence The index files can be dowloaded from any maven repository that uses maven-indexer, like maven central: https://repo1.maven.org/maven2/.index/ Copy all files (i.e. the main index, the updates and properties file) into the volume directory (`$WORKDIR`). It will be mounted as `/work/` in the docker image. The export is then achieved in two steps: * Unpack the Lucene indexes from the Maven Indexer indexes using `maven-indexer-cli`. The command used is: ``` $ java --illegal-access=permit -jar $INDEXER_JAR \ --unpack $FILE_IN \ --destination $WORKDIR/indexes/ \ --type full ``` This generates a set of binary lucene files as shown below: ``` $ ls -lh $WORKDIR/indexes/ total 5,2G -rw-r--r-- 1 root root 500M juil. 7 22:06 _4m.fdt -rw-r--r-- 1 root root 339K juil. 7 22:06 _4m.fdx -rw-r--r-- 1 root root 2,2K juil. 7 22:07 _4m.fnm -rw-r--r-- 1 root root 166M juil. 7 22:07 _4m_Lucene50_0.doc -rw-r--r-- 1 root root 147M juil. 7 22:07 _4m_Lucene50_0.pos [SNIP] -rw-r--r-- 1 root root 363 juil. 7 22:06 _e0.si -rw-r--r-- 1 root root 1,7K juil. 7 22:07 segments_2 -rw-r--r-- 1 root root 8 juil. 
7 21:54 timestamp ``` * Export the Lucene documents from the Lucene indexes using `clue`. This generates a set of text files as shown below: ``` $ java --illegal-access=permit -jar $JAR_CLUE $WORKDIR/indexes/ \ export $WORKDIR/export/ text ``` This generates a bunch of text files relating to the Lucene indexes, made available in `$WORKDIR/export/`. For our purpose we only keep the `*.fld` file that includes the indexed documents. ## Output The clue command is documented on [its github page](https://github.com/javasoze/clue). The indexed Lucene documents are located in the `*.fld` file. A description of the fields used by maven-indexer can be found in the project's API docs: https://maven.apache.org/maven-indexer-archives/maven-indexer-6.0.0/indexer-core/apidocs/org/apache/maven/index/ArtifactInfo.html ## How to build The build downloads binaries for both tools (maven-indexer-cli and clue), so make sure there is an internet connection. Go to the `docker/` dorectory and issue the folowing command: ``` $ docker build . -t bbaldassari/maven-index-exporter --no-cache ``` An up-to-date docker image is also available on docker hub at [bbaldassari/maven-index-exporter](https://hub.docker.com/r/bbaldassari/maven-index-exporter). ``` $ docker pull bbaldassari/maven-index-exporter ``` diff --git a/docs/build_and_test.md b/docs/build_and_test.md index 7a58849..df5de31 100644 --- a/docs/build_and_test.md +++ b/docs/build_and_test.md @@ -1,195 +1,199 @@ # Build and test Maven index exporter ## How to test (the quick way) -There is a bash script called `test_docker_image.sh` in the `scripts/` directory, -simply execute it. Tests cover the creation of the docker image, its execution, and the +There is a bash script called `test_docker_image.sh` in the `scripts/` directory, simply +execute it. Tests cover the creation of the docker image, its execution, and the resulting output. ``` $ bash test_docker_image.sh Script started on 20210911_181912. 
* Writing log to test_docker_image.log. * Docker image [maven-index-exporter] doesn't exist. * Building docker image. PASS: docker build returned 0. PASS: Docker image is listed. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has been created. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has 7 docs. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has 26 fields. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.0-sources.jar. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.0.pom. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.1-sources.jar. -PASS: file [/home/boris/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.1.pom. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has been created. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has 7 docs. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has 26 fields. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.0-sources.jar. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.0.pom. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.1-sources.jar. +PASS: file [$HOME/Projects/gh_maven-index-exporter/repository_test/export/_1.fld] has sprova4j-0.1.1.pom. $ ``` ## How to test (the long road) -This repository has a simple, almost-empty maven-indexer index that can be used to test the docker build. 
To use it, make sure that the directory `repository_test/` is present and run this command: +This repository has a simple, almost-empty maven-indexer index that can be used to test +the docker build. To use it, make sure that the directory `repository_test/` is present +and run this command: ``` $ docker run -v $(pwd)/repository_test:/work bbaldassari/maven-index-exporter ``` -The exported files will be stored in `repository_test/export/`, and output should look like this: +The exported files will be stored in `repository_test/export/`, and output should look +like this: ``` $ docker run -v $(pwd)/repository_test:/work bbaldassari/maven-index-exporter Docker Script started on 2021-08-27 06:32:22. # Checks.. * Content of /opt: total 32156 -rw------- 1 root root 18000742 Jan 8 2018 clue-6.2.0-1.0.0.jar -rw-r--r-- 1 root root 2574 Aug 25 18:28 extract_indexes.sh -rw------- 1 root root 14914610 Nov 28 2017 indexer-cli-6.0.0.jar drwxr-xr-x 3 root root 4096 Jun 29 16:23 java * Content of /work: total 36 -rw-r--r-- 1 1000 1000 254 Aug 26 09:21 nexus-maven-repository-index.1.gz -rw-r--r-- 1 1000 1000 32 Aug 26 09:21 nexus-maven-repository-index.1.gz.md5 -rw-r--r-- 1 1000 1000 40 Aug 26 09:21 nexus-maven-repository-index.1.gz.sha1 -rw-r--r-- 1 1000 1000 344 Aug 26 09:21 nexus-maven-repository-index.gz -rw-r--r-- 1 1000 1000 32 Aug 26 09:21 nexus-maven-repository-index.gz.md5 -rw-r--r-- 1 1000 1000 40 Aug 26 09:21 nexus-maven-repository-index.gz.sha1 -rw-r--r-- 1 1000 1000 193 Aug 26 09:21 nexus-maven-repository-index.properties -rw-r--r-- 1 1000 1000 32 Aug 26 09:21 nexus-maven-repository-index.properties.md5 -rw-r--r-- 1 1000 1000 40 Aug 26 09:21 nexus-maven-repository-index.properties.sha1 * Will read files from [/work/nexus-maven-repository-index.gz]. * Found file [/work/nexus-maven-repository-index.gz]. * Found indexer [/opt/indexer-cli-6.0.0.jar]. * Found clue [/opt/clue-6.2.0-1.0.0.jar]. * Java version:. 
openjdk version "11.0.11" 2021-04-20 OpenJDK Runtime Environment AdoptOpenJDK-11.0.11+9 (build 11.0.11+9) OpenJDK 64-Bit Server VM AdoptOpenJDK-11.0.11+9 (build 11.0.11+9, mixed mode) ############################# Unpacking [/work/nexus-maven-repository-index.gz] to /work/indexes SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". SLF4J: Defaulting to no-operation (NOP) logger implementation SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. Index Folder: /work Output Folder: /work/indexes Total time: 0 sec Final memory: 41M/1004M Unpacking finished on 2021-08-27 06:32:23. ############################# Exporting indexes /work/indexes to /work/export no configuration found, using default configuration Analyzer: class org.apache.lucene.analysis.standard.StandardAnalyzer Query Builder: class com.senseidb.clue.api.DefaultQueryBuilder Directory Builder: class com.senseidb.clue.api.DefaultDirectoryBuilder IndexReader Factory: class com.senseidb.clue.api.DefaultIndexReaderFactory Term Bytesref Display: class com.senseidb.clue.api.StringBytesRefDisplay Payload Bytesref Display: class com.senseidb.clue.api.RawBytesRefDisplay exporting index to text Exporting finished on 2021-08-27 06:32:23. ############################# Cleaning useless files. Size before cleaning: 32.0K /work/export 28.0K /work/indexes 4.0K /work/nexus-maven-repository-index.1.gz 4.0K /work/nexus-maven-repository-index.1.gz.md5 4.0K /work/nexus-maven-repository-index.1.gz.sha1 4.0K /work/nexus-maven-repository-index.gz 4.0K /work/nexus-maven-repository-index.gz.md5 4.0K /work/nexus-maven-repository-index.gz.sha1 4.0K /work/nexus-maven-repository-index.properties 4.0K /work/nexus-maven-repository-index.properties.md5 4.0K /work/nexus-maven-repository-index.properties.sha1 * Removing useless exports. Keeping only fld text extract. 
Size after cleaning: 8.0K /work/export 28.0K /work/indexes 4.0K /work/nexus-maven-repository-index.1.gz 4.0K /work/nexus-maven-repository-index.1.gz.md5 4.0K /work/nexus-maven-repository-index.1.gz.sha1 4.0K /work/nexus-maven-repository-index.gz 4.0K /work/nexus-maven-repository-index.gz.md5 4.0K /work/nexus-maven-repository-index.gz.sha1 4.0K /work/nexus-maven-repository-index.properties 4.0K /work/nexus-maven-repository-index.properties.md5 4.0K /work/nexus-maven-repository-index.properties.sha1 * Make files modifiable by the end-user. Docker Script execution finished on 2021-08-27 06:32:23. ``` The `_1.fld` file contains the fields for each document: ``` $ head repository_test/export/_1.fld doc 0 field 0 name u type string value al.aldi|sprova4j|0.1.0|sources|jar field 1 name m type string value 1626111735737 field 2 ``` ### Building the test repository -The test repository `repository_test` can be rebuilt from the `repository_src` -structure using [indexer-cli](https://search.maven.org/remotecontent?filepath=org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar) +The test repository `repository_test` can be rebuilt from the `repository_src` structure +using +[indexer-cli](https://search.maven.org/remotecontent?filepath=org/apache/maven/indexer/indexer-cli/6.0.0/indexer-cli-6.0.0.jar) with the following commands: ``` $ cd repository_src $ java -jar ~/Downloads/indexer-cli-6.0.0.jar -i index/ -d repository_test/ -r repo1 -s -c SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". SLF4J: Defaulting to no-operation (NOP) logger implementation SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. 
-Repository Folder: /home/boris/Projects/maven-index-exporter/repository_src/repo1 -Index Folder: /home/boris/Projects/maven-index-exporter/repository_src/index -Output Folder: /home/boris/Projects/maven-index-exporter/repository_src/repository_test +Repository Folder: $HOME/Projects/maven-index-exporter/repository_src/repo1 +Index Folder: $HOME/Projects/maven-index-exporter/repository_src/index +Output Folder: $HOME/Projects/maven-index-exporter/repository_src/repository_test Repository name: index Indexers: [min, jarContent] Will create checksum files for all published files (sha1, md5). Will create incremental chunks for changes, along with baseline file. Scanning started Artifacts added: 2 Artifacts deleted: 0 Total time: 1 sec Final memory: 48M/1012M $ java -jar ~/Downloads/indexer-cli-6.0.0.jar -i index/ -d repository_test/ -r repo2 -s -c SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". SLF4J: Defaulting to no-operation (NOP) logger implementation SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. -Repository Folder: /home/boris/Projects/maven-index-exporter/repository_src/repo2 -Index Folder: /home/boris/Projects/maven-index-exporter/repository_src/index -Output Folder: /home/boris/Projects/maven-index-exporter/repository_src/repository_test +Repository Folder: $HOME/Projects/maven-index-exporter/repository_src/repo2 +Index Folder: $HOME/Projects/maven-index-exporter/repository_src/index +Output Folder: $HOME/Projects/maven-index-exporter/repository_src/repository_test Repository name: index Indexers: [min, jarContent] Will create checksum files for all published files (sha1, md5). Will create incremental chunks for changes, along with baseline file. Scanning started Artifacts added: 2 Artifacts deleted: 0 Total time: 0 sec Final memory: 7M/1012M $ java -jar ~/Downloads/indexer-cli-6.0.0.jar -i index/ -d repository_test/ -r repo3 -s -c SLF4J: Failed to load class "org.slf4j.impl.StaticLoggerBinder". 
SLF4J: Defaulting to no-operation (NOP) logger implementation SLF4J: See http://www.slf4j.org/codes.html#StaticLoggerBinder for further details. -Repository Folder: /home/boris/Projects/maven-index-exporter/repository_src/repo3 -Index Folder: /home/boris/Projects/maven-index-exporter/repository_src/index -Output Folder: /home/boris/Projects/maven-index-exporter/repository_src/repository_test +Repository Folder: $HOME/Projects/maven-index-exporter/repository_src/repo3 +Index Folder: $HOME/Projects/maven-index-exporter/repository_src/index +Output Folder: $HOME/Projects/maven-index-exporter/repository_src/repository_test Repository name: index Indexers: [min, jarContent] Will create checksum files for all published files (sha1, md5). Will create incremental chunks for changes, along with baseline file. Scanning started Artifacts added: 1 Artifacts deleted: 2 Total time: 0 sec Final memory: 8M/1012M $ ``` diff --git a/docs/maven_repositories.md b/docs/maven_repositories.md index ab9d9f9..fc88a28 100644 --- a/docs/maven_repositories.md +++ b/docs/maven_repositories.md @@ -1,128 +1,166 @@ -A list of remote Maven repositories using [Maven Indexer](https://maven.apache.org/maven-indexer/) for their catalogue. +A list of remote Maven repositories using [Maven +Indexer](https://maven.apache.org/maven-indexer/) for their catalogue. # Introduction -In the Maven ecosystem, dependencies and artefacts required to develop Java projects can be automatically downloaded from remote Maven repositories using a set of unique identifiers (aka coordinates): the groupId, artefactId and version. - -Maven repositories use a standard directory structure for their hosting, which enables to easily identify and download any artefact with its (groupId, artefactid, version) coordinates. Although it is technically not *required*, Maven repositories often provide an index of all the files they host, mostly for IDEs ( e.g. Eclipse, IntelliJ IDEA, or NetBeans). 
These index files are usually generated with [Maven Indexer](https://maven.apache.org/maven-indexer/) and consist of gzipped Lucene indexes stored in a `.index/` directory at the root of the repository. - -The largest and most used Maven repository is of course [Maven Central](https://search.maven.org/), but there are many, many [other repositories](https://mvnrepository.com/repos/central) available around. These are set up by individuals, companies and organisations to provide their own builds or domain-specific repositories. Since it is by no means necessary to register repositories, and as far as we know, there is no exhaustive list of Maven repositories. - -The resources in this directory are an attempt to identify a list of Maven repository servers, as complete as possible. We also publish a list of servers that provide public indexes that can be analysed and exported with the [Maven index exporter](https://github.com/borisbaldassari/maven-index-exporter) Docker image. +In the Maven ecosystem, dependencies and artefacts required to develop Java projects can +be automatically downloaded from remote Maven repositories using a set of unique +identifiers (aka coordinates): the groupId, artefactId and version. + +Maven repositories use a standard directory structure for their hosting, which makes it +easy to identify and download any artefact with its (groupId, artefactId, version) +coordinates. Although it is technically not *required*, Maven repositories often provide +an index of all the files they host, mostly for IDEs (e.g. Eclipse, IntelliJ IDEA, or +NetBeans). These index files are usually generated with [Maven +Indexer](https://maven.apache.org/maven-indexer/) and consist of gzipped Lucene indexes +stored in a `.index/` directory at the root of the repository. 
+ +The largest and most used Maven repository is of course [Maven +Central](https://search.maven.org/), but there are many, many [other +repositories](https://mvnrepository.com/repos/central) available around. These are set +up by individuals, companies and organisations to provide their own builds or +domain-specific repositories. Since it is by no means necessary to register +repositories, there is, as far as we know, no exhaustive list of Maven repositories. + +The resources in this directory are an attempt to identify a list of Maven repository +servers, as complete as possible. We also publish a list of servers that provide public +indexes that can be analysed and exported with the [Maven index +exporter](https://github.com/borisbaldassari/maven-index-exporter) Docker image. # Method ## Build a list of URLs from poms -We started from a dump of all pom files hosted on Maven Central (6.9 million files XML files at the time of collection). For each pom we looked for XML nodes that can represent Maven repositories; starting from the root of the document and using XPath expressions we specifically looked for: +We started from a dump of all pom files hosted on Maven Central (6.9 million XML +files at the time of collection). For each pom we looked for XML nodes that can +represent Maven repositories; starting from the root of the document and using XPath +expressions we specifically looked for: * `.//m:repositories/m:repository/` * `.//m:pluginRepository` * `.//m:distributionManagement/m:snapshotRepository` * `.//m:distributionManagement/m:repository` The transformation can be reproduced with the scripts in the `scripts/` directory: ``` time bash extract_repositories_from_stock.sh list_poms.txt | tee extract.log ``` -The full execution took 61 hours and produced a list of "only" 928808 lines. Each line provides the origin of the URL in the POM, the repository id, and the URL itself. 
+The full execution took 61 hours and produced a list of "only" 928808 lines. Each line +provides the origin of the URL in the POM, the repository id, and the URL itself. ``` distrib_snapshot,ossrh,https://oss.sonatype.org/content/repositories/snapshots distrib_repo,ossrh,https://oss.sonatype.org/service/local/staging/deploy/maven2/ ``` ## Download properties -In the resulting set, there are many duplicates, non-existent, private or invalid URLs. +In the resulting set, there are many duplicates, non-existent, private or invalid URLs. -To make sure that we only list publicly available servers we tried to download the Maven index properties file from every server. This properties file is mandatory in Maven indexer; it can be found at `.index/nexus-maven-repository-index.properties` and contains the list of incremental updates to the index. +To make sure that we only list publicly available servers we tried to download the Maven +index properties file from every server. This properties file is mandatory in Maven +indexer; it can be found at `.index/nexus-maven-repository-index.properties` and +contains the list of incremental updates to the index. The sequence of actions is as follows: * Remove printed comments, sort and remove duplicate lines: ``` grep -Ev "^# " extract.log | sort -u > extract_uniq.txt ``` * Extract the list of URLs (3rd column) and filter all but http(s) links: ``` cat result_uniq.txt | cut -d, -f 3 | grep -E '^http' > list_urls.txt ``` -* The output list has 7145 lines URLs to test. For each item, we try to get the file in `/.index/nexus-maven-repository-index.properties`. If it yields a file, save it. +* The output list has 7145 lines URLs to test. For each item, we try to get the file in + `/.index/nexus-maven-repository-index.properties`. If it yields a file, save it. ```shell SUFFIX="/.index/nexus-maven-repository-index.properties" for url in `cat list_urls.txt`; do echo "Testing URL [$url]." 
full_url="${url}${SUFFIX}" name=$(echo $url | cut -d/ -f 3- | tr '/' '_') full_name="${name}.properties" echo " Writing to [$full_name]." wget -O servers/"$full_name" --tries=2 $full_url & done ``` -* This downloads in the `servers/` directory 3820 properties files. Most of them are empty or contain invalid information, leaving only files that contain an actual list of Maven indexer compressed files. -* Rebuild the list of URLs by removing 404s (i.e. servers that did not create a file). Remove trailing slashes to prevent duplicates, sort and make unique: +* This downloads in the `servers/` directory 3820 properties files. Most of them are + empty or contain invalid information, leaving only files that contain an actual list + of Maven indexer compressed files. +* Rebuild the list of URLs by removing 404s (i.e. servers that did not create a file). + Remove trailing slashes to prevent duplicates, sort and make unique: ```shell for f in `ls ../servers/`; do - url=$(echo ${f%.properties} | tr '_' '/'); + url=$(echo ${f%.properties} | tr '_' '/'); grep ${url%/} list_urls_full.txt; done | sed 's:/*$::' | sort -u > list_urls_final.txt ``` -The result is a list of 339 unique URLs: to be downloaded here: +The result is a list of 339 unique URLs, which can be downloaded here: [list_urls_final.txt](https://files.nuclino.com/files/e75205b3-354e-4794-a43a-d9f98ad08039/list_urls_final.txt) ## Checking compatibility -To ensure that these repositories can be actually parsed with the Maen index exporter, there is no better way than parsing them and generating the index and text export. For this, we first need to download all indexes from all servers: +To ensure that these repositories can actually be parsed with the Maven index exporter, +there is no better way than parsing them and generating the index and text export.
For +this, we first need to download all indexes from all servers: ``` bash scripts/convert_url_to_repo.sh ``` -This will rely on the list of directories downloaded previously, and generate a series of subdirectories for each server, with the index files. If the index files already exist they won't be downloaded again. +This will rely on the list of directories downloaded previously, and generate a series +of subdirectories for each server, with the index files. If the index files already +exist they won't be downloaded again. -The next step is to execute the docker image from [bbaldassari/maven-index-exporter](https://github.com/borisbaldassari/maven-index-exporter) to export all text indexes in `/export/`. +The next step is to execute the docker image from +[bbaldassari/maven-index-exporter](https://github.com/borisbaldassari/maven-index-exporter) +to export all text indexes in `/export/`. ```shell mkdir -p ../maven_repositories/ -for i in `ls`; do - time docker run -v /data/work/$i:/work bbaldassari/maven-index-exporter | tee ../logs/$i.log; - mv $i/ ../maven_repositories/; +for i in `ls`; do + time docker run -v /data/work/$i:/work bbaldassari/maven-index-exporter | tee ../logs/$i.log; + mv $i/ ../maven_repositories/; done ``` -This again filters out some servers that use a Maven Indexer version different from the Docker image's compatibility. +This again filters out some servers that use a Maven Indexer version different from the +Docker image's compatibility. # Result The final list contains only Maven repositories that: * use Maven Indexer for their indexing, * are publicly available, * are still available as of 2021-11-20, and * can be extracted using the Maven index exporter Docker image. -Please note that there will probably be a huge amount of artefact duplicates, as several server names can map to to the same repository, and some repositories might mirror existing content. 
+Please note that there will probably be a huge amount of artefact duplicates, as several +server names can map to the same repository, and some repositories might mirror +existing content. List of downloads: -* The curated list of maven repositories (333 servers): [list_maven_repositories_with_index.txt](maven_repositories/list_maven_repositories_with_index.txt) -* A list of compressed text exports for the above maven repositories (as of 2021-11-28): https://icedrive.net/1/01BQpqC6rA - We will add more downloads as they are generated, so stay tuned. - +* The curated list of maven repositories (333 servers): + [list_maven_repositories_with_index.txt](maven_repositories/list_maven_repositories_with_index.txt) +* A list of compressed text exports for the above maven repositories (as of 2021-11-28): + https://icedrive.net/1/01BQpqC6rA We will add more downloads as they are generated, so + stay tuned. diff --git a/docs/run_maven_index_exporter.md b/docs/run_maven_index_exporter.md index 3174d3b..720d34d 100644 --- a/docs/run_maven_index_exporter.md +++ b/docs/run_maven_index_exporter.md @@ -1,60 +1,59 @@ - # Run Maven index exporter - ## Running the full export The `run_full_export.py` script located in `scripts/` provides an easy way to run the export as a cron batch job, and copy the resulting text export to a specific location. ## Running the image only -The Docker image uses volumes to exchanges files. Prepare a directory with -enough space disk (see warning below) and pass it to docker: +The Docker image uses volumes to exchange files. Prepare a directory with enough disk +space (see warning below) and pass it to docker: ``` -$ docker run -v /local/work/dir:/work bbaldassari/maven-index-exporter +$ LOCAL_DIR=/tmp/work +$ docker run -v $LOCAL_DIR:/work $USER/maven-index-exporter ``` -Please note that the local work dir MUST be an absolute path, as docker won't -mount relative paths as volumes.
+Please note that the local work dir MUST be an absolute path, as docker won't mount +relative paths as volumes. -For our purpose only the fld file is kept, so if you need other export files -you should simply edit the `extract_indexes.sh` script and comment the lines -that do the cleaning. Then rebuild the Docker image and run it. +For our purpose only the fld file is kept, so if you need other export files you should +simply edit the `extract_indexes.sh` script and comment out the lines that do the cleaning. +Then rebuild the Docker image and run it. ## Running as cron The `run_full_export.py` script located in `scripts/` provides an easy way to run the export as a cron batch job, and copy the resulting text export to a specific location. -Simply use and adapt the crontab command as follows: +Simply use and adapt the crontab command as follows: ``` -cd /home/boris/maven-index-exporter/scripts/ && /home/boris/maven-index-exporter/scripts/myvenv/bin/python /home/boris/maven-index-exporter/scripts/run_full_export.py https://repo.maven.apache.org/maven2/ /tmp/maven-index\ --exporter/ /var/www/html/maven_index_exporter/ 2>&1 > /home/boris/run_maven_exporter_$(date +"%Y%m%d-%H%M%S").log - +cd $HOME/maven-index-exporter/scripts/ && \ + ./myvenv/bin/python $HOME/maven-index-exporter/scripts/run_full_export.py https://repo.maven.apache.org/maven2/ \ + /tmp/maven-index-exporter/ /var/www/html/maven_index_exporter/ > /tmp/run_maven_exporter_$(date +"%Y%m%d-%H%M%S").log 2>&1 ``` The script takes three mandatory arguments: ``` -Usage: run_full_export.py url work_dir publish_dir +Usage: run_full_export.py url work_dir publish_dir - url is the base url of the maven repository instance. Example: https://repo.maven.apache.org/maven2/ - work_dir must be an absolute path to the temp directory. Example: /tmp/maven-index-exporter/ - publish_dir must be an absolute path to the final directory. Example: /var/www/html/ ``` It is recommended to setup a virtual environment to run the script.
``` $ python3 -m venv myvenv $ source venv/bin/activate ``` Python modules to be installed are provided in the `requirements.txt` file. diff --git a/scripts/run_full_export.py b/scripts/run_full_export.py index efe8bbc..8ba4084 100644 --- a/scripts/run_full_export.py +++ b/scripts/run_full_export.py @@ -1,167 +1,168 @@ # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import docker -import requests -import re +import datetime import glob +import re import sys -import datetime -from os import getcwd, chdir -from os.path import getsize, isdir, isfile, isabs, join +from os import chdir, getcwd +from os.path import getsize, isabs, isdir, isfile, join from pathlib import Path -from urllib.parse import urljoin from shutil import copy2 +from urllib.parse import urljoin + +import requests +import docker # Check paramaters if len(sys.argv) != 4: print("Usage:", sys.argv[0], "url work_dir publish_dir") print(" - url is the base url of the maven repository instance.") print(" Example: https://repo.maven.apache.org/maven2/") print(" - work_dir must be an absolute path to the temp directory.") print(" Example: /tmp/maven-index-exporter/") print(" - publish_dir must be an absolute path to the final directory.") print(" Example: /var/www/html/") exit() - + base_url = sys.argv[1] work_dir = sys.argv[2] publish_dir = sys.argv[3] + def _docker_run(docker_image: str): """ Start the container for the maven index export, using the image 'bbaldassari/maven-index-exporter'. If needed the image is pulled from docker hub. If it already exists, simply use the local one. """ # Initialise the docker client. client = docker.from_env() myimage = None for image in client.images.list(name=docker_image): myimage = image break - + if myimage is None: print(f"Docker: Could not find {docker_image}. 
Pulling it.") myimage = client.images.pull(repository=docker_image) else: print("Docker: Found image {myimage} locally, ID is {myimage.attrs['Id']}.") - + ret = client.containers.run( myimage, tty=True, command=["sh", "/opt/extract_indexes.sh", "/work/"], volumes={work_dir: {"bind": "/work", "mode": "rw"}}, ) print(f"Docker log:\n{ret.decode()}") + def _download_indexes(instance_url: str): """ Download all required indexes from the .index/ directory of the specified instance. """ print(f"# Downloading all required indexes") index_url = urljoin(instance_url, ".index/") properties_name = "nexus-maven-repository-index.properties" properties_file = join(work_dir, properties_name) properties_url = urljoin(index_url, properties_name) # Retrieve properties file. print(f" - Downloading {properties_file}.") content = requests.get(properties_url).content.decode() open(properties_file, "w").write(content) diff_re = re.compile("^nexus.index.incremental-[0-9]+=([0-9]+)") for line in content.split("\n"): diff_group = diff_re.match(line) if diff_group is not None: ind_name = "nexus-maven-repository-index." + diff_group.group(1) + ".gz" ind_path = join(work_dir, ind_name) ind_url = urljoin(index_url, ind_name) if isfile(ind_path): print(f" - File {ind_path} exists, skipping download.") else: print( ( f" - File {ind_path} doesn't exist. " f"Downloading file from {ind_url}." ) ) # Retrieve incremental gz file contentb = requests.get(ind_url).content open(ind_path, "wb").write(contentb) # Retrieve main index file. ind_path = join(work_dir, "nexus-maven-repository-index.gz") ind_url = urljoin(index_url, "nexus-maven-repository-index.gz") if isfile(ind_path): print(f" - File {ind_path} exists, skipping download.") else: print(f" - File {ind_path} doesn't exist. 
Downloading file from {ind_url}") contentb = requests.get(ind_url).content open(ind_path, "wb").write(contentb) - ############################################### # Start execution ############################################### now = datetime.datetime.now() print(f"Script: {sys.argv[0]}") print("Timestamp:", now.strftime("%Y-%m-%d %H:%M:%S")) print(f"* URL: {base_url}") print(f"* Work_Dir: {work_dir}") # Check work_dir and create it if needed. if isdir(work_dir): print("Work_Dir {work_dir} exists. Reusing it.") else: try: print("Cannot find work_dir {work_dir}. Creating it.") Path(work_dir).mkdir(parents=True, exist_ok=True) except OSError as error: print(f"Could not create work_dir {work_dir}: {error}.") - + assert isdir(work_dir) assert isabs(work_dir) # Grab all the indexes # Only fetch the new ones, existing files won't be re-downloaded. _download_indexes(base_url) # Run Docker on the downloaded indexes. _docker_run("bbaldassari/maven-index-exporter") print("Export directory has the following files:") owd = getcwd() chdir(join(work_dir, "export")) myfile = None re_fld = re.compile(r".*\.fld$") for file in glob.glob("*.*"): print(" -", file, "size", getsize(file)) - if (re_fld.match(file)): + if re_fld.match(file): myfile = file # Now copy the results to the desired location: publish_dir. if isfile(myfile): print("Found fld file:", myfile) else: print("Cannot find .fld file. 
Exiting") exit(4) publish_file = join(publish_dir, "export.fld") print(f"Copying files to {publish_file}..") try: copy2(myfile, publish_file) except OSError as error: - print(f"Could not publish results in {publish_dir}: {error}.") + print(f"Could not publish results in {publish_dir}: {error}.") now = datetime.datetime.now() print(f"Script finished on", now.strftime("%Y-%m-%d %H:%M:%S")) - diff --git a/scripts/test_docker_image.sh b/scripts/test_docker_image.sh old mode 100644 new mode 100755 index fe664cc..7331a29 --- a/scripts/test_docker_image.sh +++ b/scripts/test_docker_image.sh @@ -1,121 +1,121 @@ #!/bin/bash # Copyright (C) 2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information DOCKER_IMAGE="maven-index-exporter" LOG=test_docker_image.log -# This script builds the docker image for maven-index-exporter, and +# This script builds the docker image for maven-index-exporter, and # executes it on a known set of indexes and checks the results in order # to test the full tool chain. echo "Script started on `date +%Y%m%d_%H%M%S`." echo "* Writing log to $LOG." # Find location of script directory OLD_DIR=$(pwd) REPO_DIR=$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd) cd $OLD_DIR WORK_DIR=$REPO_DIR/tests/repository_test EXPORT_DIR=$WORK_DIR/export # First clean up and remove any docker image with our own name docker rmi $DOCKER_IMAGE >>$LOG 2>&1 RET=$? if [[ $RET -eq 0 ]]; then echo "* Docker image [$DOCKER_IMAGE] deleted." elif [[ $RET -eq 1 ]]; then echo "* Docker image [$DOCKER_IMAGE] doesn't exist." else echo "Error when deleting docker image [$DOCKER_IMAGE]." fi # Build the image and tag it as $DOCKER_IMAGE cd $REPO_DIR/docker echo "* Building docker image." docker build . -t $DOCKER_IMAGE --no-cache >>$LOG RET=$? if [[ $RET -eq 0 ]]; then echo "PASS: docker build returned 0." 
-else +else echo "FAIL: docker build returned $RET." exit 20 fi # Assert docker image has been created. COUNT=$(docker images | grep -E "^$DOCKER_IMAGE\s" | wc -l) -if [[ $COUNT -eq 0 ]]; then +if [[ $COUNT -eq 0 ]]; then echo "FAIL: Docker image cannot be listed." exit 10 -else +else echo "PASS: Docker image is listed." fi # Run the image on the maven indexes. docker run -v $WORK_DIR:/work $DOCKER_IMAGE >>$LOG 2>&1 # Assert exported text files are there, with the correct content. EXPORT_FILE=$(ls $EXPORT_DIR/*.fld) -if [[ -e $EXPORT_FILE ]]; then +if [[ -e $EXPORT_FILE ]]; then echo "PASS: file [$EXPORT_FILE] has been created." -else +else echo "FAIL: file [$EXPORT_FILE] has NOT been created." exit 20 fi DOCS=$(grep -E "^doc" $EXPORT_FILE | wc -l) -if [[ $DOCS -eq 10 ]]; then +if [[ $DOCS -eq 10 ]]; then echo "PASS: file [$EXPORT_FILE] has 10 docs." -else +else echo "FAIL: file [$EXPORT_FILE] has $DOCS docs, should be 10." exit 20 fi FIELDS=$(grep -E "^ field" $EXPORT_FILE | wc -l) -if [[ $FIELDS -eq 35 ]]; then +if [[ $FIELDS -eq 35 ]]; then echo "PASS: file [$EXPORT_FILE] has 35 fields." -else +else echo "FAIL: file [$EXPORT_FILE] has $FIELDS fields, should be 35." exit 20 fi FIELDS=$(grep "value al.aldi|sprova4j|0.1.0|sources|jar" $EXPORT_FILE | wc -l) -if [[ $FIELDS -eq 1 ]]; then +if [[ $FIELDS -eq 1 ]]; then echo "PASS: file [$EXPORT_FILE] has sprova4j-0.1.0-sources.jar." -else +else echo "FAIL: file [$EXPORT_FILE] has NOT sprova4j-0.1.0-sources.jar." exit 20 fi FIELDS=$(grep "value al.aldi|sprova4j|0.1.0|NA|pom" $EXPORT_FILE | wc -l) -if [[ $FIELDS -eq 1 ]]; then +if [[ $FIELDS -eq 1 ]]; then echo "PASS: file [$EXPORT_FILE] has sprova4j-0.1.0.pom." -else +else echo "FAIL: file [$EXPORT_FILE] has NOT sprova4j-0.1.0.pom." exit 20 fi FIELDS=$(grep "value al.aldi|sprova4j|0.1.1|sources|jar" $EXPORT_FILE | wc -l) -if [[ $FIELDS -eq 1 ]]; then +if [[ $FIELDS -eq 1 ]]; then echo "PASS: file [$EXPORT_FILE] has sprova4j-0.1.1-sources.jar." 
-else +else echo "FAIL: file [$EXPORT_FILE] has NOT sprova4j-0.1.1-sources.jar." exit 20 fi FIELDS=$(grep "value al.aldi|sprova4j|0.1.1|NA|pom" $EXPORT_FILE | wc -l) -if [[ $FIELDS -eq 1 ]]; then +if [[ $FIELDS -eq 1 ]]; then echo "PASS: file [$EXPORT_FILE] has sprova4j-0.1.1.pom." -else +else echo "FAIL: file [$EXPORT_FILE] has NOT sprova4j-0.1.1.pom." exit 20 fi # Cleanup rm -rf $EXPORT_DIR cd $OLD_DIR