#!/usr/bin/env python
# Copyright (C) 2020  The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""
The purpose of this build script is to create example repositories from bash
scripts and to extract assertion data from them into json files.

Advantages:

    - the bash script documents the repository creation
    - automating creation allows easy repository updates
    - automating extraction allows easier updates of assertion data

Create a new repository
-----------------------

First, create a bash script `myscript.sh` which will serve to create, update
and document the repository.

Here is a minimal working example:

    #!/usr/bin/env bash

    # Setup bash in strict mode
    set -euo pipefail

    # Allow calling the script directly: `./myscript.sh repository-name`
    # (`${1:-}` keeps `set -u` happy when no argument is given, which is
    # how `build.py` invokes the script)
    if [ -n "${1:-}" ]; then
        HG_REPO="$1"
    fi

    # Prepare the repository
    hg init "$HG_REPO"
    cd "$HG_REPO"
    cat > .hg/hgrc << EOL
    [ui]
    username = Full Name
    EOL

    # Populate the repository
    touch README.md
    hg add README.md
    hg commit -m "Add README"

Then build the repository archive and the associated json file containing
the repository objects identities:

    ./build.py archive myscript.sh

You should now have a `myscript.tgz` containing the repository
and a `myscript.json` with the repository objects identities.

Update a repository
-------------------

When there is a build script file:

    Update the source `repository.sh` and run
    `./build.py archive repository.sh`.
    New `tgz` and `json` files will be produced.
    The old files will be renamed.

When there is no build script file:

    Consider not updating the repository and adding a new one instead.

    Otherwise, uncompress the `repository.tgz`, manually update the
    `repository` and run `./build.py archive repository`.
    New `tgz` and `json` files will be produced.
    The old files will be renamed.

Extract json from existing repository
-------------------------------------

For an existing repository without a build script it is possible to extract
the corresponding json file by running `./build.py json repository.tgz`
"""

import json
import shutil
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, NoReturn, Sequence

import click


def abort(message: str) -> NoReturn:
    """Print *message* on stderr and abort the current click command."""
    click.echo(message, err=True)
    click.get_current_context().abort()


def _run(command: Sequence[str], **kwargs: Any) -> None:
    """Run *command* and abort the script if it exits with a non-zero status.

    Keyword arguments are forwarded to `subprocess.call` (e.g. `cwd`, `env`).
    """
    returncode = subprocess.call(list(command), **kwargs)
    if returncode != 0:
        abort(f"Command {' '.join(command)!r} failed with exit code {returncode}")


def backup(path: Path) -> None:
    """Rename an existing path by appending a timestamped `.bak.` suffix."""
    click.echo(f"Creating backup of {path}")
    now = datetime.now()
    # Keep the original suffix (if any) so `repo.json` becomes
    # `repo.json.bak.<timestamp>` rather than `repo.bak.<timestamp>`.
    backup_path = path.with_suffix(f"{path.suffix}.bak.{now:%Y%m%d%H%M%S}")
    path.rename(backup_path)
    click.echo(f"Backup created: {str(backup_path)!r}")


def _build_repository(script: str) -> Path:
    """Build a repository from a bash script.

    The repository is created next to the script, at the script path minus
    its `.sh` suffix; an already existing path there is backed up first.

    Args:
        script: path to a bash script with a `.sh` suffix.

    Returns:
        The absolute path of the built repository.
    """
    script_path = Path(script).absolute()

    if not script_path.exists():
        abort(f"Path {str(script_path)!r} does not exists.")

    if script_path.suffix != ".sh":
        abort(f"Wrong suffix: {script_path.suffix!r}. Expected: '.sh'")

    repository_path = script_path.with_suffix("")

    if repository_path.exists():
        backup(repository_path)

    click.echo(f"Running build script: {str(script_path)!r}")
    # NOTE(review): the script runs with an environment containing only
    # HG_REPO (PATH, HOME, ... are not inherited). Presumably this keeps the
    # build independent from the caller's Mercurial configuration -- confirm
    # before changing it.
    _run(
        ["bash", "-euo", "pipefail", str(script_path)],
        env={"HG_REPO": str(repository_path)},
    )

    return repository_path


def _parse_identities(lines: Iterable[str]) -> Dict[str, Any]:
    """Sort `swh-hg-identify all` output lines into a json-ready mapping.

    Each non-blank line is expected to start with a
    `<scheme>:<version>:<type>:<id>` identifier, optionally followed by a tab
    and extra data which is ignored. Aborts on malformed lines, on unknown
    object types, and when no snapshot identifier is present.
    """
    list_keys = {"dir": "directories", "rev": "revisions", "rel": "releases"}
    identities: Dict[str, Any] = {
        "directories": [],
        "revisions": [],
        "releases": [],
        "snapshot": None,
    }

    for line in lines:
        if not line.strip():
            continue
        uri = line.partition("\t")[0]
        parts = uri.split(":")
        if len(parts) != 4:
            abort(f"{line!r} is not a valid object identity line")
        swhid_type, swhid = parts[2], parts[3]
        if swhid_type in list_keys:
            identities[list_keys[swhid_type]].append(swhid)
        elif swhid_type == "snp":
            identities["snapshot"] = swhid
        else:
            abort(f"{line!r} unknown type {swhid_type!r}")

    if identities["snapshot"] is None:
        # Without this check the json would be written without a usable
        # snapshot (the original code crashed with an unbound local here).
        abort("No snapshot identity found in the `swh-hg-identify` output")

    return identities


def _build_json(source: str) -> Path:
    """Build the json file of object identities for *source*.

    Args:
        source: a `.tgz` repository archive (extracted next to itself), a
            `.sh` build script (run through `_build_repository`), or the
            path of an existing repository.

    Returns:
        The path of the written json file (repository path with a `.json`
        suffix); an already existing file there is backed up first.
    """
    if source.endswith(".tgz"):
        archive_path = Path(source).absolute()
        repository_path = archive_path.with_suffix("")

        if repository_path.exists():
            backup(repository_path)

        _run(["tar", "-xf", str(archive_path)], cwd=archive_path.parent)
    elif source.endswith(".sh"):
        repository_path = _build_repository(source)
    else:
        repository_path = Path(source).absolute()

    click.echo(f"Extracting object identities: {str(repository_path)!r}")
    output = subprocess.check_output(["swh-hg-identify", "all"], cwd=repository_path)
    identities = _parse_identities(output.decode().splitlines())

    json_path = repository_path.with_suffix(".json")

    if json_path.exists():
        backup(json_path)

    click.echo(f"Creating object identities file: {str(json_path)!r}")
    json_path.write_text(json.dumps(identities))

    return json_path


# Only the first paragraph of the module docstring is used as the group help;
# the full text is available through the `man` command.
@click.group(help=__doc__.split("\n\n")[0])
def main():
    pass


@main.command()
def man():
    """Display script's manual."""
    click.echo(__doc__)


@main.command("repository")
@click.argument("script")
def build_repository(script: str):
    """Build a repository.

    SCRIPT must be a bash script with a `.sh` suffix.

    The generated repository will have the same path minus the `.sh` suffix.

    The script will be passed repository name as the `HG_REPO` environment variable.
    """
    _build_repository(script)


@main.command("json")
@click.argument("source")
def build_json(source: str):
    """Build a json file of object identities.

    SOURCE can be a script as required by the `repository` command
    (see repository --help), a repository archive, or an existing repository.

    The produced file will have the source path with the `.json` suffix.
    """
    _build_json(source)


@main.command("archive")
@click.option(
    "--clean", "-c", default=False, is_flag=True, help="Remove created artifacts",
)
@click.argument("source")
def build_archive(source: str, clean: bool = False):
    """Build a repository archive and its associated json file.

    SOURCE can be a script as required by the `repository` command
    (see repository --help), or an existing repository.

    The produced archive will have the source path with the `.tgz` suffix.
    The produced json file will have the source path with the `.json` suffix.
    """
    if source.endswith(".sh"):
        repository_path = _build_repository(source)
    else:
        repository_path = Path(source).absolute()
        if not (repository_path / ".hg").exists():
            abort(f"{str(repository_path)!r} is not a Mercurial repository")

    json_path = _build_json(str(repository_path))

    archive_path = repository_path.with_suffix(".tgz")
    if archive_path.exists():
        backup(archive_path)

    # Archive members are given relative to the common parent directory so
    # the archive does not embed absolute paths. `-z` makes the content match
    # the `.tgz` name (gzip-compressed tar); `tar -xf` detects it on extract.
    _run(
        [
            "tar",
            "-czf",
            archive_path.name,
            repository_path.name,
            json_path.name,
        ],
        cwd=archive_path.parent,
    )

    if clean:
        shutil.rmtree(repository_path)
        json_path.unlink()


if __name__ == "__main__":
    main()