#!/usr/bin/env python
# Copyright (C) 2018-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

"""Build example Mercurial repositories, identity files and archives."""

import json
import shutil
import subprocess
from datetime import datetime
from pathlib import Path

import click


def abort(message):
    """Abort the script with a message.

    The message is written to stderr and the current click context is
    aborted, so this must only be called while a click command is running.
    """
    click.echo(message, err=True)
    click.get_current_context().abort()


def backup(path: Path):
    """Rename an existing path.

    The new name is the original name followed by ``.bak.<timestamp>``, where
    the timestamp is the local time formatted as ``YYYYmmddHHMMSS``.
    """
    click.echo(f"Creating backup of {path}")
    now = datetime.now()
    backup_path = path.with_suffix(f"{path.suffix}.bak.{now:%Y%m%d%H%M%S}")
    path.rename(backup_path)
    click.echo(f"Backup created: {str(backup_path)!r}")


def _run(command, **kwargs):
    """Run an external command and abort the script if it exits non-zero.

    Args:
        command: argument list handed to ``subprocess.call``.
        **kwargs: extra keyword arguments for ``subprocess.call``
            (e.g. ``cwd``, ``env``).
    """
    returncode = subprocess.call(command, **kwargs)
    if returncode != 0:
        abort(f"Command {str(command[0])!r} failed with exit code {returncode}")


def _build_repository(script: str) -> Path:
    """Build a repository from a bash script.

    Args:
        script: path to a bash script with a ``.sh`` suffix.

    Returns:
        The repository path: the absolute script path without its suffix.

    The script is aborted when the path does not exist, has the wrong suffix,
    or when the build script exits with a non-zero status. A pre-existing
    repository path is renamed by :func:`backup` before the script runs.
    """
    script_path = Path(script).absolute()

    if not script_path.exists():
        abort(f"Path {str(script_path)!r} does not exist.")

    if script_path.suffix != ".sh":
        abort(f"Wrong suffix: {script_path.suffix!r}. Expected: '.sh'")

    repository_path = script_path.with_suffix("")

    if repository_path.exists():
        backup(repository_path)

    click.echo(f"Running build script: {str(script_path)!r}")
    # The `env` mapping replaces the whole environment: the script only
    # receives HG_REPO and inherits nothing from the caller.
    # NOTE(review): nothing here creates the repository or changes directory,
    # although the `repository` help text says the script runs in an already
    # initialized repository -- confirm the scripts call `hg init` themselves.
    _run(
        ["bash", "-euo", "pipefail", script_path],
        env={"HG_REPO": str(repository_path)},
    )

    return repository_path


def _build_json(source: str) -> Path:
    """Build the json file of object identities of a repository.

    Args:
        source: a ``.tgz`` repository archive (extracted next to itself), a
            ``.sh`` build script (see :func:`_build_repository`), or the path
            of an existing repository.

    Returns:
        The path of the written file: the repository path with a ``.json``
        suffix. A pre-existing file is renamed by :func:`backup` first.

    The identities are read from the output of ``swh-hg-identify all`` run in
    the repository. The script is aborted on an unknown object type or when
    no snapshot identity is reported.
    """
    if source.endswith(".tgz"):
        archive_path = Path(source).absolute()
        repository_path = archive_path.with_suffix("")

        if repository_path.exists():
            backup(repository_path)

        _run(["tar", "-xf", archive_path], cwd=archive_path.parent)
    elif source.endswith(".sh"):
        repository_path = _build_repository(source)
    else:
        repository_path = Path(source).absolute()

    click.echo(f"Extracting object identities: {str(repository_path)!r}")
    output = subprocess.check_output(["swh-hg-identify", "all"], cwd=repository_path)
    lines = output.decode().splitlines()

    directory_swhids = []
    revision_swhids = []
    release_swhids = []
    # Stays None when the output has no "snp" line, so the case is reported
    # explicitly instead of failing on an unbound name.
    snapshot_swhid = None

    for line in lines:
        # Each line is "<swhid uri>\t<something>"; the uri has four
        # colon-separated fields of which the last two are type and hash.
        uri, _ = line.split("\t")
        _, _, swhid_type, swhid = uri.split(":")
        if swhid_type == "dir":
            directory_swhids.append(swhid)
        elif swhid_type == "rev":
            revision_swhids.append(swhid)
        elif swhid_type == "rel":
            release_swhids.append(swhid)
        elif swhid_type == "snp":
            snapshot_swhid = swhid
        else:
            abort(f"{line!r} unknown type {swhid_type!r}")

    if snapshot_swhid is None:
        abort(f"No snapshot identity found for {str(repository_path)!r}")

    json_path = repository_path.with_suffix(".json")

    if json_path.exists():
        backup(json_path)

    click.echo(f"Creating object identities file: {str(json_path)!r}")
    json_path.write_text(
        json.dumps(
            {
                "directories": directory_swhids,
                "revisions": revision_swhids,
                "releases": release_swhids,
                "snapshot": snapshot_swhid,
            }
        )
    )

    return json_path


@click.group()
def main():
    """Build example repositories archive from bash scripts."""


@main.command("repository")
@click.argument("script")
def build_repository(script: str):
    """Build a repository.

    SCRIPT must be a bash script with a `.sh` suffix.

    The generated repository will have the same path minus the `.sh` suffix.

    The script will be executed in an already initialized repository.
    So it only needs to execute commands to populate the repository.
    """
    _build_repository(script)


@main.command("json")
@click.argument("source")
def build_json(source: str):
    """Build a json file of object identities.

    SOURCE can be a script as required by the `repository` command
    (see repository --help), a repository archive, or an existing repository.

    The produced file will have the source path with the `.json` suffix.
    """
    _build_json(source)


@main.command("archive")
@click.option(
    "--clean", "-c", default=False, is_flag=True, help="Remove created artifacts",
)
@click.argument("source")
def build_archive(source: str, clean: bool = False):
    """Build a repository archive.

    SOURCE can be a script as required by the `repository` command
    (see repository --help), or an existing repository.

    The produced archive will have the source path with the `.tgz` suffix.
    It will contain the repository along with the json file of object identities.
    """
    if source.endswith(".sh"):
        repository_path = _build_repository(source)
    else:
        repository_path = Path(source).absolute()
        if not (repository_path / ".hg").exists():
            abort(f"{str(repository_path)!r} is not a Mercurial repository")

    json_path = _build_json(str(repository_path))

    archive_path = repository_path.with_suffix(".tgz")
    if archive_path.exists():
        backup(archive_path)

    # Members are stored relative to the archive's directory so that
    # extracting the archive in place recreates them next to it.
    # `-z` makes the content match the `.tgz` name (gzip-compressed tar).
    _run(
        [
            "tar",
            "-czf",
            archive_path.relative_to(archive_path.parent),
            repository_path.relative_to(archive_path.parent),
            json_path.relative_to(archive_path.parent),
        ],
        cwd=archive_path.parent,
    )

    # Only reached when the archive was created successfully: `_run` aborts
    # on failure, so the sources are never removed without an archive.
    if clean:
        shutil.rmtree(repository_path)
        json_path.unlink()


if __name__ == "__main__":
    main()