diff --git a/swh/provenance/tests/data/generate_repo.py b/swh/provenance/tests/data/generate_repo.py --- a/swh/provenance/tests/data/generate_repo.py +++ b/swh/provenance/tests/data/generate_repo.py @@ -61,6 +61,19 @@ ) for rev_d in repo_desc: + parents = rev_d.get("parents") + if parents: + # move at the proper (first) parent position, if any + check_call(["git", "checkout", parents[0]], stdout=PIPE) + + # give a branch name (the msg) to each commit to make it easier to + # navigate in history + check_call(["git", "checkout", "-b", rev_d["msg"]], stdout=PIPE) + + if parents and len(parents) > 1: + # it's a merge + check_call(["git", "merge", "--no-commit", *parents[1:]], stdout=PIPE) + clean_wd() for path, content in rev_d["content"].items(): p = pathlib.Path(path) @@ -78,6 +91,7 @@ "git", "commit", "--all", + "--allow-empty", "-m", rev_d["msg"], ], @@ -92,7 +106,7 @@ @click.option("-C", "--clean-output/--no-clean-output", default=False) def main(input_file, output_dir, clean_output): repo_desc = yaml.load(open(input_file)) - if clean_output: + if clean_output and os.path.exists(output_dir): shutil.rmtree(output_dir) generate_repo(repo_desc, output_dir) diff --git a/swh/provenance/tests/data/generate_storage_from_git.py b/swh/provenance/tests/data/generate_storage_from_git.py --- a/swh/provenance/tests/data/generate_storage_from_git.py +++ b/swh/provenance/tests/data/generate_storage_from_git.py @@ -78,8 +78,8 @@ @click.option( "-r", "--head", - default="master", - help="head revision to start from", + default=None, + help="head revision to start from; if unset, load all the heads", ) @click.option("-o", "--output", default=None, help="output file") @click.argument("git-repo") @@ -95,18 +95,30 @@ if output is None: output = f"{git_repo}.msgpack" - if not re.match("[0-9a-fA-F]{40}", head): - headhash = ( - check_output(["git", "-C", git_repo, "rev-parse", head]).decode().strip() + if head is None: + # head is None, list all branches + heads = ( + 
check_output(["git", "-C", git_repo, "show-ref", "--heads", "-s"]) + .decode() + .splitlines() ) - click.echo(f"Revision hash for {head} is {headhash}") else: - headhash = head + if not re.match("[0-9a-fA-F]{40}", head): + click.echo(f"Revision hash for {head} ", nl=False) + head = ( + check_output(["git", "-C", git_repo, "rev-parse", head]) + .decode() + .strip() + ) + click.echo(f"is {head}") + heads = [head] + cache: Dict[bytes, dict] = {} outf = open(output, "wb") outd = [] - for e in dump_git_revision(h2b(headhash), storage=sto, cache=cache): - outd.append(e) + for head in heads: + for e in dump_git_revision(h2b(head), storage=sto, cache=cache): + outd.append(e) outf.write(msgpack_dumps(outd)) click.echo(f"Wrote {len(outd)} objects in {output}") diff --git a/swh/provenance/tests/data/repo_with_merges.yaml b/swh/provenance/tests/data/repo_with_merges.yaml new file mode 100644 --- /dev/null +++ b/swh/provenance/tests/data/repo_with_merges.yaml @@ -0,0 +1,73 @@ +# generate a git history with a multi-merge revision +# *-. 
R08 +# |\ \ +# | * | R07 +# | | | +# | | * R06 +# | | | +# * | | R05 +# | | | +# * | | R04 +# |/ | +# * | R03 +# | / +# * / R02 +# |/ +# * R01 +# | +# * R00 +- msg: R00 + date: 1000000000 + content: + A/B/C/a: "content a" +- msg: R01 + date: 1000000010 + content: + A/B/C/a: "content a" + A/B/C/b: "content b" +- msg: R02 + date: 1000000020 + content: + A/C/a: "content a" + A/C/b: "content b" +- msg: R03 + date: 1000000030 + content: + A/B/C/a: "content a" + A/B/C/b: "content b" +- msg: R04 + date: 1000000040 + content: + A/C/a: "content a" + A/C/b: "content b" +- msg: R05 + date: 1000000050 + content: + A/B/C/a: "content a" + A/B/C/b: "content b" + A/B/c: "content c" +- msg: R06 + parents: + - R01 + date: 1000000005 # /!\ we add an earlier version of the 'b' file + content: + A/B/C/a: "content a" + A/B/C/b: "content b" +- msg: R07 + parents: + - R03 + date: 1000000035 # /!\ we add an earlier version of the 'c' file + content: + A/B/C/a: "content a" + A/B/C/b: "content b" + A/B/c: "content c" +- msg: R08 + parents: + - R05 + - R06 + - R07 + date: 1000000060 + content: + A/B/C/a: "content a" + A/B/C/b: "content b" + A/B/c: "content c"