Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/mercurial/tests/data/make_expected.py
- This file was added.
Property | Old Value | New Value |
---|---|---|
File Mode | null | 100755 |
#!/usr/bin/env python
""" Extract expected disk load data to be used in loader tests.

This script is used as part of make_example.sh
"""
import hashlib | |||||
import json | |||||
import subprocess | |||||
import sys | |||||
class Repository:
    """ Small wrapper running git commands inside one repository directory. """

    def __init__(self, path):
        self._path = path

    def __call__(self, *args):
        """ Run ``git`` with ``args`` from the repository path.

        Returns the raw bytes produced by the command.
        """
        command = ["git"]
        command.extend(args)
        return subprocess.check_output(command, cwd=self._path)

    def commits(self):
        """ Return the hashes printed by ``git log --format=%H --branches``. """
        output = self("log", "--format=%H", "--branches")
        return output.decode().splitlines()

    def cat_file_content(self, hash):
        """ Return the raw output of ``git cat-file -p`` for ``hash``. """
        return self("cat-file", "-p", hash)

    def refs(self):
        """ Return one ``[objectname, objecttype, refname]`` list per ref. """
        listing = self(
            "for-each-ref", "--format=%(objectname) %(objecttype) %(refname)"
        ).decode()
        # Only two splits: the third field keeps whatever remains of the line.
        return [row.split(" ", 2) for row in listing.splitlines()]

    def _refs_under(self, prefix):
        """ Return target/name dicts for refs whose name starts with ``prefix``.

        The prefix itself is removed from the reported name.
        """
        return [
            {"target": objectname, "name": refname[len(prefix):]}
            for objectname, _objecttype, refname in self.refs()
            if refname.startswith(prefix)
        ]

    def heads(self):
        """ Return the refs found under ``refs/heads/``. """
        return self._refs_under("refs/heads/")

    def tags(self):
        """ Return the refs found under ``refs/tags/``. """
        return self._refs_under("refs/tags/")

    def head_ref(self):
        """ Return the raw output of ``git describe --all HEAD``. """
        return self("describe", "--all", "HEAD")
def git_obj_from_line(repo, line):
    """ Build the Tree or Blob described by one line of a printed git tree.

    ``line`` is a bytes line made of four whitespace-separated fields:
    permissions, object type, object hash and entry name.

    Only the first three separators are split on, so an entry name that
    itself contains spaces stays in one piece instead of breaking the
    four-way unpacking with a ValueError.

    Raises KeyError when the object type is neither "tree" nor "blob".
    """
    obj_perm, obj_type, obj_hash, obj_name = line.decode().split(None, 3)
    type_map = {
        "tree": Tree,
        "blob": Blob,
    }
    return type_map[obj_type](repo, obj_hash, obj_perm, obj_name)
class GitObj:
    """ Base for objects identified by a hash inside a repository. """

    def __init__(self, repo, hash):
        self._repo, self._hash = repo, hash

    def id(self):
        """ Return the hash this object was created with. """
        return self._hash

    def cat_file(self):
        """ Return what the repository's ``cat_file_content`` gives for the hash. """
        content = self._repo.cat_file_content(self._hash)
        return content
class Commit(GitObj):
    """ Commit whose id is re-hashed once hg-git metadata has been removed. """

    def __init__(self, repo, hash, commits):
        """ ``commits`` is the shared hash -> Commit mapping used for parents. """
        super().__init__(repo, hash)
        self._commits = commits
        self._tree = None
        self._object = None

    def id(self):
        """ Return the re-hashed id instead of the hash given at creation. """
        return self.object()["id"]

    def tree(self):
        """ Return the Tree named by this commit, created on first access. """
        if self._tree is not None:
            return self._tree
        self._tree = Tree(self._repo, self.object()["tree"])
        return self._tree

    def rm_hg_metadata(self, data):
        """ Strip what hg-git added to the raw commit ``data``.

        Lines starting with ``HG:`` are dropped, everything from the first
        line containing ``--HG--`` onward is discarded, and the result is
        stripped of surrounding whitespace.
        """
        kept = []
        for raw_line in data.split(b"\n"):
            # The prefix test comes first: an "HG:" line is skipped even if
            # it also contains the "--HG--" marker.
            if not raw_line.startswith(b"HG:"):
                if b"--HG--" in raw_line:
                    break
                kept.append(raw_line)
        return b"\n".join(kept).strip()

    def cat_parts(self):
        """ Split the cleaned commit data into its parts (all as bytes).

        The parts are what ``commit_id_from_parts`` needs to compute the id
        again: tree, parents, author, committer and message.
        """
        payload = self.rm_hg_metadata(self.cat_file())

        # The first line names the tree.
        tree_line, rest = payload.split(b"\n", 1)
        tree_id = tree_line.split()[1]

        # Then come zero or more parent lines.
        parent_ids = []
        while rest.startswith(b"parent"):
            parent_line, rest = rest.split(b"\n", 1)
            parent_ids.append(parent_line.split()[1])

        # Author line, committer line, one skipped line, then the message.
        author_line, committer_line, _, message = rest.split(b"\n", 3)
        return {
            "parents": parent_ids,
            "tree": tree_id,
            "author": author_line.split(b" ", 1)[1],
            "committer": committer_line.split(b" ", 1)[1],
            "message": message,
        }

    def object(self):
        """ Return the commit as a dict for the expected test result. """
        if self._object is not None:
            return self._object

        parts = self.cat_parts()
        record = self._object = {
            "type": "commit",
            "tree": parts["tree"].decode(),
            "parents": [],
            "author": parts["author"].decode(),
            "committer": parts["committer"].decode(),
            "message": parts["message"].decode(),
        }

        # Parents are listed by their re-hashed ids; each one is looked up in
        # (or added to) the shared mapping under its original hash.
        for raw_parent in parts["parents"]:
            parent_hash = raw_parent.decode()
            if parent_hash not in self._commits:
                self._commits[parent_hash] = Commit(
                    self._repo, parent_hash, self._commits
                )
            record["parents"].append(self._commits[parent_hash].id())

        # The id is computed from the parts carrying the re-hashed parents.
        parts["parents"] = [rehashed.encode() for rehashed in record["parents"]]
        record["id"] = commit_id_from_parts(parts)
        return record

    def objects(self):
        """ Yield the objects of the commit's tree, then the commit itself. """
        yield from self.tree().objects()
        yield self.object()
def commit_id_from_parts(parts):
    """ Reassemble commit data from ``parts`` and return its commit id.

    ``parts`` maps "tree", "author", "committer" and "message" to bytes and
    "parents" to a list of bytes. The lines are joined with newlines; the
    message is preceded by an empty line.
    """
    lines = [b"tree " + parts["tree"]]
    lines.extend(b"parent " + parent for parent in parts["parents"])
    lines.append(b"author " + parts["author"])
    lines.append(b"committer " + parts["committer"])
    # The leading newline yields the blank line separating headers from message.
    lines.append(b"\n" + parts["message"])
    return commit_id(b"\n".join(lines))
def commit_id(data):
    """ Return the hex SHA-1 of ``data`` preceded by a commit header.

    The header is ``commit``, a space, the decimal length of ``data`` and a
    NUL byte.
    """
    header = b"commit " + str(len(data)).encode() + b"\0"
    return hashlib.sha1(header + data).hexdigest()
class Tree(GitObj):
    """ Tree object whose children are parsed from its printed listing. """

    def __init__(self, repo, hash, perm=None, name=None):
        super().__init__(repo, hash)
        self._perm, self._name = perm, name
        self._items = None

    def items(self):
        """ Return the child objects, parsing the listing on first use only. """
        if self._items is not None:
            return self._items
        listing = self.cat_file().splitlines()
        self._items = [git_obj_from_line(self._repo, entry) for entry in listing]
        return self._items

    def content(self):
        """ Yield, child after child, whatever their ``content()`` yields. """
        for child in self.items():
            yield from child.content()

    def object(self):
        """ Return the tree as a dict for the expected test result. """
        description = {"type": "tree", "perm": self._perm}
        description["id"] = self.id()
        description["name"] = self._name
        description["objects"] = [child.id() for child in self.items()]
        return description

    def objects(self):
        """ Yield the dicts of every child's objects, then this tree's own. """
        for child in self.items():
            yield from child.objects()
        yield self.object()
class Blob(GitObj):
    """ Blob object listed in a tree, with its permissions and name. """

    def __init__(self, repo, hash, perm=None, name=None):
        super().__init__(repo, hash)
        self._perm, self._name = perm, name

    def content(self):
        """ Yield this blob's hash. """
        yield self._hash

    def object(self):
        """ Return the blob as a dict for the expected test result. """
        description = {"type": "blob", "perm": self._perm}
        description["id"] = self.id()
        description["name"] = self._name
        # The printed blob is stored decoded, as text.
        description["cat-file"] = self.cat_file().decode()
        return description

    def objects(self):
        """ Yield this blob's dict as the only element. """
        yield self.object()
def make_expected(path):
    """ Build the expected test data for the repository at ``path``.

    Returns a dict with two entries: "objects" maps each object id to its
    dict, and "branches" maps ``tag/<name>``, ``branch/<name>`` and
    ``wild-branch/<name>/<target>`` keys to their target revision.
    """

    def revision(target):
        """ Return the entry stored for a ref pointing at ``target``. """
        return {"target": target, "target_type": "revision"}

    repo = Repository(path)
    commits_cache = {}
    objects = []
    for commit_hash in repo.commits():
        # A commit may already be in the cache because it was created as the
        # parent of an earlier one; reuse it instead of building it again.
        try:
            commit = commits_cache[commit_hash]
        except KeyError:
            commit = commits_cache[commit_hash] = Commit(
                repo, commit_hash, commits_cache
            )
        objects.extend(commit.objects())

    refs = {}
    for tag in repo.tags():
        refs[f"tag/{tag['name']}"] = revision(commits_cache[tag["target"]].id())

    seen_branches = set()
    for head in sorted(repo.heads(), key=lambda h: h["name"]):
        target = commits_cache[head["target"]].id()
        # Head names are split on their first "-"; what follows it is used
        # as the branch name.
        _, name = head["name"].split("-", 1)
        if name not in seen_branches:
            seen_branches.add(name)
            key = f"branch/{name}"
        else:
            # Any further head with the same branch name is stored separately.
            key = f"wild-branch/{name}/{target}"
        refs[key] = revision(target)

    objects_by_id = {}
    for obj in objects:
        objects_by_id[obj["id"]] = obj
    return {"objects": objects_by_id, "branches": refs}
if __name__ == "__main__":
    # Usage: make_expected.py <repository path> <output json file>
    expected = make_expected(sys.argv[1])
    # The context manager closes the output file, which the bare open() call
    # never did explicitly.
    with open(sys.argv[2], "w") as output:
        json.dump(expected, output, indent=2)