Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/tests/test_git_bare_cooker.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
""" | """ | ||||
This module contains additional tests for the bare cooker. | This module contains additional tests for the bare cooker. | ||||
Generic cooker tests (eg. without swh-graph) in test_cookers.py also | Generic cooker tests (eg. without swh-graph) in test_cookers.py also | ||||
run on the bare cooker. | run on the bare cooker. | ||||
""" | """ | ||||
import datetime | import datetime | ||||
import glob | |||||
import io | import io | ||||
import itertools | |||||
import subprocess | import subprocess | ||||
import tarfile | import tarfile | ||||
import tempfile | import tempfile | ||||
import unittest.mock | import unittest.mock | ||||
import pytest | import pytest | ||||
from swh.model.from_disk import DentryPerms | from swh.model.from_disk import DentryPerms | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
DirectoryEntry, | DirectoryEntry, | ||||
ObjectType, | |||||
Person, | Person, | ||||
Release, | |||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Snapshot, | |||||
SnapshotBranch, | |||||
TargetType, | |||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.vault.cookers.git_bare import GitBareCooker | from swh.vault.cookers.git_bare import GitBareCooker | ||||
from swh.vault.in_memory_backend import InMemoryVaultBackend | from swh.vault.in_memory_backend import InMemoryVaultBackend | ||||
def get_objects(last_revision_in_graph): | def get_objects(up_to_date_graph, release): | ||||
""" | """ | ||||
Build objects:: | Build objects:: | ||||
rel2 | |||||
| | |||||
v | |||||
rev1 <------ rev2 | rev1 <------ rev2 | ||||
| | | | | | ||||
v v | v v | ||||
dir1 dir2 | dir1 dir2 | ||||
| / | | | / | | ||||
v / v | v / v | ||||
cnt1 <----° cnt2 | cnt1 <----° cnt2 | ||||
If up_to_date_graph is true, then swh-graph contains all objects. | |||||
Else, rev2, rel2, and the snapshot are missing from the graph. | |||||
""" | """ | ||||
date = TimestampWithTimezone.from_datetime( | date = TimestampWithTimezone.from_datetime( | ||||
datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) | datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) | ||||
) | ) | ||||
author = Person.from_fullname(b"Foo <foo@example.org>") | author = Person.from_fullname(b"Foo <foo@example.org>") | ||||
cnt1 = Content.from_data(b"hello") | cnt1 = Content.from_data(b"hello") | ||||
cnt2 = Content.from_data(b"world") | cnt2 = Content.from_data(b"world") | ||||
dir1 = Directory( | dir1 = Directory( | ||||
Show All 39 Lines | rev2 = Revision( | ||||
author=author, | author=author, | ||||
committer=author, | committer=author, | ||||
directory=dir2.id, | directory=dir2.id, | ||||
parents=(rev1.id,), | parents=(rev1.id,), | ||||
type=RevisionType.GIT, | type=RevisionType.GIT, | ||||
synthetic=True, | synthetic=True, | ||||
) | ) | ||||
if last_revision_in_graph: | rel2 = Release( | ||||
nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2]] | name=b"1.0.0", | ||||
message=b"tag2", | |||||
target_type=ObjectType.REVISION, | |||||
target=rev2.id, | |||||
synthetic=True, | |||||
) | |||||
branches = { | |||||
b"refs/heads/master": SnapshotBranch( | |||||
target=rev2.id, target_type=TargetType.REVISION | |||||
) | |||||
} | |||||
if release: | |||||
branches[b"refs/tags/1.0.0"] = SnapshotBranch( | |||||
target=rel2.id, target_type=TargetType.RELEASE | |||||
) | |||||
snp = Snapshot(branches=branches) | |||||
if up_to_date_graph: | |||||
nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2, snp]] | |||||
edges = [ | edges = [ | ||||
(str(s.swhid()), str(d.swhid())) | |||||
for (s, d) in [ | |||||
(dir1, cnt1), | (dir1, cnt1), | ||||
(dir2, cnt1), | (dir2, cnt1), | ||||
(dir2, cnt2), | (dir2, cnt2), | ||||
(rev1, dir1), | (rev1, dir1), | ||||
(rev2, dir2), | (rev2, dir2), | ||||
(rev2, rev1), | (rev2, rev1), | ||||
(snp, rev2), | |||||
] | ] | ||||
] | if release: | ||||
nodes.append(str(rel2.swhid())) | |||||
edges.append((rel2, rev2)) | |||||
edges.append((snp, rel2)) | |||||
else: | else: | ||||
nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] | nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] | ||||
if release: | |||||
nodes.append(str(rel2.swhid())) | |||||
edges = [ | edges = [ | ||||
(str(s.swhid()), str(d.swhid())) | (dir1, cnt1), | ||||
for (s, d) in [(dir1, cnt1), (dir2, cnt1), (dir2, cnt2), (rev1, dir1),] | (dir2, cnt1), | ||||
(dir2, cnt2), | |||||
(rev1, dir1), | |||||
] | ] | ||||
return (cnt1, cnt2, dir1, dir2, rev1, rev2, nodes, edges) | edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] | ||||
return (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) | |||||
@pytest.mark.graph
@pytest.mark.parametrize(
    "snapshot,up_to_date_graph,release",
    list(itertools.product([False], [True, False], [False]))  # no snp implies no rel
    + list(itertools.product([True], [True, False], [True, False])),
)
def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release):
    """Cook a git-bare bundle and check that swh-graph drives the traversal.

    The objects built by ``get_objects`` are loaded into storage, and a graph
    client is built from the nodes and edges it returns. Depending on
    ``snapshot``, either the snapshot or ``rev2`` is cooked. The resulting
    tarball is extracted and ``git log`` must list ``rev2`` then ``rev1``.

    Parameters:
        swh_storage: storage fixture; wrapped in a ``MagicMock`` spy once
            the objects have been added.
        up_to_date_graph: forwarded to ``get_objects``; when false the test
            expects ``revision_log`` to be called on storage.
        snapshot: cook the snapshot (``snapshot_gitbare``) when true, else
            cook ``rev2`` (``revision_gitbare``).
        release: forwarded to ``get_objects``; only combined with
            ``snapshot=True`` by the parametrization.
    """
    # NOTE(review): imported locally, presumably so that the module can still
    # be collected when swh-graph is not installed -- confirm.
    from swh.graph.naive_client import NaiveClient as GraphClient

    (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) = get_objects(
        up_to_date_graph, release=release,
    )

    # Add all objects to storage
    swh_storage.content_add([cnt1, cnt2])
    swh_storage.directory_add([dir1, dir2])
    swh_storage.revision_add([rev1, rev2])
    swh_storage.release_add([rel2])
    swh_storage.snapshot_add([snp])

    # Add spy on swh_storage, to make sure revision_log is not called
    # (the graph must be used instead)
    swh_storage = unittest.mock.MagicMock(wraps=swh_storage)

    # Add all objects to graph
    swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges))

    # Cook
    backend = InMemoryVaultBackend()
    if snapshot:
        cooker_name = "snapshot_gitbare"
        cooked_id = snp.id
    else:
        cooker_name = "revision_gitbare"
        cooked_id = rev2.id
    cooker = GitBareCooker(
        cooker_name,
        cooked_id,
        backend=backend,
        storage=swh_storage,
        graph=swh_graph,
    )
    cooker.cook()

    # Get bundle
    bundle = backend.fetch(cooker_name, cooked_id)

    # Extract bundle and make sure both revisions are in it
    with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir:
        with tarfile.open(fileobj=io.BytesIO(bundle)) as tf:
            tf.extractall(tempdir)

        # The directory name is matched by its hex-id suffix only, so the
        # same lookup works for both the revision and the snapshot bundle.
        repo_dirs = glob.glob(f"{tempdir}/*{cooked_id.hex()}.git")
        assert repo_dirs, f"no '*{cooked_id.hex()}.git' directory in the bundle"

        output = subprocess.check_output(
            [
                "git",
                "-C",
                repo_dirs[0],
                "log",
                "--format=oneline",
                "--decorate=",
            ]
        )

        assert output.decode() == f"{rev2.id.hex()} msg2\n{rev1.id.hex()} msg1\n"

    # Make sure the graph was used instead of swh_storage.revision_log
    if snapshot:
        if up_to_date_graph:
            # The graph has everything, so the first call succeeds and returns
            # all objects transitively pointed by the snapshot
            swh_graph.visit_nodes.assert_has_calls(
                [unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev")]
            )
        else:
            # The graph does not have everything, so the first call returns nothing.
            # However, the second call (on the top rev) succeeds and returns
            # all objects but the rev and the rel
            swh_graph.visit_nodes.assert_has_calls(
                [
                    unittest.mock.call(str(snp.swhid()), edges="snp:*,rel:*,rev:rev"),
                    unittest.mock.call(str(rev2.swhid()), edges="rev:rev"),
                ]
            )
    else:
        swh_graph.visit_nodes.assert_has_calls(
            [unittest.mock.call(str(rev2.swhid()), edges="rev:rev")]
        )

    if up_to_date_graph:
        swh_storage.revision_log.assert_not_called()
        swh_storage.revision_shortlog.assert_not_called()
    else:
        swh_storage.revision_log.assert_called()