Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/tests/test_git_bare_cooker.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
""" | """ | ||||
This module contains additional tests for the bare cooker. | This module contains additional tests for the bare cooker. | ||||
Generic cooker tests (eg. without swh-graph) in test_cookers.py also | Generic cooker tests (eg. without swh-graph) in test_cookers.py also | ||||
run on the bare cooker. | run on the bare cooker. | ||||
""" | """ | ||||
import datetime | import datetime | ||||
import glob | import glob | ||||
import io | import io | ||||
import itertools | |||||
import subprocess | import subprocess | ||||
import tarfile | import tarfile | ||||
import tempfile | import tempfile | ||||
import unittest.mock | import unittest.mock | ||||
import pytest | import pytest | ||||
from pytest import param | |||||
from swh.model.from_disk import DentryPerms | from swh.model.from_disk import DentryPerms | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Content, | Content, | ||||
Directory, | Directory, | ||||
DirectoryEntry, | DirectoryEntry, | ||||
ObjectType, | ObjectType, | ||||
Person, | Person, | ||||
Release, | Release, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.vault.cookers.git_bare import GitBareCooker | from swh.vault.cookers.git_bare import GitBareCooker | ||||
from swh.vault.in_memory_backend import InMemoryVaultBackend | from swh.vault.in_memory_backend import InMemoryVaultBackend | ||||
def get_objects(up_to_date_graph, release): | def get_objects(up_to_date_graph, release, tree_ref): | ||||
""" | """ | ||||
Build objects:: | Build objects:: | ||||
rel2 <------ snp | rel2 <------ snp | ||||
| / | |||||
v / | |||||
rev1 <------ rev2 <----° | |||||
| | | |||||
v v | |||||
dir1 dir2 | |||||
| / | | | / | | ||||
v / v | v / v | ||||
cnt1 <----° cnt2 | rev1 <------ rev2 <----° dir4 | ||||
| | | | |||||
v v v | |||||
dir1 dir2 dir3 | |||||
| / | | | |||||
v / v v | |||||
cnt1 <----° cnt2 cnt3 | |||||
If up_to_date_graph is true, then swh-graph contains all objects. | If up_to_date_graph is true, then swh-graph contains all objects. | ||||
Else, rev2, rel2, and the snapshot are missing from the graph. | Else, dir4, rev2, rel2, and snp are missing from the graph. | ||||
""" | """ | ||||
date = TimestampWithTimezone.from_datetime( | date = TimestampWithTimezone.from_datetime( | ||||
datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) | datetime.datetime(2021, 5, 7, 8, 43, 59, tzinfo=datetime.timezone.utc) | ||||
) | ) | ||||
author = Person.from_fullname(b"Foo <foo@example.org>") | author = Person.from_fullname(b"Foo <foo@example.org>") | ||||
cnt1 = Content.from_data(b"hello") | cnt1 = Content.from_data(b"hello") | ||||
cnt2 = Content.from_data(b"world") | cnt2 = Content.from_data(b"world") | ||||
cnt3 = Content.from_data(b"!") | |||||
dir1 = Directory( | dir1 = Directory( | ||||
entries=( | entries=( | ||||
DirectoryEntry( | DirectoryEntry( | ||||
name=b"file1", | name=b"file1", | ||||
type="file", | type="file", | ||||
perms=DentryPerms.content, | perms=DentryPerms.content, | ||||
target=cnt1.sha1_git, | target=cnt1.sha1_git, | ||||
), | ), | ||||
Show All 10 Lines | dir2 = Directory( | ||||
DirectoryEntry( | DirectoryEntry( | ||||
name=b"file2", | name=b"file2", | ||||
type="file", | type="file", | ||||
perms=DentryPerms.content, | perms=DentryPerms.content, | ||||
target=cnt2.sha1_git, | target=cnt2.sha1_git, | ||||
), | ), | ||||
) | ) | ||||
) | ) | ||||
dir3 = Directory( | |||||
entries=( | |||||
DirectoryEntry( | |||||
name=b"file3", | |||||
type="file", | |||||
perms=DentryPerms.content, | |||||
target=cnt3.sha1_git, | |||||
), | |||||
) | |||||
) | |||||
dir4 = Directory( | |||||
entries=( | |||||
DirectoryEntry( | |||||
name=b"directory3", | |||||
type="dir", | |||||
perms=DentryPerms.directory, | |||||
target=dir3.id, | |||||
), | |||||
) | |||||
) | |||||
rev1 = Revision( | rev1 = Revision( | ||||
message=b"msg1", | message=b"msg1", | ||||
date=date, | date=date, | ||||
committer_date=date, | committer_date=date, | ||||
author=author, | author=author, | ||||
committer=author, | committer=author, | ||||
directory=dir1.id, | directory=dir1.id, | ||||
type=RevisionType.GIT, | type=RevisionType.GIT, | ||||
Show All 17 Lines | rel2 = Release( | ||||
target_type=ObjectType.REVISION, | target_type=ObjectType.REVISION, | ||||
target=rev2.id, | target=rev2.id, | ||||
synthetic=True, | synthetic=True, | ||||
) | ) | ||||
branches = { | branches = { | ||||
b"refs/heads/master": SnapshotBranch( | b"refs/heads/master": SnapshotBranch( | ||||
target=rev2.id, target_type=TargetType.REVISION | target=rev2.id, target_type=TargetType.REVISION | ||||
) | ), | ||||
} | } | ||||
if release: | if release: | ||||
branches[b"refs/tags/1.0.0"] = SnapshotBranch( | branches[b"refs/tags/1.0.0"] = SnapshotBranch( | ||||
target=rel2.id, target_type=TargetType.RELEASE | target=rel2.id, target_type=TargetType.RELEASE | ||||
) | ) | ||||
if tree_ref: | |||||
branches[b"refs/heads/tree-ref"] = SnapshotBranch( | |||||
target=dir4.id, target_type=TargetType.DIRECTORY | |||||
) | |||||
snp = Snapshot(branches=branches) | snp = Snapshot(branches=branches) | ||||
if up_to_date_graph: | if up_to_date_graph: | ||||
nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1, rev2, snp]] | nodes = [cnt1, cnt2, dir1, dir2, rev1, rev2, snp] | ||||
edges = [ | edges = [ | ||||
(dir1, cnt1), | (dir1, cnt1), | ||||
(dir2, cnt1), | (dir2, cnt1), | ||||
(dir2, cnt2), | (dir2, cnt2), | ||||
(rev1, dir1), | (rev1, dir1), | ||||
(rev2, dir2), | (rev2, dir2), | ||||
(rev2, rev1), | (rev2, rev1), | ||||
(snp, rev2), | (snp, rev2), | ||||
] | ] | ||||
if release: | if release: | ||||
nodes.append(str(rel2.swhid())) | nodes.append(rel2) | ||||
edges.append((rel2, rev2)) | edges.append((rel2, rev2)) | ||||
edges.append((snp, rel2)) | edges.append((snp, rel2)) | ||||
if tree_ref: | |||||
nodes.extend([cnt3, dir3, dir4]) | |||||
edges.extend( | |||||
[(dir3, cnt3), (dir4, dir3), (snp, dir4),] | |||||
) | |||||
else: | else: | ||||
nodes = [str(n.swhid()) for n in [cnt1, cnt2, dir1, dir2, rev1]] | nodes = [cnt1, cnt2, cnt3, dir1, dir2, dir3, rev1] | ||||
if release: | |||||
nodes.append(str(rel2.swhid())) | |||||
edges = [ | edges = [ | ||||
(dir1, cnt1), | (dir1, cnt1), | ||||
(dir2, cnt1), | (dir2, cnt1), | ||||
(dir2, cnt2), | (dir2, cnt2), | ||||
(dir3, cnt3), | |||||
(rev1, dir1), | (rev1, dir1), | ||||
] | ] | ||||
if release: | |||||
nodes.append(rel2) | |||||
if tree_ref: | |||||
nodes.extend([cnt3, dir3]) | |||||
edges.extend([(dir3, cnt3)]) | |||||
nodes = [str(n.swhid()) for n in nodes] | |||||
edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] | edges = [(str(s.swhid()), str(d.swhid())) for (s, d) in edges] | ||||
return (cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) | r = (cnt1, cnt2, cnt3, dir1, dir2, dir3, dir4, rev1, rev2, rel2, snp, nodes, edges) | ||||
return r | |||||
@pytest.mark.graph | @pytest.mark.graph | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"snapshot,up_to_date_graph,release", | "snapshot,up_to_date_graph,release,tree_ref", | ||||
list(itertools.product([False], [True, False], [False])) # no snp implies no rel | [ | ||||
+ list(itertools.product([True], [True, False], [True, False])), | # 'no snp' implies no release or tree, because there can only be one root object. | ||||
anlambert: implies | |||||
param(False, False, False, False, id="no snp, outdated graph, no release/tree"), | |||||
param(False, True, False, False, id="no snp, updated graph, no release/tree"), | |||||
param(True, False, False, False, id="snp, outdated graph, no release/tree"), | |||||
param(True, True, False, False, id="snp, updated graph, no release/tree"), | |||||
param(True, False, True, False, id="snp, outdated graph, w/ release, no tree"), | |||||
param(True, True, True, False, id="snp, updated graph, w/ release, no tree"), | |||||
param(True, False, True, True, id="snp, outdated graph, w/ release and tree"), | |||||
param(True, True, True, True, id="snp, updated graph, w/ release and tree"), | |||||
], | |||||
) | ) | ||||
def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release): | def test_graph_revisions(swh_storage, up_to_date_graph, snapshot, release, tree_ref): | ||||
from swh.graph.naive_client import NaiveClient as GraphClient | from swh.graph.naive_client import NaiveClient as GraphClient | ||||
(cnt1, cnt2, dir1, dir2, rev1, rev2, rel2, snp, nodes, edges) = get_objects( | r = get_objects(up_to_date_graph, release=release, tree_ref=tree_ref) | ||||
up_to_date_graph, release=release, | (cnt1, cnt2, cnt3, dir1, dir2, dir3, dir4, rev1, rev2, rel2, snp, nodes, edges) = r | ||||
) | |||||
# Add all objects to storage | # Add all objects to storage | ||||
swh_storage.content_add([cnt1, cnt2]) | swh_storage.content_add([cnt1, cnt2, cnt3]) | ||||
swh_storage.directory_add([dir1, dir2]) | swh_storage.directory_add([dir1, dir2, dir3, dir4]) | ||||
swh_storage.revision_add([rev1, rev2]) | swh_storage.revision_add([rev1, rev2]) | ||||
swh_storage.release_add([rel2]) | swh_storage.release_add([rel2]) | ||||
swh_storage.snapshot_add([snp]) | swh_storage.snapshot_add([snp]) | ||||
# Add spy on swh_storage, to make sure revision_log is not called | # Add spy on swh_storage, to make sure revision_log is not called | ||||
# (the graph must be used instead) | # (the graph must be used instead) | ||||
swh_storage = unittest.mock.MagicMock(wraps=swh_storage) | swh_storage = unittest.mock.MagicMock(wraps=swh_storage) | ||||
# Add all objects to graph | # Add all objects to graph | ||||
swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges)) | swh_graph = unittest.mock.Mock(wraps=GraphClient(nodes=nodes, edges=edges)) | ||||
# Cook | # Cook | ||||
backend = InMemoryVaultBackend() | backend = InMemoryVaultBackend() | ||||
if snapshot: | if snapshot: | ||||
cooker_name = "snapshot_gitbare" | cooker_name = "snapshot_gitbare" | ||||
cooked_id = snp.id | cooked_id = snp.id | ||||
else: | else: | ||||
cooker_name = "revision_gitbare" | cooker_name = "revision_gitbare" | ||||
cooked_id = rev2.id | cooked_id = rev2.id | ||||
cooker = GitBareCooker( | cooker = GitBareCooker( | ||||
cooker_name, cooked_id, backend=backend, storage=swh_storage, graph=swh_graph, | cooker_name, cooked_id, backend=backend, storage=swh_storage, graph=swh_graph, | ||||
) | ) | ||||
if tree_ref: | |||||
# git-fsck now rejects refs pointing to trees, but some old git repos have them. | |||||
cooker.use_fsck = False | |||||
cooker.cook() | cooker.cook() | ||||
# Get bundle | # Get bundle | ||||
bundle = backend.fetch(cooker_name, cooked_id) | bundle = backend.fetch(cooker_name, cooked_id) | ||||
# Extract bundle and make sure both revisions are in it | # Extract bundle and make sure both revisions are in it | ||||
with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: | with tempfile.TemporaryDirectory("swh-vault-test-bare") as tempdir: | ||||
with tarfile.open(fileobj=io.BytesIO(bundle)) as tf: | with tarfile.open(fileobj=io.BytesIO(bundle)) as tf: | ||||
▲ Show 20 Lines • Show All 42 Lines • Show Last 20 Lines |
implies