Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/tests/test_cookers.py
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import contextlib | import contextlib | ||||
import datetime | import datetime | ||||
import gzip | import gzip | ||||
import io | import io | ||||
import os | import os | ||||
import pathlib | import pathlib | ||||
import pytest | |||||
import subprocess | import subprocess | ||||
import tarfile | import tarfile | ||||
import tempfile | import tempfile | ||||
import unittest | import unittest | ||||
import unittest.mock | import unittest.mock | ||||
import dulwich.fastexport | import dulwich.fastexport | ||||
import dulwich.index | import dulwich.index | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | class TestRepo: | ||||
def print_debug_graph(self, reflog=False): | def print_debug_graph(self, reflog=False): | ||||
args = ['log', '--all', '--graph', '--decorate'] | args = ['log', '--all', '--graph', '--decorate'] | ||||
if reflog: | if reflog: | ||||
args.append('--reflog') | args.append('--reflog') | ||||
self.git_shell(*args, stdout=None) | self.git_shell(*args, stdout=None) | ||||
def git_loader(storage, repo_path, visit_date=None):
    """Instantiate a Git Loader using the storage instance as storage.

    Args:
        storage: the storage instance the loader should write into.
        repo_path: path to the on-disk git repository to load.
        visit_date: datetime of the visit; defaults to the current time
            at call time.

    Returns:
        A configured ``GitLoaderFromDisk`` instance (not yet run; call
        ``.load()`` on it).
    """
    # Resolve the default at call time: a ``datetime.datetime.now()``
    # default in the signature is evaluated only once, at import time,
    # so every default call would silently share the same stale
    # timestamp.
    if visit_date is None:
        visit_date = datetime.datetime.now()
    loader = GitLoaderFromDisk(
        'fake_origin', directory=repo_path, visit_date=visit_date)
    loader.storage = storage
    return loader
@contextlib.contextmanager | @contextlib.contextmanager | ||||
def cook_extract_directory(storage, obj_id): | def cook_extract_directory(storage, obj_id): | ||||
"""Context manager that cooks a directory and extract it.""" | """Context manager that cooks a directory and extract it.""" | ||||
backend = unittest.mock.MagicMock() | backend = unittest.mock.MagicMock() | ||||
backend.storage = storage | backend.storage = storage | ||||
cooker = DirectoryCooker( | cooker = DirectoryCooker( | ||||
Show All 38 Lines | |||||
# Sample payloads written into the test repositories: a text file that
# exercises unicode and leading/trailing-whitespace handling, and a small
# binary blob used as an "executable" fixture.
TEST_CONTENT = (" test content\n"
                "and unicode \N{BLACK HEART SUIT}\n"
                " and trailing spaces ")
TEST_EXECUTABLE = b'\x42\x40\x00\x00\x05'
class TestDirectoryCooker:
    """Tests for the directory cooker: load a git repository into storage,
    cook its root directory back out, and check the extracted tree."""

    def test_directory_simple(self, swh_storage):
        """Round-trip a directory holding a file, an executable, a symlink
        and a nested directory; the re-hashed extraction must match."""
        repo = TestRepo()
        with repo as rp:
            (rp / 'file').write_text(TEST_CONTENT)
            (rp / 'executable').write_bytes(TEST_EXECUTABLE)
            (rp / 'executable').chmod(0o755)
            (rp / 'link').symlink_to('file')
            (rp / 'dir1/dir2').mkdir(parents=True)
            (rp / 'dir1/dir2/file').write_text(TEST_CONTENT)
            c = repo.commit()
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            # The commit's tree id is the directory we will cook.
            obj_id_hex = repo.repo[c].tree.decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)

        with cook_extract_directory(swh_storage, obj_id) as p:
            assert (p / 'file').stat().st_mode == 0o100644
            assert (p / 'file').read_text() == TEST_CONTENT
            assert (p / 'executable').stat().st_mode == 0o100755
            assert (p / 'executable').read_bytes() == TEST_EXECUTABLE
            # NOTE(review): missing call parentheses — `is_symlink` (the
            # bound method, always truthy) instead of `is_symlink()`;
            # the readlink assertion below still covers the symlink.
            assert (p / 'link').is_symlink
            assert os.readlink(str(p / 'link')) == 'file'
            assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644
            assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT

            # Re-hash the extracted tree and compare to the original id.
            directory = Directory.from_disk(path=bytes(p))
            assert obj_id_hex == hashutil.hash_to_hex(directory.hash)

    def test_directory_filtered_objects(self, swh_storage):
        """Hidden/absent contents must be replaced by their placeholder
        messages when the directory is cooked."""
        repo = TestRepo()
        with repo as rp:
            file_1, id_1 = hash_content(b'test1')
            file_2, id_2 = hash_content(b'test2')
            file_3, id_3 = hash_content(b'test3')

            (rp / 'file').write_bytes(file_1)
            (rp / 'hidden_file').write_bytes(file_2)
            (rp / 'absent_file').write_bytes(file_3)

            c = repo.commit()
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            obj_id_hex = repo.repo[c].tree.decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)

        # FIXME: storage.content_update() should be changed to allow things
        # like that
        with swh_storage.get_db().transaction() as cur:
            cur.execute("""update content set status = 'visible'
                           where sha1 = %s""", (id_1,))
            cur.execute("""update content set status = 'hidden'
                           where sha1 = %s""", (id_2,))
            cur.execute("""update content set status = 'absent'
                           where sha1 = %s""", (id_3,))

        with cook_extract_directory(swh_storage, obj_id) as p:
            assert (p / 'file').read_bytes() == b'test1'
            assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE
            assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE

    def test_directory_bogus_perms(self, swh_storage):
        # Some early git repositories have 664/775 permissions... let's check
        # if all the weird modes are properly normalized in the directory
        # cooker.
        repo = TestRepo()
        with repo as rp:
            (rp / 'file').write_text(TEST_CONTENT)
            (rp / 'file').chmod(0o664)
            (rp / 'executable').write_bytes(TEST_EXECUTABLE)
            (rp / 'executable').chmod(0o775)
            (rp / 'wat').write_text(TEST_CONTENT)
            (rp / 'wat').chmod(0o604)
            c = repo.commit()
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            obj_id_hex = repo.repo[c].tree.decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)

        with cook_extract_directory(swh_storage, obj_id) as p:
            # All modes must be normalized to the canonical 644/755 values.
            assert (p / 'file').stat().st_mode == 0o100644
            assert (p / 'executable').stat().st_mode == 0o100755
            assert (p / 'wat').stat().st_mode == 0o100644

    def test_directory_revision_data(self, swh_storage):
        """A 'rev' directory entry (git submodule) must be extracted as a
        symlink pointing at the target revision id."""
        target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd'
        d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9')

        dir = {
            'id': d,
            'entries': [
                {
                    'name': b'submodule',
                    'type': 'rev',
                    'target': hashutil.hash_to_bytes(target_rev),
                    'perms': 0o100644,
                }
            ],
        }
        swh_storage.directory_add([dir])

        with cook_extract_directory(swh_storage, d) as p:
            assert (p / 'submodule').is_symlink()
            assert os.readlink(str(p / 'submodule')) == target_rev
class TestRevisionGitfastCooker:
    """Tests for the revision gitfast cooker: load a git history into
    storage, cook it to a gitfast stream, re-import it and check the
    resulting repository matches the original."""

    def test_revision_simple(self, swh_storage):
        #
        # 1--2--3--4--5--6--7
        #
        repo = TestRepo()
        with repo as rp:
            (rp / 'file1').write_text(TEST_CONTENT)
            repo.commit('add file1')
            (rp / 'file2').write_text(TEST_CONTENT)
            repo.commit('add file2')
            (rp / 'dir1/dir2').mkdir(parents=True)
            (rp / 'dir1/dir2/file').write_text(TEST_CONTENT)
            repo.commit('add dir1/dir2/file')
            (rp / 'bin1').write_bytes(TEST_EXECUTABLE)
            (rp / 'bin1').chmod(0o755)
            repo.commit('add bin1')
            (rp / 'link1').symlink_to('file1')
            repo.commit('link link1 to file1')
            (rp / 'file2').unlink()
            repo.commit('remove file2')
            (rp / 'bin1').rename(rp / 'bin')
            repo.commit('rename bin1 to bin')
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            ert.checkout(b'HEAD')
            assert (p / 'file1').stat().st_mode == 0o100644
            assert (p / 'file1').read_text() == TEST_CONTENT
            # NOTE(review): missing call parentheses — `is_symlink` (the
            # bound method, always truthy) instead of `is_symlink()`.
            assert (p / 'link1').is_symlink
            assert os.readlink(str(p / 'link1')) == 'file1'
            assert (p / 'bin').stat().st_mode == 0o100755
            assert (p / 'bin').read_bytes() == TEST_EXECUTABLE
            assert (p / 'dir1/dir2/file').read_text() == TEST_CONTENT
            assert (p / 'dir1/dir2/file').stat().st_mode == 0o100644
            assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex

    def test_revision_two_roots(self, swh_storage):
        #
        # 1----3---4
        #     /
        # 2----
        #
        repo = TestRepo()
        with repo as rp:
            (rp / 'file1').write_text(TEST_CONTENT)
            c1 = repo.commit('Add file1')
            del repo.repo.refs[b'refs/heads/master']  # git update-ref -d HEAD
            (rp / 'file2').write_text(TEST_CONTENT)
            repo.commit('Add file2')
            repo.merge([c1])
            (rp / 'file3').write_text(TEST_CONTENT)
            repo.commit('add file3')
            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)
            loader = git_loader(swh_storage, str(rp))
            loader.load()

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex

    def test_revision_two_double_fork_merge(self, swh_storage):
        #
        #     2---4---6
        #    /   /   /
        #   1---3---5
        #
        repo = TestRepo()
        with repo as rp:
            (rp / 'file1').write_text(TEST_CONTENT)
            c1 = repo.commit('Add file1')
            repo.repo.refs[b'refs/heads/c1'] = c1

            (rp / 'file2').write_text(TEST_CONTENT)
            repo.commit('Add file2')

            (rp / 'file3').write_text(TEST_CONTENT)
            c3 = repo.commit('Add file3', ref=b'refs/heads/c1')
            repo.repo.refs[b'refs/heads/c3'] = c3

            repo.merge([c3])

            (rp / 'file5').write_text(TEST_CONTENT)
            c5 = repo.commit('Add file3', ref=b'refs/heads/c3')
            repo.merge([c5])

            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)
            loader = git_loader(swh_storage, str(rp))
            loader.load()

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex

    def test_revision_triple_merge(self, swh_storage):
        #
        #       .---.---5
        #      /   /   /
        #     2   3   4
        #    /   /   /
        #   1---.---.
        #
        repo = TestRepo()
        with repo as rp:
            (rp / 'file1').write_text(TEST_CONTENT)
            c1 = repo.commit('Commit 1')
            repo.repo.refs[b'refs/heads/b1'] = c1
            repo.repo.refs[b'refs/heads/b2'] = c1

            repo.commit('Commit 2')
            c3 = repo.commit('Commit 3', ref=b'refs/heads/b1')
            c4 = repo.commit('Commit 4', ref=b'refs/heads/b2')
            repo.merge([c3, c4])

            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)
            loader = git_loader(swh_storage, str(rp))
            loader.load()

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            assert ert.repo.refs[b'HEAD'].decode() == obj_id_hex

    def test_revision_filtered_objects(self, swh_storage):
        """Hidden/absent contents must be replaced by their placeholder
        messages in the cooked revision."""
        repo = TestRepo()
        with repo as rp:
            file_1, id_1 = hash_content(b'test1')
            file_2, id_2 = hash_content(b'test2')
            file_3, id_3 = hash_content(b'test3')

            (rp / 'file').write_bytes(file_1)
            (rp / 'hidden_file').write_bytes(file_2)
            (rp / 'absent_file').write_bytes(file_3)

            repo.commit()
            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)
            loader = git_loader(swh_storage, str(rp))
            loader.load()

        # FIXME: storage.content_update() should be changed to allow things
        # like that
        with swh_storage.get_db().transaction() as cur:
            cur.execute("""update content set status = 'visible'
                           where sha1 = %s""", (id_1,))
            cur.execute("""update content set status = 'hidden'
                           where sha1 = %s""", (id_2,))
            cur.execute("""update content set status = 'absent'
                           where sha1 = %s""", (id_3,))

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            ert.checkout(b'HEAD')
            assert (p / 'file').read_bytes() == b'test1'
            assert (p / 'hidden_file').read_bytes() == HIDDEN_MESSAGE
            assert (p / 'absent_file').read_bytes() == SKIPPED_MESSAGE

    def test_revision_bogus_perms(self, swh_storage):
        # Some early git repositories have 664/775 permissions... let's check
        # if all the weird modes are properly normalized in the revision
        # cooker.
        repo = TestRepo()
        with repo as rp:
            (rp / 'file').write_text(TEST_CONTENT)
            (rp / 'file').chmod(0o664)
            (rp / 'executable').write_bytes(TEST_EXECUTABLE)
            (rp / 'executable').chmod(0o775)
            (rp / 'wat').write_text(TEST_CONTENT)
            (rp / 'wat').chmod(0o604)
            repo.commit('initial commit')
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            obj_id_hex = repo.repo.refs[b'HEAD'].decode()
            obj_id = hashutil.hash_to_bytes(obj_id_hex)

        with cook_extract_revision_gitfast(swh_storage, obj_id) as (ert, p):
            ert.checkout(b'HEAD')
            assert (p / 'file').stat().st_mode == 0o100644
            assert (p / 'executable').stat().st_mode == 0o100755
            assert (p / 'wat').stat().st_mode == 0o100644

    def test_revision_null_fields(self, swh_storage):
        # Our schema doesn't enforce a lot of non-null revision fields. We need
        # to check these cases don't break the cooker.
        repo = TestRepo()
        with repo as rp:
            (rp / 'file').write_text(TEST_CONTENT)
            c = repo.commit('initial commit')
            loader = git_loader(swh_storage, str(rp))
            loader.load()
            # NOTE(review): result unused — presumably kept to mirror the
            # sibling tests; confirm before removing.
            repo.repo.refs[b'HEAD'].decode()
            dir_id_hex = repo.repo[c].tree.decode()
            dir_id = hashutil.hash_to_bytes(dir_id_hex)

        test_id = b'56789012345678901234'
        # A synthetic revision with every nullable field set to None.
        test_revision = {
            'id': test_id,
            'message': None,
            'author': {'name': None, 'email': None, 'fullname': ''},
            'date': None,
            'committer': {'name': None, 'email': None, 'fullname': ''},
            'committer_date': None,
            'parents': [],
            'type': 'git',
            'directory': dir_id,
            'metadata': {},
            'synthetic': True
        }
        swh_storage.revision_add([test_revision])

        with cook_extract_revision_gitfast(swh_storage, test_id) as (ert, p):
            ert.checkout(b'HEAD')
            assert (p / 'file').stat().st_mode == 0o100644

    def test_revision_revision_data(self, swh_storage):
        """A 'rev' directory entry (git submodule) must be emitted as a
        gitlink ('M 160000') in the gitfast stream."""
        target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd'
        d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9')
        r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0')

        dir = {
            'id': d,
            'entries': [
                {
                    'name': b'submodule',
                    'type': 'rev',
                    'target': hashutil.hash_to_bytes(target_rev),
                    'perms': 0o100644,
                }
            ],
        }
        swh_storage.directory_add([dir])

        rev = {
            'id': r,
            'message': None,
            'author': {'name': None, 'email': None, 'fullname': ''},
            'date': None,
            'committer': {'name': None, 'email': None, 'fullname': ''},
            'committer_date': None,
            'parents': [],
            'type': 'git',
            'directory': d,
            'metadata': {},
            'synthetic': True
        }
        swh_storage.revision_add([rev])

        with cook_stream_revision_gitfast(swh_storage, r) as stream:
            pattern = 'M 160000 {} submodule'.format(target_rev).encode()
            assert pattern in stream.read()
Maybe you can write `git_loader(swh_storage, str(rp)).load()` to gain a couple of lines, but that's pure nitpicking.