diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py
--- a/swh/vault/cookers/revision_gitfast.py
+++ b/swh/vault/cookers/revision_gitfast.py
@@ -11,6 +11,7 @@
 from fastimport.commands import (CommitCommand, ResetCommand, BlobCommand,
                                  FileDeleteCommand, FileModifyCommand)
 
+from swh.model import hashutil
 from swh.model.toposort import toposort
 from swh.model.from_disk import mode_to_perms
 from swh.vault.cookers.base import BaseVaultCooker
@@ -185,6 +186,16 @@
                     mode=mode_to_perms(f['perms']).value,
                     dataref=(b':' + self.mark(f['sha1'])),
                     data=None)
+            # A revision is added or modified if it was not in the tree or
+            # if its target changed.
+            elif (f['type'] == 'rev'
+                  and (fname not in prev_dir
+                       or f['target'] != prev_dir[fname]['target'])):
+                yield FileModifyCommand(
+                    path=os.path.join(root, fname),
+                    mode=0o160000,
+                    dataref=hashutil.hash_to_hex(f['target']).encode(),
+                    data=None)
             # A directory is added or modified if it was not in the tree or
             # if its target changed.
             elif f['type'] == 'dir':
diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py
--- a/swh/vault/tests/test_cookers.py
+++ b/swh/vault/tests/test_cookers.py
@@ -101,8 +101,8 @@
             yield pathlib.Path(td) / hashutil.hash_to_hex(obj_id)
 
     @contextlib.contextmanager
-    def cook_extract_revision_gitfast(self, obj_id):
-        """Context manager that cooks a revision and extract it."""
+    def cook_stream_revision_gitfast(self, obj_id):
+        """Context manager that cooks a revision and streams its fastexport."""
         cooker = RevisionGitfastCooker('revision_gitfast', obj_id)
         cooker.storage = self.storage
         cooker.backend = unittest.mock.MagicMock()
@@ -111,10 +111,16 @@
         cooker.prepare_bundle()
         cooker.fileobj.seek(0)
         fastexport_stream = gzip.GzipFile(fileobj=cooker.fileobj)
+        yield fastexport_stream
+
+    @contextlib.contextmanager
+    def cook_extract_revision_gitfast(self, obj_id):
+        """Context manager that cooks a revision and extracts it."""
         test_repo = TestRepo()
-        with test_repo as p:
+        with self.cook_stream_revision_gitfast(obj_id) as stream, \
+                test_repo as p:
             processor = dulwich.fastexport.GitImportProcessor(test_repo.repo)
-            processor.import_stream(fastexport_stream)
+            processor.import_stream(stream)
             yield test_repo, p
 
 
@@ -209,6 +215,27 @@
         self.assertEqual((p / 'executable').stat().st_mode, 0o100755)
         self.assertEqual((p / 'wat').stat().st_mode, 0o100644)
 
+    def test_directory_revision_data(self):
+        target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd'
+        d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9')
+
+        dir = {
+            'id': d,
+            'entries': [
+                {
+                    'name': b'submodule',
+                    'type': 'rev',
+                    'target': hashutil.hash_to_bytes(target_rev),
+                    'perms': 0o100644,
+                }
+            ],
+        }
+        self.storage.directory_add([dir])
+
+        with self.cook_extract_directory(d) as p:
+            self.assertTrue((p / 'submodule').is_symlink())
+            self.assertEqual(os.readlink(str(p / 'submodule')), target_rev)
+
 
 class TestRevisionGitfastCooker(BaseTestCookers, unittest.TestCase):
     def test_revision_simple(self):
@@ -420,3 +447,40 @@
         with self.cook_extract_revision_gitfast(test_id) as (ert, p):
             ert.checkout(b'HEAD')
             self.assertEqual((p / 'file').stat().st_mode, 0o100644)
+
+    def test_revision_revision_data(self):
+        target_rev = '0e8a3ad980ec179856012b7eecf4327e99cd44cd'
+        d = hashutil.hash_to_bytes('17a3e48bce37be5226490e750202ad3a9a1a3fe9')
+        r = hashutil.hash_to_bytes('1ecc9270c4fc61cfddbc65a774e91ef5c425a6f0')
+
+        dir = {
+            'id': d,
+            'entries': [
+                {
+                    'name': b'submodule',
+                    'type': 'rev',
+                    'target': hashutil.hash_to_bytes(target_rev),
+                    'perms': 0o100644,
+                }
+            ],
+        }
+        self.storage.directory_add([dir])
+
+        rev = {
+            'id': r,
+            'message': None,
+            'author': {'name': None, 'email': None, 'fullname': ''},
+            'date': None,
+            'committer': {'name': None, 'email': None, 'fullname': ''},
+            'committer_date': None,
+            'parents': [],
+            'type': 'git',
+            'directory': d,
+            'metadata': {},
+            'synthetic': True
+        }
+        self.storage.revision_add([rev])
+
+        with self.cook_stream_revision_gitfast(r) as stream:
+            pattern = 'M 160000 {} submodule'.format(target_rev).encode()
+            self.assertIn(pattern, stream.read())
diff --git a/swh/vault/to_disk.py b/swh/vault/to_disk.py
--- a/swh/vault/to_disk.py
+++ b/swh/vault/to_disk.py
@@ -6,6 +6,7 @@
 import itertools
 import os
 
+from swh.model import hashutil
 from swh.model.from_disk import mode_to_perms, DentryPerms
 
 SKIPPED_MESSAGE = (b'This content has not been retrieved in the '
@@ -28,7 +29,6 @@
     (either due to privacy policy or because its size was too big for us to
     archive it).
     """
-
     assert file_data['type'] == 'file'
 
     if file_data['status'] == 'absent':
@@ -61,10 +61,9 @@
         data = self.storage.directory_ls(self.dir_id, recursive=True)
 
         # Split into files and directory data.
-        # TODO(seirl): also handle revision data.
         data1, data2 = itertools.tee(data, 2)
         dir_data = (entry['name'] for entry in data1 if entry['type'] == 'dir')
-        file_data = (entry for entry in data2 if entry['type'] == 'file')
+        file_data = (entry for entry in data2 if entry['type'] != 'dir')
 
         # Recreate the directory's subtree and then the files into it.
         self._create_tree(dir_data)
@@ -88,11 +87,15 @@
 
     def _create_files(self, file_datas):
         """Create the files according to their status."""
-        # Then create the files
        for file_data in file_datas:
             path = os.path.join(self.root, file_data['name'])
-            content = get_filtered_file_content(self.storage, file_data)
-            self._create_file(path, content, file_data['perms'])
+            if file_data['type'] == 'file':
+                content = get_filtered_file_content(self.storage, file_data)
+                self._create_file(path, content, file_data['perms'])
+            elif file_data['type'] == 'rev':
+                self._create_file(path,
+                                  hashutil.hash_to_hex(file_data['target']),
+                                  0o120000)
 
     def _create_file(self, path, content, mode=0o100644):
         """Create the given file and fill it with content."""
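
Note (not part of the patch): the two code paths above render a 'rev' (submodule) directory entry in two different ways, which is exactly what the new tests assert. The sketch below illustrates both renderings in isolation; the helper names are invented for this example, while the entry layout matches what storage.directory_ls() returns in the tests. The patch itself produces the first form through fastimport's FileModifyCommand with mode=0o160000 and the second through _create_file with mode 0o120000.

# Illustration only: how a submodule entry is rendered by the gitfast cooker
# (a gitlink line in the fastexport stream) and by the flat on-disk extraction
# (a symlink to the hex revision id). Helper names are ad hoc for this sketch.
import os
import tempfile

entry = {
    'name': b'submodule',
    'type': 'rev',
    'target': bytes.fromhex('0e8a3ad980ec179856012b7eecf4327e99cd44cd'),
    'perms': 0o100644,
}

def gitfast_line(entry):
    # Gitlink (mode 160000) whose dataref is the hex id of the target revision;
    # this is the pattern test_revision_revision_data looks for in the stream.
    return b'M 160000 %s %s' % (entry['target'].hex().encode(), entry['name'])

def extract_on_disk(root, entry):
    # The submodule cannot be materialized as a nested repository, so it is
    # recorded as a symlink pointing at the hex revision id, which is what
    # test_directory_revision_data asserts.
    path = os.path.join(root, entry['name'].decode())
    os.symlink(entry['target'].hex(), path)
    return path

print(gitfast_line(entry))
# b'M 160000 0e8a3ad980ec179856012b7eecf4327e99cd44cd submodule'

with tempfile.TemporaryDirectory() as root:
    path = extract_on_disk(root, entry)
    print(os.readlink(path))  # 0e8a3ad980ec179856012b7eecf4327e99cd44cd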