def _author_tuple_format(self, author, date):
    """Build the ``(name, email, seconds, offset)`` tuple of a commit.

    The result is what gets passed as ``author=`` / ``committer=`` when
    the commit command is constructed. Revision fields may be null, so
    every missing or null entry is replaced by a neutral value: ``b''``
    for the name and email, ``0`` for the timestamp and the offset.

    Args:
        author: dict that may carry ``name`` and ``email`` entries, or
            None.
        date: dict that may carry ``timestamp`` and ``offset`` entries,
            or None. ``timestamp`` may be a dict with a ``seconds``
            entry, a bare number of seconds, or None.

    Returns:
        A 4-tuple ``(name, email, seconds, offset * 60)``.
    """
    author = author or {}
    date = date or {}

    name = author.get('name') or b''
    email = author.get('email') or b''

    # ``date.get('timestamp', {})`` would only cover a *missing* key; an
    # explicit None or a plain integer timestamp would then break on the
    # chained ``.get('seconds')``. Look at the value itself instead.
    timestamp = date.get('timestamp')
    if isinstance(timestamp, dict):
        seconds = timestamp.get('seconds') or 0
    else:
        seconds = timestamp or 0

    # The stored offset is multiplied by 60 before being handed over.
    offset = (date.get('offset') or 0) * 60

    return (name, email, seconds, offset)
of non-null revision fields. We need + # to check these cases don't break the cooker. + repo = TestRepo() + with repo as rp: + (rp / 'file').write_text(TEST_CONTENT) + c = repo.commit('initial commit') + self.load(str(rp)) + repo.repo.refs[b'HEAD'].decode() + dir_id_hex = repo.repo[c].tree.decode() + dir_id = hashutil.hash_to_bytes(dir_id_hex) + + test_id = b'56789012345678901234' + test_revision = { + 'id': test_id, + 'message': None, + 'author': {'name': None, 'email': None, 'fullname': ''}, + 'date': {'timestamp': 0, 'offset': 0, 'negative_utc': None}, + 'committer': {'name': None, 'email': None, 'fullname': ''}, + 'committer_date': {'timestamp': 0, 'offset': 0, + 'negative_utc': None}, + 'parents': [], + 'type': 'git', + 'directory': dir_id, + 'metadata': {}, + 'synthetic': True + } + + self.storage.revision_add([test_revision]) + + with self.cook_extract_revision_gitfast(test_id) as (ert, p): + ert.checkout(b'HEAD') + self.assertEqual((p / 'file').stat().st_mode, 0o100644)