diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py --- a/swh/vault/cookers/revision_gitfast.py +++ b/swh/vault/cookers/revision_gitfast.py @@ -108,6 +108,21 @@ ) self.obj_done.add(obj_id) + def _author_tuple_format(self, author, date): + # We never want to have None values here so we replace null entries + # by ''. + if author is not None: + author_tuple = (author.get('name') or b'', + author.get('email') or b'') + else: + author_tuple = (b'', b'') + if date is not None: + date_tuple = (date.get('timestamp', {}).get('seconds') or 0, + (date.get('offset') or 0) * 60) + else: + date_tuple = (0, 0) + return author_tuple + date_tuple + def _compute_commit_command(self, rev): """Compute a commit command from a specific revision. """ @@ -128,20 +143,15 @@ files = yield from self._compute_file_commands(rev, parent) # Construct and yield the commit command - author = (rev['author']['name'], - rev['author']['email'], - rev['date']['timestamp']['seconds'], - rev['date']['offset'] * 60) - committer = (rev['committer']['name'], - rev['committer']['email'], - rev['committer_date']['timestamp']['seconds'], - rev['committer_date']['offset'] * 60) + author = self._author_tuple_format(rev['author'], rev['date']) + committer = self._author_tuple_format(rev['committer'], + rev['committer_date']) yield fastimport.commands.CommitCommand( ref=b'refs/heads/master', mark=self.mark(rev['id']), author=author, committer=committer, - message=rev['message'], + message=rev['message'] or b'', from_=from_, merges=merges, file_iter=files, diff --git a/swh/vault/tests/test_cookers.py b/swh/vault/tests/test_cookers.py --- a/swh/vault/tests/test_cookers.py +++ b/swh/vault/tests/test_cookers.py @@ -385,3 +385,31 @@ self.assertEqual((p / 'file').stat().st_mode, 0o100644) self.assertEqual((p / 'executable').stat().st_mode, 0o100755) self.assertEqual((p / 'wat').stat().st_mode, 0o100644) + + def test_revision_null_fields(self): + # Our schema doesn't enforce a lot 
def test_revision_null_fields(self):
    """Check that a revision with null fields does not break the cooker.

    Our schema doesn't enforce a lot of non-null revision fields, so a
    freshly loaded revision is rewritten with those fields set to null
    and then cooked and checked out.
    """
    repo = TestRepo()
    with repo as rp:
        (rp / 'file').write_text(TEST_CONTENT)
        repo.commit('initial commit')
        self.load(str(rp))
        obj_id_hex = repo.repo.refs[b'HEAD'].decode()
        obj_id = hashutil.hash_to_bytes(obj_id_hex)

    # Null out every nullable column of the loaded revision.
    # NOTE(review): no explicit commit is issued here; presumably the
    # cooker reads through the same connection -- confirm.
    cur = self.storage.db._cursor(None)
    try:
        cur.execute("""UPDATE revision SET
                       date = null,
                       date_offset = null,
                       committer_date = null,
                       committer_date_offset = null,
                       message = null,
                       author = null,
                       committer = null,
                       metadata = null
                       WHERE id = %s""", (obj_id,))
    finally:
        # Release the cursor even when the UPDATE fails.
        cur.close()

    with self.cook_extract_revision_gitfast(obj_id) as (ert, p):
        ert.checkout(b'HEAD')
        self.assertEqual((p / 'file').stat().st_mode, 0o100644)