diff --git a/swh/objstorage/backends/pathslicing.py b/swh/objstorage/backends/pathslicing.py
--- a/swh/objstorage/backends/pathslicing.py
+++ b/swh/objstorage/backends/pathslicing.py
@@ -57,6 +57,15 @@
     with gzip.GzipFile(filename=tmp_path, fileobj=tmp_f) as f:
         yield f
 
+    # Make sure the contents of the temporary file are written to disk
+    # before it is moved into place: flush Python's buffer, then ask the
+    # kernel to sync (fdatasync skips non-essential metadata when available).
+    tmp_f.flush()
+    if objstorage.use_fdatasync:
+        os.fdatasync(tmp)
+    else:
+        os.fsync(tmp)
+
     # Then close the temporary file and move it to the right directory.
     tmp_f.close()
     os.chmod(tmp_path, FILE_MODE)
@@ -126,6 +135,10 @@
             if sbounds
         ]
 
+        # os.fdatasync is not available on every platform (e.g. macOS);
+        # fall back to os.fsync where it is missing.
+        self.use_fdatasync = hasattr(os, 'fdatasync')
+
         self.check_config(check_write=False)
 
     def check_config(self, *, check_write):
diff --git a/swh/objstorage/tests/test_objstorage_pathslicing.py b/swh/objstorage/tests/test_objstorage_pathslicing.py
--- a/swh/objstorage/tests/test_objstorage_pathslicing.py
+++ b/swh/objstorage/tests/test_objstorage_pathslicing.py
@@ -6,6 +6,7 @@
 import shutil
 import tempfile
 import unittest
+from unittest.mock import patch, DEFAULT
 import gzip
 
 from swh.model import hashutil
@@ -109,3 +110,39 @@
         self.assertEqual(n_leaf, 2)  # beware, this depends on the hash algo
         self.assertEqual(len(ids), 1)
         self.assertEqual(ids, all_ids[-1:])
+
+    def test_fdatasync_default(self):
+        """By default, sync with fdatasync if the OS has it, else fsync."""
+        content, obj_id = self.hash_content(b'check_fdatasync')
+        # create=True lets the patch succeed on platforms without
+        # os.fdatasync (e.g. macOS), where plain patching raises
+        # AttributeError.
+        with patch.multiple('os', fsync=DEFAULT, fdatasync=DEFAULT,
+                            create=True) as patched:
+            self.storage.add(content, obj_id=obj_id)
+        if self.storage.use_fdatasync:
+            patched['fdatasync'].assert_called_once()
+            patched['fsync'].assert_not_called()
+        else:
+            patched['fdatasync'].assert_not_called()
+            patched['fsync'].assert_called_once()
+
+    def test_fdatasync_forced_on(self):
+        """Forcing use_fdatasync on must sync with fdatasync only."""
+        self.storage.use_fdatasync = True
+        content, obj_id = self.hash_content(b'check_fdatasync')
+        with patch.multiple('os', fsync=DEFAULT, fdatasync=DEFAULT,
+                            create=True) as patched:
+            self.storage.add(content, obj_id=obj_id)
+        patched['fdatasync'].assert_called_once()
+        patched['fsync'].assert_not_called()
+
+    def test_fdatasync_forced_off(self):
+        """Forcing use_fdatasync off must sync with fsync only."""
+        self.storage.use_fdatasync = False
+        content, obj_id = self.hash_content(b'check_fdatasync')
+        with patch.multiple('os', fsync=DEFAULT, fdatasync=DEFAULT,
+                            create=True) as patched:
+            self.storage.add(content, obj_id=obj_id)
+        patched['fdatasync'].assert_not_called()
+        patched['fsync'].assert_called_once()