Page MenuHomeSoftware Heritage

D1431.id4692.diff
No OneTemporary

D1431.id4692.diff

diff --git a/swh/storage/algos/revisions_walker.py b/swh/storage/algos/revisions_walker.py
--- a/swh/storage/algos/revisions_walker.py
+++ b/swh/storage/algos/revisions_walker.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -63,11 +63,13 @@
self._last_rev = None
self._num_revs = 0
self._max_revs = max_revs
+ self._missing_revs = set()
if state:
self._revs_to_visit = state['revs_to_visit']
self._done = state['done']
self._last_rev = state['last_rev']
self._num_revs = state['num_revs']
+ self._missing_revs = state['missing_revs']
self.storage = storage
self.process_rev(rev_start)
@@ -152,6 +154,28 @@
self._revs[rev['id']] = rev
return self._revs.get(rev_id)
+ def missing_revisions(self):
+ """
+ Return a set of revision identifiers whose associated data were
+ found missing from the archive content while walking the
+ revisions graph.
+
+ Returns:
+ Set[bytes]: a set of revision identifiers
+ """
+ return self._missing_revs
+
+ def is_history_truncated(self):
+ """
+ Return whether the revision history generated so far has been
+ truncated or not. A revision history might end up truncated if some
+ revision data were found missing from the archive content.
+
+ Returns:
+ bool: Whether the history got truncated or not
+ """
+ return len(self.missing_revisions()) > 0
+
def export_state(self):
"""
Export the internal state of that revision walker to a dict.
@@ -164,7 +188,8 @@
'revs_to_visit': self._revs_to_visit,
'done': self._done,
'last_rev': self._last_rev,
- 'num_revs': self._num_revs
+ 'num_revs': self._num_revs,
+ 'missing_revs': self._missing_revs
}
def __next__(self):
@@ -178,6 +203,7 @@
rev = self._get_rev(rev_id)
# revision data is missing, returned history will be truncated
if rev is None:
+ self._missing_revs.add(rev_id)
continue
self.process_parent_revs(rev)
if self.should_return(rev):
@@ -210,6 +236,8 @@
if rev is not None:
commit_time = rev['committer_date']['timestamp']['seconds']
heapq.heappush(self._revs_to_visit, (-commit_time, rev_id))
+ else:
+ self._missing_revs.add(rev_id)
def get_next_rev_id(self):
"""
diff --git a/swh/storage/tests/algos/test_revisions_walker.py b/swh/storage/tests/algos/test_revisions_walker.py
--- a/swh/storage/tests/algos/test_revisions_walker.py
+++ b/swh/storage/tests/algos/test_revisions_walker.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -7,7 +7,7 @@
from unittest.mock import patch
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.storage.algos.revisions_walker import get_revisions_walker
# For those tests, we will walk the following revisions history
@@ -283,21 +283,39 @@
_rev_start = 'b364f53155044e5308a0f73abb3b5f01995a5b7d'
+_rev_missing = '836d498396fb9b5d45c896885f84d8d60a5651dc'
+
class RevisionsWalkerTest(unittest.TestCase):
- @patch('swh.storage.storage.Storage')
def check_revisions_ordering(self, rev_walker_type, expected_result,
- MockStorage):
- storage = MockStorage()
- storage.revision_log.return_value = _revisions_list
+ truncated_history):
+ with patch('swh.storage.storage.Storage') as MockStorage:
+ storage = MockStorage()
+ if not truncated_history:
+ storage.revision_log.return_value = _revisions_list
+ else:
+ revs_lists_truncated = [
+ None if hash_to_hex(rev['id']) == _rev_missing else rev
+ for rev in _revisions_list
+ ]
+
+ storage.revision_log.return_value = revs_lists_truncated
+
+ revs_walker = get_revisions_walker(rev_walker_type, storage,
+ hash_to_bytes(_rev_start))
+
+ self.assertEqual(list(map(hash_to_bytes, expected_result)),
+ [rev['id'] for rev in revs_walker])
- revs_walker = \
- get_revisions_walker(rev_walker_type, storage,
- hash_to_bytes(_rev_start))
+ self.assertEqual(revs_walker.is_history_truncated(),
+ truncated_history)
- self.assertEqual(list(map(hash_to_bytes, expected_result)),
- [rev['id'] for rev in revs_walker])
+ if truncated_history:
+ missing_revs = revs_walker.missing_revisions()
+ self.assertEqual(missing_revs, {hash_to_bytes(_rev_missing)})
+ else:
+ self.assertEqual(revs_walker.missing_revisions(), set())
def test_revisions_walker_committer_date(self):
@@ -313,7 +331,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('committer_date', expected_result)
+ self.check_revisions_ordering('committer_date', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs(self):
@@ -330,7 +349,8 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('dfs', expected_result)
+ self.check_revisions_ordering('dfs', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs_post(self):
@@ -347,7 +367,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('dfs_post', expected_result)
+ self.check_revisions_ordering('dfs_post', expected_result,
+ truncated_history=False)
def test_revisions_walker_bfs(self):
@@ -364,4 +385,19 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('bfs', expected_result)
+ self.check_revisions_ordering('bfs', expected_result,
+ truncated_history=False)
+
+ def test_revisions_walker_truncated_history(self):
+
+ expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
+ 'b94886c500c46e32dc3d7ebae8a5409accd592e5',
+ '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
+ '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
+ 'b401c50863475db4440c85c10ac0b6423b61554d',
+ '9c5051397e5c2e0c258bb639c3dd34406584ca10']
+
+ for revs_walker_type in ('committer_date', 'bfs', 'dfs', 'dfs_post'):
+
+ self.check_revisions_ordering(revs_walker_type, expected_result,
+ truncated_history=True)

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 12:39 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227464

Event Timeline