Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122940
D1431.id4692.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D1431.id4692.diff
View Options
diff --git a/swh/storage/algos/revisions_walker.py b/swh/storage/algos/revisions_walker.py
--- a/swh/storage/algos/revisions_walker.py
+++ b/swh/storage/algos/revisions_walker.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -63,11 +63,13 @@
self._last_rev = None
self._num_revs = 0
self._max_revs = max_revs
+ self._missing_revs = set()
if state:
self._revs_to_visit = state['revs_to_visit']
self._done = state['done']
self._last_rev = state['last_rev']
self._num_revs = state['num_revs']
+ self._missing_revs = state['missing_revs']
self.storage = storage
self.process_rev(rev_start)
@@ -152,6 +154,28 @@
self._revs[rev['id']] = rev
return self._revs.get(rev_id)
+ def missing_revisions(self):
+ """
+ Return a set of revision identifiers whose associated data were
+ found missing from the archive content while walking on the
+ revisions graph.
+
+ Returns:
+ Set[bytes]: a set of revision identifiers
+ """
+ return self._missing_revs
+
+ def is_history_truncated(self):
+ """
+ Return if the revision history generated so far has been truncated
+ or not. A revision history might end up truncated if some revision
+ data were found missing from the archive content.
+
+ Returns:
+ bool: Whether the history got truncated or not
+ """
+ return len(self.missing_revisions()) > 0
+
def export_state(self):
"""
Export the internal state of that revision walker to a dict.
@@ -164,7 +188,8 @@
'revs_to_visit': self._revs_to_visit,
'done': self._done,
'last_rev': self._last_rev,
- 'num_revs': self._num_revs
+ 'num_revs': self._num_revs,
+ 'missing_revs': self._missing_revs
}
def __next__(self):
@@ -178,6 +203,7 @@
rev = self._get_rev(rev_id)
# revision data is missing, returned history will be truncated
if rev is None:
+ self._missing_revs.add(rev_id)
continue
self.process_parent_revs(rev)
if self.should_return(rev):
@@ -210,6 +236,8 @@
if rev is not None:
commit_time = rev['committer_date']['timestamp']['seconds']
heapq.heappush(self._revs_to_visit, (-commit_time, rev_id))
+ else:
+ self._missing_revs.add(rev_id)
def get_next_rev_id(self):
"""
diff --git a/swh/storage/tests/algos/test_revisions_walker.py b/swh/storage/tests/algos/test_revisions_walker.py
--- a/swh/storage/tests/algos/test_revisions_walker.py
+++ b/swh/storage/tests/algos/test_revisions_walker.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -7,7 +7,7 @@
from unittest.mock import patch
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.storage.algos.revisions_walker import get_revisions_walker
# For those tests, we will walk the following revisions history
@@ -283,21 +283,39 @@
_rev_start = 'b364f53155044e5308a0f73abb3b5f01995a5b7d'
+_rev_missing = '836d498396fb9b5d45c896885f84d8d60a5651dc'
+
class RevisionsWalkerTest(unittest.TestCase):
- @patch('swh.storage.storage.Storage')
def check_revisions_ordering(self, rev_walker_type, expected_result,
- MockStorage):
- storage = MockStorage()
- storage.revision_log.return_value = _revisions_list
+ truncated_history):
+ with patch('swh.storage.storage.Storage') as MockStorage:
+ storage = MockStorage()
+ if not truncated_history:
+ storage.revision_log.return_value = _revisions_list
+ else:
+ revs_lists_truncated = [
+ None if hash_to_hex(rev['id']) == _rev_missing else rev
+ for rev in _revisions_list
+ ]
+
+ storage.revision_log.return_value = revs_lists_truncated
+
+ revs_walker = get_revisions_walker(rev_walker_type, storage,
+ hash_to_bytes(_rev_start))
+
+ self.assertEqual(list(map(hash_to_bytes, expected_result)),
+ [rev['id'] for rev in revs_walker])
- revs_walker = \
- get_revisions_walker(rev_walker_type, storage,
- hash_to_bytes(_rev_start))
+ self.assertEqual(revs_walker.is_history_truncated(),
+ truncated_history)
- self.assertEqual(list(map(hash_to_bytes, expected_result)),
- [rev['id'] for rev in revs_walker])
+ if truncated_history:
+ missing_revs = revs_walker.missing_revisions()
+ self.assertEqual(missing_revs, {hash_to_bytes(_rev_missing)})
+ else:
+ self.assertEqual(revs_walker.missing_revisions(), set())
def test_revisions_walker_committer_date(self):
@@ -313,7 +331,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('committer_date', expected_result)
+ self.check_revisions_ordering('committer_date', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs(self):
@@ -330,7 +349,8 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('dfs', expected_result)
+ self.check_revisions_ordering('dfs', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs_post(self):
@@ -347,7 +367,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('dfs_post', expected_result)
+ self.check_revisions_ordering('dfs_post', expected_result,
+ truncated_history=False)
def test_revisions_walker_bfs(self):
@@ -364,4 +385,19 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('bfs', expected_result)
+ self.check_revisions_ordering('bfs', expected_result,
+ truncated_history=False)
+
+ def test_revisions_walker_truncated_history(self):
+
+ expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
+ 'b94886c500c46e32dc3d7ebae8a5409accd592e5',
+ '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
+ '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
+ 'b401c50863475db4440c85c10ac0b6423b61554d',
+ '9c5051397e5c2e0c258bb639c3dd34406584ca10']
+
+ for revs_walker_type in ('committer_date', 'bfs', 'dfs', 'dfs_post'):
+
+ self.check_revisions_ordering(revs_walker_type, expected_result,
+ truncated_history=True)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 12:39 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227464
Attached To
D1431: Revisions walker: Add methods to query produced history state
Event Timeline
Log In to Comment