Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7147844
D1431.id4654.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
D1431.id4654.diff
View Options
diff --git a/swh/storage/algos/revisions_walker.py b/swh/storage/algos/revisions_walker.py
--- a/swh/storage/algos/revisions_walker.py
+++ b/swh/storage/algos/revisions_walker.py
@@ -63,11 +63,13 @@
self._last_rev = None
self._num_revs = 0
self._max_revs = max_revs
+ self._missing_revs = set()
if state:
self._revs_to_visit = state['revs_to_visit']
self._done = state['done']
self._last_rev = state['last_rev']
self._num_revs = state['num_revs']
+ self._missing_revs = state['missing_revs']
self.storage = storage
self.process_rev(rev_start)
@@ -152,6 +154,28 @@
self._revs[rev['id']] = rev
return self._revs.get(rev_id)
+ def missing_revisions(self):
+ """
+ Return the set of revision identifiers whose associated data were
+ found missing from the archive content while walking the
+ revisions graph.
+
+ Returns:
+ Set[bytes]: a set of revision identifiers
+ """
+ return self._missing_revs
+
+ def is_history_truncated(self):
+ """
+ Return whether the revision history generated so far has been
+ truncated or not. A revision history might end up truncated if some
+ revision data were found missing from the archive content.
+
+ Returns:
+ bool: Whether the history got truncated or not
+ """
+ return len(self.missing_revisions()) > 0
+
def export_state(self):
"""
Export the internal state of that revision walker to a dict.
@@ -164,7 +188,8 @@
'revs_to_visit': self._revs_to_visit,
'done': self._done,
'last_rev': self._last_rev,
- 'num_revs': self._num_revs
+ 'num_revs': self._num_revs,
+ 'missing_revs': self._missing_revs
}
def __next__(self):
@@ -178,6 +203,7 @@
rev = self._get_rev(rev_id)
# revision data is missing, returned history will be truncated
if rev is None:
+ self._missing_revs.add(rev_id)
continue
self.process_parent_revs(rev)
if self.should_return(rev):
@@ -210,6 +236,8 @@
if rev is not None:
commit_time = rev['committer_date']['timestamp']['seconds']
heapq.heappush(self._revs_to_visit, (-commit_time, rev_id))
+ else:
+ self._missing_revs.add(rev_id)
def get_next_rev_id(self):
"""
diff --git a/swh/storage/tests/algos/test_revisions_walker.py b/swh/storage/tests/algos/test_revisions_walker.py
--- a/swh/storage/tests/algos/test_revisions_walker.py
+++ b/swh/storage/tests/algos/test_revisions_walker.py
@@ -7,7 +7,7 @@
from unittest.mock import patch
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
from swh.storage.algos.revisions_walker import get_revisions_walker
# For those tests, we will walk the following revisions history
@@ -283,21 +283,39 @@
_rev_start = 'b364f53155044e5308a0f73abb3b5f01995a5b7d'
+_rev_missing = '836d498396fb9b5d45c896885f84d8d60a5651dc'
+
class RevisionsWalkerTest(unittest.TestCase):
- @patch('swh.storage.storage.Storage')
def check_revisions_ordering(self, rev_walker_type, expected_result,
- MockStorage):
- storage = MockStorage()
- storage.revision_log.return_value = _revisions_list
+ truncated_history):
+ with patch('swh.storage.storage.Storage') as MockStorage:
+ storage = MockStorage()
+ if not truncated_history:
+ storage.revision_log.return_value = _revisions_list
+ else:
+ revs_lists_truncated = []
+ for rev in _revisions_list:
+ if hash_to_hex(rev['id']) == _rev_missing:
+ revs_lists_truncated.append(None)
+ else:
+ revs_lists_truncated.append(rev)
+
+ storage.revision_log.return_value = revs_lists_truncated
+
+ revs_walker = get_revisions_walker(rev_walker_type, storage,
+ hash_to_bytes(_rev_start))
+
+ self.assertEqual(list(map(hash_to_bytes, expected_result)),
+ [rev['id'] for rev in revs_walker])
- revs_walker = \
- get_revisions_walker(rev_walker_type, storage,
- hash_to_bytes(_rev_start))
+ self.assertEqual(revs_walker.is_history_truncated(),
+ truncated_history)
- self.assertEqual(list(map(hash_to_bytes, expected_result)),
- [rev['id'] for rev in revs_walker])
+ if truncated_history:
+ missing_revs = revs_walker.missing_revisions()
+ self.assertEqual(missing_revs, {hash_to_bytes(_rev_missing)})
def test_revisions_walker_committer_date(self):
@@ -313,7 +331,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('committer_date', expected_result)
+ self.check_revisions_ordering('committer_date', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs(self):
@@ -330,7 +349,8 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('dfs', expected_result)
+ self.check_revisions_ordering('dfs', expected_result,
+ truncated_history=False)
def test_revisions_walker_dfs_post(self):
@@ -347,7 +367,8 @@
'ee96c2a2d397b79070d2b6fe3051290963748358',
'8f89dda8e072383cf50d42532ae8f52ad89f8fdf']
- self.check_revisions_ordering('dfs_post', expected_result)
+ self.check_revisions_ordering('dfs_post', expected_result,
+ truncated_history=False)
def test_revisions_walker_bfs(self):
@@ -364,4 +385,19 @@
'b401c50863475db4440c85c10ac0b6423b61554d',
'9c5051397e5c2e0c258bb639c3dd34406584ca10']
- self.check_revisions_ordering('bfs', expected_result)
+ self.check_revisions_ordering('bfs', expected_result,
+ truncated_history=False)
+
+ def test_revisions_walker_truncated_history(self):
+
+ expected_result = ['b364f53155044e5308a0f73abb3b5f01995a5b7d',
+ 'b94886c500c46e32dc3d7ebae8a5409accd592e5',
+ '0cb6b4611d65bee0f57821dac7f611e2f8a02433',
+ '2b0240c6d682bad51532eec15b8a7ed6b75c8d31',
+ 'b401c50863475db4440c85c10ac0b6423b61554d',
+ '9c5051397e5c2e0c258bb639c3dd34406584ca10']
+
+ for revs_walker_type in ('committer_date', 'bfs', 'dfs', 'dfs_post'):
+
+ self.check_revisions_ordering(revs_walker_type, expected_result,
+ truncated_history=True)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 23, 1:52 AM (19 h, 19 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229359
Attached To
D1431: Revisions walker: Add methods to query produced history state
Event Timeline
Log In to Comment