Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9344882
D1778.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D1778.diff
View Options
diff --git a/swh/vault/cookers/revision_flat.py b/swh/vault/cookers/revision_flat.py
--- a/swh/vault/cookers/revision_flat.py
+++ b/swh/vault/cookers/revision_flat.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2017 The Software Heritage developers
+# Copyright (C) 2016-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,7 @@
from swh.model import hashutil
from swh.vault.cookers.base import BaseVaultCooker
+from swh.vault.cookers.utils import revision_log
from swh.vault.to_disk import DirectoryBuilder
@@ -22,7 +23,7 @@
def prepare_bundle(self):
with tempfile.TemporaryDirectory(prefix='tmp-vault-revision-') as td:
root = Path(td)
- for revision in self.storage.revision_log([self.obj_id]):
+ for revision in revision_log(self.storage, self.obj_id):
revdir = root / hashutil.hash_to_hex(revision['id'])
revdir.mkdir()
directory_builder = DirectoryBuilder(
diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py
--- a/swh/vault/cookers/revision_gitfast.py
+++ b/swh/vault/cookers/revision_gitfast.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -15,6 +15,7 @@
from swh.model.toposort import toposort
from swh.model.from_disk import mode_to_perms
from swh.vault.cookers.base import BaseVaultCooker
+from swh.vault.cookers.utils import revision_log
from swh.vault.to_disk import get_filtered_files_content
@@ -26,7 +27,7 @@
return not list(self.storage.revision_missing([self.obj_id]))
def prepare_bundle(self):
- self.log = list(toposort(self.storage.revision_log([self.obj_id])))
+ self.log = list(toposort(revision_log(self.storage, self.obj_id)))
self.gzobj = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
self.fastexport()
self.write(self.gzobj.flush())
diff --git a/swh/vault/cookers/utils.py b/swh/vault/cookers/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/vault/cookers/utils.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.storage.algos.revisions_walker import get_revisions_walker
+
+
def revision_log(storage, rev_id, per_page=1000):
    """Retrieve a revision log in a paginated way in order to avoid storage
    timeouts when the total number of revisions to fetch is large.

    Args:
        storage (swh.storage.storage.Storage): instance of swh storage
            (either local or remote)
        rev_id (bytes): a revision identifier
        per_page (Optional[int]): the maximum number of revisions to return
            in each page; must be at least 1

    Yields:
        dict: Revision information as a dictionary

    Raises:
        ValueError: if ``per_page`` is lower than 1. As this function is a
            generator, the error is raised when iteration starts.
    """
    # With a non-positive page size the revision budget below never grows
    # past zero, so the termination test could never succeed and the loop
    # would spin forever. Reject such values explicitly.
    if per_page < 1:
        raise ValueError(
            'per_page must be a positive integer, got %r' % (per_page,))

    rw_state = {}
    nb_revs = 0
    max_revs = per_page
    while True:
        # Get an iterator returning the commits log from rev_id.
        # At most max_revs visited revisions from rev_id in the commits graph
        # will be returned (the budget is cumulative across pages).
        revs_walker = get_revisions_walker('bfs', storage, rev_id,
                                           max_revs=max_revs,
                                           state=rw_state)
        # Iterate on at most per_page revisions in the commits log.
        for rev in revs_walker:
            nb_revs += 1
            yield rev
        # If the total number of iterated revisions is less than the
        # maximum requested one, it means that we hit the initial revision
        # in the log.
        if nb_revs < max_revs:
            break
        # Backup iterator state to continue the revisions iteration
        # from where we left it.
        rw_state = revs_walker.export_state()
        # Increment the maximum of revisions to iterate from rev_id
        # to get next revisions in the log.
        max_revs += per_page
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 2:53 PM (4 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221492
Attached To
D1778: Fix revision cooking errors with the vault for large revision log
Event Timeline
Log In to Comment