Page MenuHomeSoftware Heritage

D1778.diff
No OneTemporary

D1778.diff

diff --git a/swh/vault/cookers/revision_flat.py b/swh/vault/cookers/revision_flat.py
--- a/swh/vault/cookers/revision_flat.py
+++ b/swh/vault/cookers/revision_flat.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2016-2017 The Software Heritage developers
+# Copyright (C) 2016-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,7 @@
from swh.model import hashutil
from swh.vault.cookers.base import BaseVaultCooker
+from swh.vault.cookers.utils import revision_log
from swh.vault.to_disk import DirectoryBuilder
@@ -22,7 +23,7 @@
def prepare_bundle(self):
with tempfile.TemporaryDirectory(prefix='tmp-vault-revision-') as td:
root = Path(td)
- for revision in self.storage.revision_log([self.obj_id]):
+ for revision in revision_log(self.storage, self.obj_id):
revdir = root / hashutil.hash_to_hex(revision['id'])
revdir.mkdir()
directory_builder = DirectoryBuilder(
diff --git a/swh/vault/cookers/revision_gitfast.py b/swh/vault/cookers/revision_gitfast.py
--- a/swh/vault/cookers/revision_gitfast.py
+++ b/swh/vault/cookers/revision_gitfast.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -15,6 +15,7 @@
from swh.model.toposort import toposort
from swh.model.from_disk import mode_to_perms
from swh.vault.cookers.base import BaseVaultCooker
+from swh.vault.cookers.utils import revision_log
from swh.vault.to_disk import get_filtered_files_content
@@ -26,7 +27,7 @@
return not list(self.storage.revision_missing([self.obj_id]))
def prepare_bundle(self):
- self.log = list(toposort(self.storage.revision_log([self.obj_id])))
+ self.log = list(toposort(revision_log(self.storage, self.obj_id)))
self.gzobj = zlib.compressobj(9, zlib.DEFLATED, zlib.MAX_WBITS | 16)
self.fastexport()
self.write(self.gzobj.flush())
diff --git a/swh/vault/cookers/utils.py b/swh/vault/cookers/utils.py
new file mode 100644
--- /dev/null
+++ b/swh/vault/cookers/utils.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.storage.algos.revisions_walker import get_revisions_walker
+
+
+def revision_log(storage, rev_id, per_page=1000):
+ """Retrieve a revision log in a paginated way in order to avoid storage
+ timeouts when the total number of revisions to fetch is large.
+
+ Args:
+ storage (swh.storage.storage.Storage): instance of swh storage
+ (either local or remote)
+ rev_id (bytes): a revision identifier
+ per_page (Optional[int]): the maximum number of revisions to return
+ in each page
+
+ Yields:
+ dict: Revision information as a dictionary
+ """
+ rw_state = {}
+ nb_revs = 0
+ max_revs = per_page
+ while True:
+ # Build a 'bfs' walker over the commits log starting at rev_id,
+ # capped at max_revs visited revisions and given rw_state (empty on
+ # the first pass, the previous walker's exported state afterwards).
+ revs_walker = get_revisions_walker('bfs', storage, rev_id,
+ max_revs=max_revs,
+ state=rw_state)
+ # nb_revs is a running total across all pages; it is never reset.
+ for rev in revs_walker:
+ nb_revs += 1
+ yield rev
+ # Both counters are cumulative: yielding fewer revisions than the
+ # current cap means the log has been exhausted, so stop paginating.
+ # NOTE(review): per_page <= 0 can never satisfy this test - confirm.
+ if nb_revs < max_revs:
+ break
+ # Save the walker state so that the next walker continues the
+ # iteration from where this one stopped.
+ rw_state = revs_walker.export_state()
+ # Raise the cumulative cap by one page so that the next pass can
+ # yield up to per_page further revisions.
+ max_revs += per_page

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 2:53 PM (4 d, 19 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221492

Event Timeline