Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9340749
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
View Options
diff --git a/requirements.txt b/requirements.txt
index 2661e11..48f0a0b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,6 @@
-python-dateutil
+click
+flask
psycopg2
+python-dateutil
+python-fastimport
vcversioner
-
-# remote storage API server
-flask
-
-
-click
diff --git a/swh/vault/cookers/__init__.py b/swh/vault/cookers/__init__.py
index 18385a2..fd84f0f 100644
--- a/swh/vault/cookers/__init__.py
+++ b/swh/vault/cookers/__init__.py
@@ -1,12 +1,14 @@
-# Copyright (C) 2016 The Software Heritage developers
+# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from .directory import DirectoryCooker
from .revision_flat import RevisionFlatCooker
+from .revision_git import RevisionGitCooker
COOKER_TYPES = {
'directory': DirectoryCooker,
'revision_flat': RevisionFlatCooker,
+ 'revision_git': RevisionGitCooker,
}
diff --git a/swh/vault/cookers/revision_git.py b/swh/vault/cookers/revision_git.py
new file mode 100644
index 0000000..c403084
--- /dev/null
+++ b/swh/vault/cookers/revision_git.py
@@ -0,0 +1,204 @@
+# Copyright (C) 2017 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import fastimport.commands
+
+from .base import BaseVaultCooker
+
+
+class RevisionGitCooker(BaseVaultCooker):
+    """Cooker to create a git fast-import bundle."""
+    CACHE_TYPE_KEY = 'revision_git'
+
+    def prepare_bundle(self, obj_id):
+        """Build the fast-import stream for the history of a revision.
+
+        Args:
+            obj_id: id of the revision whose log is exported.
+
+        Returns:
+            bytes: every fast-import command of the export, serialized and
+            joined with newlines.
+        """
+        commands = self.fastexport(self.storage.revision_log([obj_id]))
+        return b'\n'.join(bytes(command) for command in commands)
+
+    def fastexport(self, log):
+        """Generate all the git fast-import commands from a given log.
+
+        All blob commands are emitted before the commit commands, so that
+        the blob marks are assigned before file commands reference them.
+
+        Args:
+            log: iterable of revision dicts; each must provide at least
+                'id', 'parents' and 'directory'.
+
+        Raises:
+            ValueError: if the revisions cannot be ordered parents-first.
+        """
+        self.rev_by_id = {r['id']: r for r in log}
+        self.rev_sorted = list(self._toposort(self.rev_by_id))
+        self.dir_by_id = {}
+        self.obj_done = set()
+        self.obj_to_mark = {}
+        self.next_available_mark = 1
+
+        yield from self._compute_all_blob_commands()
+        yield from self._compute_all_commit_commands()
+
+    def _toposort(self, rev_by_id):
+        """Perform a topological sort on the revision graph.
+
+        Every revision is yielded after all of its parents. Each revision
+        and each parent link is visited once, instead of rescanning the
+        remaining revisions until all of them are emitted.
+
+        Raises:
+            ValueError: if some revisions can never be emitted, because a
+                parent is missing from the log or the graph has a cycle.
+                (Rescanning would loop forever in that situation.)
+        """
+        children = {}   # parent id -> ids of the revisions built on it
+        pending = {}    # revision id -> number of parents not yet emitted
+        ready = []      # ids whose parents have all been emitted
+        for rev_id, rev in rev_by_id.items():
+            parents = set(rev['parents'])
+            pending[rev_id] = len(parents)
+            if not parents:
+                ready.append(rev_id)
+            for parent_id in parents:
+                children.setdefault(parent_id, []).append(rev_id)
+
+        emitted = 0
+        while ready:
+            rev_id = ready.pop()
+            yield rev_by_id[rev_id]
+            emitted += 1
+            for child_id in children.get(rev_id, ()):
+                pending[child_id] -= 1
+                if not pending[child_id]:
+                    ready.append(child_id)
+
+        if emitted != len(rev_by_id):
+            raise ValueError(
+                'cannot sort revision log: %d revision(s) have a parent '
+                'missing from the log or are part of a cycle'
+                % (len(rev_by_id) - emitted))
+
+    def mark(self, obj_id):
+        """Get the mark ID as bytes of a git object.
+
+        If the object has not yet been marked, assign a new ID and add it to
+        the mark dictionary.
+        """
+        if obj_id not in self.obj_to_mark:
+            self.obj_to_mark[obj_id] = self.next_available_mark
+            self.next_available_mark += 1
+        return str(self.obj_to_mark[obj_id]).encode()
+
+    def _compute_all_blob_commands(self):
+        """Compute all the blob commands to populate the empty git repository.
+
+        Mark the populated blobs so that we are able to reference them in file
+        commands.
+
+        """
+        for rev in self.rev_sorted:
+            yield from self._compute_blob_commands_in_dir(rev['directory'])
+
+    def _compute_blob_commands_in_dir(self, dir_id):
+        """Find all the blobs in a directory and generate their blob commands.
+
+        If a blob has already been visited and marked, skip it. The file
+        entries of the directory are cached in ``self.dir_by_id`` for the
+        later computation of file commands.
+        """
+        if dir_id in self.dir_by_id:
+            # Directory shared with an earlier revision: it is already
+            # cached and its blobs handled, so skip the storage requests.
+            return
+        data = self.storage.directory_ls(dir_id, recursive=True)
+        files_data = [entry for entry in data if entry['type'] == 'file']
+        self.dir_by_id[dir_id] = files_data
+        for file_data in files_data:
+            obj_id = file_data['sha1']
+            if obj_id in self.obj_done:
+                continue
+            content = list(self.storage.content_get([obj_id]))[0]['data']
+            yield fastimport.commands.BlobCommand(
+                mark=self.mark(obj_id),
+                data=content,
+            )
+            self.obj_done.add(obj_id)
+
+    def _compute_all_commit_commands(self):
+        """Compute all the commit commands.
+        """
+        for rev in self.rev_sorted:
+            yield from self._compute_commit_command(rev)
+
+    def _compute_commit_command(self, rev):
+        """Compute a commit command from a specific revision.
+
+        The first parent becomes the 'from' of the commit and the base of
+        its file diff; the other parents are recorded as merges.
+        """
+        from_ = None
+        merges = None
+        parent = None
+        if 'parents' in rev and rev['parents']:
+            from_ = b':' + self.mark(rev['parents'][0])
+            merges = [b':' + self.mark(r) for r in rev['parents'][1:]]
+            parent = self.rev_by_id[rev['parents'][0]]
+        files = self._compute_file_commands(rev, parent)
+        author = (rev['author']['name'],
+                  rev['author']['email'],
+                  rev['date']['timestamp']['seconds'],
+                  rev['date']['offset'] * 60)
+        committer = (rev['committer']['name'],
+                     rev['committer']['email'],
+                     rev['committer_date']['timestamp']['seconds'],
+                     rev['committer_date']['offset'] * 60)
+        yield fastimport.commands.CommitCommand(
+            ref=b'refs/heads/master',
+            mark=self.mark(rev['id']),
+            author=author,
+            committer=committer,
+            message=rev['message'],
+            from_=from_,
+            merges=merges,
+            file_iter=files,
+        )
+
+    def _compute_file_commands(self, rev, parent=None):
+        """Compute all the file commands of a revision.
+
+        Generate a diff of the files between the revision and its main parent
+        to find the necessary file commands to apply.
+        """
+        if not parent:
+            parent_dir = []
+        else:
+            parent_dir = self.dir_by_id[parent['directory']]
+        cur_dir = self.dir_by_id[rev['directory']]
+        parent_dir = {f['name']: f for f in parent_dir}
+        cur_dir = {f['name']: f for f in cur_dir}
+
+        for fname, f in cur_dir.items():
+            if ((fname not in parent_dir
+                 or f['sha1'] != parent_dir[fname]['sha1']
+                 or f['perms'] != parent_dir[fname]['perms'])):
+                yield fastimport.commands.FileModifyCommand(
+                    path=f['name'],
+                    mode=f['perms'],
+                    dataref=(b':' + self.mark(f['sha1'])),
+                    data=None,
+                )
+
+        for fname, f in parent_dir.items():
+            if fname not in cur_dir:
+                yield fastimport.commands.FileDeleteCommand(
+                    path=f['name']
+                )
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 11:05 AM (3 w, 6 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3317401
Attached To
rDVAU Software Heritage Vault
Event Timeline
Log In to Comment