# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
# and others
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
"""Git object store interfaces and implementation."""

from io import BytesIO
import errno
from itertools import chain
import os
import stat
import sys
import tempfile

from dulwich.diff_tree import (
    tree_changes,
    walk_trees,
    )
from dulwich.errors import (
    NotTreeError,
    )
from dulwich.file import GitFile
from dulwich.objects import (
    Commit,
    ShaFile,
    Tag,
    Tree,
    ZERO_SHA,
    hex_to_sha,
    sha_to_hex,
    hex_to_filename,
    S_ISGITLINK,
    object_class,
    )
from dulwich.pack import (
    Pack,
    PackData,
    PackInflater,
    iter_sha1,
    write_pack_header,
    write_pack_index_v2,
    write_pack_object,
    write_pack_objects,
    compute_file_sha,
    PackIndexer,
    PackStreamCopier,
    )


INFODIR = 'info'
PACKDIR = 'pack'


if sys.version_info[0] == 2:
    iteritems = lambda d: d.iteritems()
else:
    iteritems = lambda d: d.items()


class BaseObjectStore(object):
    """Object store interface."""

    def determine_wants_all(self, refs):
        return [sha for (ref, sha) in iteritems(refs)
                if not sha in self and not ref.endswith(b"^{}") and
                not sha == ZERO_SHA]

    def iter_shas(self, shas):
        """Iterate over the objects for the specified shas.

        :param shas: Iterable object with SHAs
        :return: Object iterator
        """
        return ObjectStoreIterator(self, shas)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        raise NotImplementedError(self.contains_packed)

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_packed(sha) or self.contains_loose(sha)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha):
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj):
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(self, source, target, want_unchanged=False):
        """Find the differences between the contents of two trees.

        :param source: SHA1 of the source tree
        :param target: SHA1 of the target tree
        :param want_unchanged: Whether unchanged files should be reported
        :return: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        for change in tree_changes(self, source, target,
                                   want_unchanged=want_unchanged):
            yield ((change.old.path, change.new.path),
                   (change.old.mode, change.new.mode),
                   (change.old.sha, change.new.sha))

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        :param tree_id: SHA1 of the tree.
        :param include_trees: If True, include tree objects in the iteration.
        :return: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        for entry, _ in walk_trees(self, tree_id, None):
            if not stat.S_ISDIR(entry.mode) or include_trees:
                yield entry

    def find_missing_objects(self, haves, wants, progress=None,
                             get_tagged=None,
                             get_parents=lambda commit: commit.parents):
        """Find the missing objects required for a set of revisions.

        :param haves: Iterable over SHAs already in common.
        :param wants: Iterable over SHAs of objects to fetch.
        :param progress: Simple progress function that will be called with
            updated progress strings.
        :param get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: Iterator over (sha, path) pairs.
        """
        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
                                     get_parents=get_parents)
        return iter(finder.next, None)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        :param graphwalker: A graphwalker object.
        :return: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_contents(self, have, want, progress=None):
        """Iterate over the contents of a pack file.

        :param have: List of SHA1s of objects that should not be sent
        :param want: List of SHA1s of objects that should be sent
        :param progress: Optional progress reporting method
        """
        return self.iter_shas(self.find_missing_objects(have, want, progress))

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        :param sha: The object SHA to peel.
        :return: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        obj = self[sha]
        obj_class = object_class(obj.type_name)
        while obj_class is Tag:
            obj_class, sha = obj.object
            obj = self[sha]
        return obj

    def _collect_ancestors(self, heads, common=set(),
                           get_parents=lambda commit: commit.parents):
        """Collect all ancestors of heads up to (excluding) those in common.

        :param heads: commits to start from
        :param common: commits to end at, or empty set to walk repository
            completely
        :param get_parents: Optional function for getting the parents of a
            commit.
        :return: a tuple (A, B) where A - all commits reachable
            from heads but not present in common, B - common (shared) elements
            that are directly reachable from heads
        """
        bases = set()
        commits = set()
        queue = []
        queue.extend(heads)
        while queue:
            e = queue.pop(0)
            if e in common:
                bases.add(e)
            elif e not in commits:
                commits.add(e)
                cmt = self[e]
                queue.extend(get_parents(cmt))
        return (commits, bases)

    def close(self):
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP
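

# Usage sketch for the interface above (illustrative only; ``store`` stands
# for any concrete BaseObjectStore subclass and the tree/tag SHAs are
# hypothetical):
#
#     obj = store.peel_sha(tag_sha)  # follow a chain of tags to the object
#     for (paths, modes, shas) in store.tree_changes(old_tree, new_tree):
#         ...  # each element is an (old, new) pair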


class PackBasedObjectStore(BaseObjectStore):

    def __init__(self):
        self._pack_cache = {}

    @property
    def alternates(self):
        return []

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            if sha in pack:
                return True
        return False

    def __contains__(self, sha):
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _pack_cache_stale(self):
        """Check whether the pack cache is stale."""
        raise NotImplementedError(self._pack_cache_stale)

    def _add_known_pack(self, base_name, pack):
        """Add a newly appeared pack to the cache by path."""
        self._pack_cache[base_name] = pack

    def close(self):
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    @property
    def packs(self):
        """List with pack objects."""
        if self._pack_cache is None or self._pack_cache_stale():
            self._update_pack_cache()
        return self._pack_cache.values()

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            for alternate_object in alternate:
                yield alternate_object

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha):
        raise NotImplementedError(self._get_loose_object)

    def _remove_loose_object(self, sha):
        raise NotImplementedError(self._remove_loose_object)

    def pack_loose_objects(self):
        """Pack loose objects.

        :return: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        for obj, path in objects:
            self._remove_loose_object(obj.id)
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        iterables = (list(self.packs) + [self._iter_loose_objects()] +
                     [self._iter_alternate_objects()])
        return chain(*iterables)

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError("Invalid object name %r" % name)
        for pack in self.packs:
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over objects, should support __len__.
        :return: Pack object of the objects written.
        """
        if len(objects) == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_objects(f, objects)
        except:
            abort()
            raise
        else:
            return commit()
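

# Usage sketch: add_objects() expects (object, path) pairs with __len__
# support and writes them out as a single pack through add_pack()'s commit
# callback. A minimal call (``blob``, ``tree`` and ``commit_obj`` are
# hypothetical dulwich objects already constructed by the caller):
#
#     store.add_objects([(blob, None), (tree, None), (commit_obj, None)])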


class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    def __init__(self, path):
        """Open an object store.

        :param path: Path of the object store.
        """
        super(DiskObjectStore, self).__init__()
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._pack_cache_time = 0
        self._pack_cache = {}
        self._alternates = None

    def __repr__(self):
        return "<%s(%r)>" % (self.__class__.__name__, self.path)

    @property
    def alternates(self):
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        try:
            f = GitFile(os.path.join(self.path, "info", "alternates"), 'rb')
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return
            raise
        with f:
            for l in f.readlines():
                l = l.rstrip(b"\n")
                # Skip comment lines; startswith() works on bytes on both
                # Python 2 and 3, whereas indexing yields an int on Python 3.
                if l.startswith(b"#"):
                    continue
                if os.path.isabs(l):
                    yield l.decode(sys.getfilesystemencoding())
                else:
                    yield os.path.join(self.path, l).decode(
                        sys.getfilesystemencoding())

    def add_alternate_path(self, path):
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, "info"))
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        alternates_path = os.path.join(self.path, "info/alternates")
        with GitFile(alternates_path, 'wb') as f:
            try:
                orig_f = open(alternates_path, 'rb')
            except (OSError, IOError) as e:
                if e.errno != errno.ENOENT:
                    raise
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))
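
    # For reference, objects/info/alternates as read and written above holds
    # one object directory per line, absolute or relative to this store's
    # path, with '#' comment lines skipped. Example contents:
    #
    #     /srv/git/shared.git/objects
    #     ../../shared/objects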

    def _update_pack_cache(self):
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except OSError as e:
            if e.errno == errno.ENOENT:
                self._pack_cache_time = 0
                self.close()
                return
            raise
        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
        pack_files = set()
        for name in pack_dir_contents:
            assert type(name) is str
            # TODO: verify that idx exists first
            if name.startswith("pack-") and name.endswith(".pack"):
                pack_files.add(name[:-len(".pack")])

        # Open newly appeared pack files
        for f in pack_files:
            if f not in self._pack_cache:
                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()

    def _pack_cache_stale(self):
        try:
            return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
        except OSError as e:
            if e.errno == errno.ENOENT:
                return True
            raise

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                yield (base + rest).encode(sys.getfilesystemencoding())

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except (OSError, IOError) as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    def _remove_loose_object(self, sha):
        os.remove(self._get_shafile_path(sha))

    def _get_pack_basepath(self, entries):
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode('ascii')
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_thin_pack(self, f, path, copier, indexer):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param f: Open file object for the pack.
        :param path: Path to the pack file.
        :param copier: A PackStreamCopier to use for writing pack data.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Must flush before reading (http://bugs.python.org/issue3207)
        f.flush()

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Must reposition before writing (http://bugs.python.org/issue3207)
        f.seek(0, os.SEEK_CUR)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            offset = f.tell()
            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
            entries.append((ext_sha, offset, crc32))
        pack_sha = new_sha.digest()
        f.write(pack_sha)
        f.close()

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)
        os.rename(path, pack_base_name + '.pack')

        # Write the index.
        index_file = GitFile(pack_base_name + '.idx', 'wb')
        try:
            write_pack_index_v2(index_file, entries, pack_sha)
            index_file.close()
        finally:
            index_file.abort()

        # Add the pack to the store and return it.
        final_pack = Pack(pack_base_name)
        final_pack.check_length_and_checksum()
        self._add_known_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        :return: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
        with os.fdopen(fd, 'w+b') as f:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            return self._complete_thin_pack(f, path, copier, indexer)

    def move_in_pack(self, path):
        """Move a specific file containing a pack into the pack directory.

        :note: The file should be on the same file system as the
            packs directory.

        :param path: Path to the pack file.
        """
        with PackData(path) as p:
            entries = p.sorted_entries()
            basename = self._get_pack_basepath(entries)
            with GitFile(basename + ".idx", "wb") as f:
                write_pack_index_v2(f, entries, p.get_stored_checksum())
        os.rename(path, basename + ".pack")
        final_pack = Pack(basename)
        self._add_known_pack(basename, final_pack)
        return final_pack

    def add_pack(self):
        """Add a new pack to this object store.

        :return: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, 'wb')

        def commit():
            os.fsync(fd)
            f.close()
            if os.path.getsize(path) > 0:
                return self.move_in_pack(path)
            else:
                os.remove(path)
                return None

        def abort():
            f.close()
            os.remove(path)
        return f, commit, abort

    def add_object(self, obj):
        """Add a single object to this object store.

        :param obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, 'wb') as f:
            f.write(obj.as_legacy_object())

    @classmethod
    def init(cls, path):
        try:
            os.mkdir(path)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
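

# Usage sketch (hypothetical path): initialize a store on disk, write one
# loose object, and read it back through the mapping interface.
#
#     from dulwich.objects import Blob
#     store = DiskObjectStore.init('/tmp/example/objects')
#     blob = Blob.from_string(b'hello')
#     store.add_object(blob)
#     assert store[blob.id].as_raw_string() == b'hello'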


class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self):
        super(MemoryObjectStore, self).__init__()
        self._data = {}

    def _to_hexsha(self, sha):
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError("Invalid sha %r" % (sha,))

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha):
        """Check if a particular object is present by SHA1 and is packed."""
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name):
        """Obtain the raw text for an object.

        :param name: sha for the object.
        :return: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name):
        return self._data[self._to_hexsha(name)]

    def __delitem__(self, name):
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj):
        """Add a single object to this object store."""
        self._data[obj.id] = obj

    def add_objects(self, objects):
        """Add a set of objects to this object store.

        :param objects: Iterable over a list of objects.
        """
        for obj, path in objects:
            self._data[obj.id] = obj

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add
        the individual objects.

        :return: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        f = BytesIO()

        def commit():
            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
            f.close()
            for obj in PackInflater.for_pack_data(p, self.get_raw):
                self._data[obj.id] = obj

        def abort():
            pass
        return f, commit, abort

    def _complete_thin_pack(self, f, indexer):
        """Complete a thin pack by adding external references.

        :param f: Open file object for the pack.
        :param indexer: A PackIndexer for indexing the pack.
        """
        entries = list(indexer)

        # Update the header with the new number of objects.
        f.seek(0)
        write_pack_header(f, len(entries) + len(indexer.ext_refs()))

        # Rescan the rest of the pack, computing the SHA with the new header.
        new_sha = compute_file_sha(f, end_ofs=-20)

        # Complete the pack.
        for ext_sha in indexer.ext_refs():
            assert len(ext_sha) == 20
            type_num, data = self.get_raw(ext_sha)
            write_pack_object(f, type_num, data, sha=new_sha)
        pack_sha = new_sha.digest()
        f.write(pack_sha)

    def add_thin_pack(self, read_all, read_some):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        :param read_all: Read function that blocks until the number of
            requested bytes are read.
        :param read_some: Read function that returns at least one byte, but
            may not return the number of bytes requested.
        """
        f, commit, abort = self.add_pack()
        try:
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f,
                                      delta_iter=indexer)
            copier.verify()
            self._complete_thin_pack(f, indexer)
        except:
            abort()
            raise
        else:
            commit()
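

# Usage sketch: MemoryObjectStore offers the same interface backed by a
# plain dict, which makes it convenient in tests.
#
#     from dulwich.objects import Blob
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b'data')
#     store.add_objects([(blob, None)])
#     assert blob.id in store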


class ObjectImporter(object):
    """Interface for importing objects."""

    def __init__(self, count):
        """Create a new ObjectImporter.

        :param count: Number of objects that's going to be imported.
        """
        self.count = count

    def add_object(self, object):
        """Add an object."""
        raise NotImplementedError(self.add_object)

    def finish(self, object):
        """Finish the import and write objects to disk."""
        raise NotImplementedError(self.finish)


class ObjectIterator(object):
    """Interface for iterating over objects."""

    def iterobjects(self):
        raise NotImplementedError(self.iterobjects)


class ObjectStoreIterator(ObjectIterator):
    """ObjectIterator that works on top of an ObjectStore."""

    def __init__(self, store, sha_iter):
        """Create a new ObjectIterator.

        :param store: Object store to retrieve from
        :param sha_iter: Iterator over (sha, path) tuples
        """
        self.store = store
        self.sha_iter = sha_iter
        self._shas = []

    def __iter__(self):
        """Yield tuple with next object and path."""
        for sha, path in self.itershas():
            yield self.store[sha], path

    def iterobjects(self):
        """Iterate over just the objects."""
        for o, path in self:
            yield o

    def itershas(self):
        """Iterate over the SHAs."""
        for sha in self._shas:
            yield sha
        for sha in self.sha_iter:
            self._shas.append(sha)
            yield sha

    def __contains__(self, needle):
        """Check if an object is present.

        :note: This checks if the object is present in
            the underlying object store, not if it would
            be yielded by the iterator.

        :param needle: SHA1 of the object to check for
        """
        return needle in self.store

    def __getitem__(self, key):
        """Find an object by SHA1.

        :note: This retrieves the object from the underlying
            object store. It will also succeed if the object would
            not be returned by the iterator.
        """
        return self.store[key]

    def __len__(self):
        """Return the number of objects."""
        return len(list(self.itershas()))


def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    :param lookup_obj: Callback for retrieving object by SHA1
    :param root_sha: SHA1 of the root tree
    :param path: Path to lookup
    :return: A tuple of (mode, SHA) of the resulting path.
    """
    tree = lookup_obj(root_sha)
    if not isinstance(tree, Tree):
        raise NotTreeError(root_sha)
    return tree.lookup_path(lookup_obj, path)
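

# Usage sketch: resolve a path inside a commit's tree, assuming ``store`` is
# an object store and ``commit`` is a Commit it contains.
#
#     mode, sha = tree_lookup_path(store.__getitem__, commit.tree,
#                                  b'docs/README')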


def _collect_filetree_revs(obj_store, tree_sha, kset):
    """Collect SHA1s of files and directories for specified tree.

    :param obj_store: Object store to get objects by SHA from
    :param tree_sha: tree reference to walk
    :param kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    for name, mode, sha in filetree.iteritems():
        if not S_ISGITLINK(mode) and sha not in kset:
            kset.add(sha)
            if stat.S_ISDIR(mode):
                _collect_filetree_revs(obj_store, sha, kset)


def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    :param obj_store: Object store to get objects by SHA1 from
    :param lst: Collection of commit and tag SHAs
    :param ignore_unknown: True to skip SHA1 missing in the repository
        silently.
    :return: A tuple of (commits, tags, others) SHA1s
    """
    commits = set()
    tags = set()
    others = set()
    for e in lst:
        try:
            o = obj_store[e]
        except KeyError:
            if not ignore_unknown:
                raise
        else:
            if isinstance(o, Commit):
                commits.add(e)
            elif isinstance(o, Tag):
                tags.add(e)
                tagged = o.object[1]
                c, t, o = _split_commits_and_tags(
                    obj_store, [tagged], ignore_unknown=ignore_unknown)
                commits |= c
                tags |= t
                others |= o
            else:
                others.add(e)
    return (commits, tags, others)
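

# Usage sketch: partition a collection of ref targets, silently dropping
# SHAs this store does not know about.
#
#     commits, tags, others = _split_commits_and_tags(store, shas, True)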


class MissingObjectFinder(object):
    """Find the objects missing from another object store.

    :param object_store: Object store containing at least all objects to be
        sent
    :param haves: SHA1s of commits not to send (already present in target)
    :param wants: SHA1s of commits to send
    :param progress: Optional function to report progress to.
    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
    :param get_parents: Optional function for getting the parents of a commit.
    :param tagged: dict of pointed-to sha -> tag sha for including tags
    """

    def __init__(self, object_store, haves, wants, progress=None,
                 get_tagged=None,
                 get_parents=lambda commit: commit.parents):
        self.object_store = object_store
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = (
            _split_commits_and_tags(object_store, haves, True))
        want_commits, want_tags, want_others = (
            _split_commits_and_tags(object_store, wants, False))
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = object_store._collect_ancestors(
            have_commits, get_parents=self._get_parents)[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = object_store._collect_ancestors(
            want_commits, all_ancestors, get_parents=self._get_parents)
        self.sha_done = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.sha_done.add(h)
            cmt = object_store[h]
            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
        # record tags we have as visited, too
        for t in have_tags:
            self.sha_done.add(t)
        missing_tags = want_tags.difference(have_tags)
        missing_others = want_others.difference(have_others)
        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        wants = missing_commits.union(missing_tags)
        wants = wants.union(missing_others)
        self.objects_to_send = set([(w, None, False) for w in wants])
        if progress is None:
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = get_tagged and get_tagged() or {}

    def add_todo(self, entries):
        self.objects_to_send.update([e for e in entries
                                     if not e[0] in self.sha_done])

    def next(self):
        while True:
            if not self.objects_to_send:
                return None
            (sha, name, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, "", False)])
            elif isinstance(o, Tree):
                self.add_todo([(s, n, not stat.S_ISDIR(m))
                               for n, m, s in o.iteritems()
                               if not S_ISGITLINK(m)])
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, True)])
        self.sha_done.add(sha)
        self.progress("counting objects: %d\r" % len(self.sha_done))
        return (sha, name)

    __next__ = next
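

# Usage sketch mirroring find_missing_objects() above: drain the finder with
# iter(callable, sentinel) until it returns None (``haves`` and ``wants`` are
# hypothetical lists of commit SHA1s).
#
#     finder = MissingObjectFinder(store, haves, wants)
#     for sha, path in iter(finder.next, None):
#         ...  # sha names an object the target side is missing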


class ObjectStoreGraphWalker(object):
    """Graph walker that finds what commits are missing from an object store.

    :ivar heads: Revisions without descendants in the local repo
    :ivar get_parents: Function to retrieve parents in the local repo
    """

    def __init__(self, local_heads, get_parents):
        """Create a new instance.

        :param local_heads: Heads to start search with
        :param get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        self.parents = {}

    def ack(self, sha):
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError("unexpected sha %r received" % sha)
        ancestors = set([sha])

        # stop if we run out of heads to remove
        while self.heads:
            for a in ancestors:
                if a in self.heads:
                    self.heads.remove(a)

            # collect all ancestors
            new_ancestors = set()
            for a in ancestors:
                ps = self.parents.get(a)
                if ps is not None:
                    new_ancestors.update(ps)
                self.parents[a] = None

            # no more ancestors; stop
            if not new_ancestors:
                break

            ancestors = new_ancestors

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if self.heads:
            ret = self.heads.pop()
            ps = self.get_parents(ret)
            self.parents[ret] = ps
            self.heads.update([p for p in ps if not p in self.parents])
            return ret
        return None

    __next__ = next
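

# Usage sketch of the walker protocol: the fetch side asks for candidate
# haves with next() and acks each SHA the remote confirms, which prunes that
# revision and its known ancestry from subsequent proposals.
#
#     walker = ObjectStoreGraphWalker(local_heads,
#                                     lambda sha: store[sha].parents)
#     sha = walker.next()
#     walker.ack(sha)  # remote has it; its ancestors won't be offered again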