Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F8395314
protocol.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
protocol.py
View Options
# protocol.py -- Shared parts of the git protocols
# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@samba.org>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
"""Generic functions for talking the git smart server protocol."""
from
io
import
BytesIO
from
os
import
(
SEEK_END
,
)
import
socket
import
dulwich
from
dulwich.errors
import
(
HangupException
,
GitProtocolError
,
)
TCP_GIT_PORT
=
9418
ZERO_SHA
=
b
"0"
*
40
SINGLE_ACK
=
0
MULTI_ACK
=
1
MULTI_ACK_DETAILED
=
2
# pack data
SIDE_BAND_CHANNEL_DATA
=
1
# progress messages
SIDE_BAND_CHANNEL_PROGRESS
=
2
# fatal error message just before stream aborts
SIDE_BAND_CHANNEL_FATAL
=
3
CAPABILITY_DEEPEN_SINCE
=
b
'deepen-since'
CAPABILITY_DEEPEN_NOT
=
b
'deepen-not'
CAPABILITY_DEEPEN_RELATIVE
=
b
'deepen-relative'
CAPABILITY_DELETE_REFS
=
b
'delete-refs'
CAPABILITY_INCLUDE_TAG
=
b
'include-tag'
CAPABILITY_MULTI_ACK
=
b
'multi_ack'
CAPABILITY_MULTI_ACK_DETAILED
=
b
'multi_ack_detailed'
CAPABILITY_NO_DONE
=
b
'no-done'
CAPABILITY_NO_PROGRESS
=
b
'no-progress'
CAPABILITY_OFS_DELTA
=
b
'ofs-delta'
CAPABILITY_QUIET
=
b
'quiet'
CAPABILITY_REPORT_STATUS
=
b
'report-status'
CAPABILITY_SHALLOW
=
b
'shallow'
CAPABILITY_SIDE_BAND
=
b
'side-band'
CAPABILITY_SIDE_BAND_64K
=
b
'side-band-64k'
CAPABILITY_THIN_PACK
=
b
'thin-pack'
CAPABILITY_AGENT
=
b
'agent'
CAPABILITY_SYMREF
=
b
'symref'
# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be ste to ZERO_SHA.
CAPABILITIES_REF
=
b
'capabilities^{}'
COMMON_CAPABILITIES
=
[
CAPABILITY_OFS_DELTA
,
CAPABILITY_SIDE_BAND
,
CAPABILITY_SIDE_BAND_64K
,
CAPABILITY_AGENT
,
CAPABILITY_NO_PROGRESS
]
KNOWN_UPLOAD_CAPABILITIES
=
set
(
COMMON_CAPABILITIES
+
[
CAPABILITY_THIN_PACK
,
CAPABILITY_MULTI_ACK
,
CAPABILITY_MULTI_ACK_DETAILED
,
CAPABILITY_INCLUDE_TAG
,
CAPABILITY_DEEPEN_SINCE
,
CAPABILITY_SYMREF
,
CAPABILITY_SHALLOW
,
CAPABILITY_DEEPEN_NOT
,
CAPABILITY_DEEPEN_RELATIVE
,
])
KNOWN_RECEIVE_CAPABILITIES
=
set
(
COMMON_CAPABILITIES
+
[
CAPABILITY_REPORT_STATUS
])
def
agent_string
():
return
(
'dulwich/
%d
.
%d
.
%d
'
%
dulwich
.
__version__
)
.
encode
(
'ascii'
)
def
capability_agent
():
return
CAPABILITY_AGENT
+
b
'='
+
agent_string
()
def
capability_symref
(
from_ref
,
to_ref
):
return
CAPABILITY_SYMREF
+
b
'='
+
from_ref
+
b
':'
+
to_ref
def
extract_capability_names
(
capabilities
):
return
set
(
parse_capability
(
c
)[
0
]
for
c
in
capabilities
)
def
parse_capability
(
capability
):
parts
=
capability
.
split
(
'='
,
1
)
if
len
(
parts
)
==
1
:
return
(
parts
[
0
],
None
)
return
tuple
(
parts
)
COMMAND_DEEPEN
=
b
'deepen'
COMMAND_SHALLOW
=
b
'shallow'
COMMAND_UNSHALLOW
=
b
'unshallow'
COMMAND_DONE
=
b
'done'
COMMAND_WANT
=
b
'want'
COMMAND_HAVE
=
b
'have'
class
ProtocolFile
(
object
):
"""A dummy file for network ops that expect file-like objects."""
def
__init__
(
self
,
read
,
write
):
self
.
read
=
read
self
.
write
=
write
def
tell
(
self
):
pass
def
close
(
self
):
pass
def
pkt_line
(
data
):
"""Wrap data in a pkt-line.
:param data: The data to wrap, as a str or None.
:return: The data prefixed with its length in pkt-line format; if data was
None, returns the flush-pkt ('0000').
"""
if
data
is
None
:
return
b
'0000'
return
(
'
%04x
'
%
(
len
(
data
)
+
4
))
.
encode
(
'ascii'
)
+
data
class
Protocol
(
object
):
"""Class for interacting with a remote git process over the wire.
Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
consists of the length of the line as a 4-byte hex string, followed by the
payload data. The length includes the 4-byte header. The special line
'0000' indicates the end of a section of input and is called a 'flush-pkt'.
For details on the pkt-line format, see the cgit distribution:
Documentation/technical/protocol-common.txt
"""
def
__init__
(
self
,
read
,
write
,
close
=
None
,
report_activity
=
None
):
self
.
read
=
read
self
.
write
=
write
self
.
_close
=
close
self
.
report_activity
=
report_activity
self
.
_readahead
=
None
def
close
(
self
):
if
self
.
_close
:
self
.
_close
()
def
__enter__
(
self
):
return
self
def
__exit__
(
self
,
exc_type
,
exc_val
,
exc_tb
):
self
.
close
()
def
read_pkt_line
(
self
):
"""Reads a pkt-line from the remote git process.
This method may read from the readahead buffer; see unread_pkt_line.
:return: The next string from the stream, without the length prefix, or
None for a flush-pkt ('0000').
"""
if
self
.
_readahead
is
None
:
read
=
self
.
read
else
:
read
=
self
.
_readahead
.
read
self
.
_readahead
=
None
try
:
sizestr
=
read
(
4
)
if
not
sizestr
:
raise
HangupException
()
size
=
int
(
sizestr
,
16
)
if
size
==
0
:
if
self
.
report_activity
:
self
.
report_activity
(
4
,
'read'
)
return
None
if
self
.
report_activity
:
self
.
report_activity
(
size
,
'read'
)
pkt_contents
=
read
(
size
-
4
)
except
socket
.
error
as
e
:
raise
GitProtocolError
(
e
)
else
:
if
len
(
pkt_contents
)
+
4
!=
size
:
raise
GitProtocolError
(
'Length of pkt read
%04x
does not match length prefix
%04x
'
%
(
len
(
pkt_contents
)
+
4
,
size
))
return
pkt_contents
def
eof
(
self
):
"""Test whether the protocol stream has reached EOF.
Note that this refers to the actual stream EOF and not just a
flush-pkt.
:return: True if the stream is at EOF, False otherwise.
"""
try
:
next_line
=
self
.
read_pkt_line
()
except
HangupException
:
return
True
self
.
unread_pkt_line
(
next_line
)
return
False
def
unread_pkt_line
(
self
,
data
):
"""Unread a single line of data into the readahead buffer.
This method can be used to unread a single pkt-line into a fixed
readahead buffer.
:param data: The data to unread, without the length prefix.
:raise ValueError: If more than one pkt-line is unread.
"""
if
self
.
_readahead
is
not
None
:
raise
ValueError
(
'Attempted to unread multiple pkt-lines.'
)
self
.
_readahead
=
BytesIO
(
pkt_line
(
data
))
def
read_pkt_seq
(
self
):
"""Read a sequence of pkt-lines from the remote git process.
:return: Yields each line of data up to but not including the next
flush-pkt.
"""
pkt
=
self
.
read_pkt_line
()
while
pkt
:
yield
pkt
pkt
=
self
.
read_pkt_line
()
def
write_pkt_line
(
self
,
line
):
"""Sends a pkt-line to the remote git process.
:param line: A string containing the data to send, without the length
prefix.
"""
try
:
line
=
pkt_line
(
line
)
self
.
write
(
line
)
if
self
.
report_activity
:
self
.
report_activity
(
len
(
line
),
'write'
)
except
socket
.
error
as
e
:
raise
GitProtocolError
(
e
)
def
write_file
(
self
):
"""Return a writable file-like object for this protocol."""
class
ProtocolFile
(
object
):
def
__init__
(
self
,
proto
):
self
.
_proto
=
proto
self
.
_offset
=
0
def
write
(
self
,
data
):
self
.
_proto
.
write
(
data
)
self
.
_offset
+=
len
(
data
)
def
tell
(
self
):
return
self
.
_offset
def
close
(
self
):
pass
return
ProtocolFile
(
self
)
def
write_sideband
(
self
,
channel
,
blob
):
"""Write multiplexed data to the sideband.
:param channel: An int specifying the channel to write to.
:param blob: A blob of data (as a string) to send on this channel.
"""
# a pktline can be a max of 65520. a sideband line can therefore be
# 65520-5 = 65515
# WTF: Why have the len in ASCII, but the channel in binary.
while
blob
:
self
.
write_pkt_line
(
bytes
(
bytearray
([
channel
]))
+
blob
[:
65515
])
blob
=
blob
[
65515
:]
def
send_cmd
(
self
,
cmd
,
*
args
):
"""Send a command and some arguments to a git server.
Only used for the TCP git protocol (git://).
:param cmd: The remote service to access.
:param args: List of arguments to send to remove service.
"""
self
.
write_pkt_line
(
cmd
+
b
" "
+
b
""
.
join
([(
a
+
b
"
\0
"
)
for
a
in
args
]))
def
read_cmd
(
self
):
"""Read a command and some arguments from the git client
Only used for the TCP git protocol (git://).
:return: A tuple of (command, [list of arguments]).
"""
line
=
self
.
read_pkt_line
()
splice_at
=
line
.
find
(
b
" "
)
cmd
,
args
=
line
[:
splice_at
],
line
[
splice_at
+
1
:]
assert
args
[
-
1
:]
==
b
"
\x00
"
return
cmd
,
args
[:
-
1
]
.
split
(
b
"
\0
"
)
_RBUFSIZE
=
8192
# Default read buffer size.
class
ReceivableProtocol
(
Protocol
):
"""Variant of Protocol that allows reading up to a size without blocking.
This class has a recv() method that behaves like socket.recv() in addition
to a read() method.
If you want to read n bytes from the wire and block until exactly n bytes
(or EOF) are read, use read(n). If you want to read at most n bytes from
the wire but don't care if you get less, use recv(n). Note that recv(n)
will still block until at least one byte is read.
"""
def
__init__
(
self
,
recv
,
write
,
report_activity
=
None
,
rbufsize
=
_RBUFSIZE
):
super
(
ReceivableProtocol
,
self
)
.
__init__
(
self
.
read
,
write
,
report_activity
)
self
.
_recv
=
recv
self
.
_rbuf
=
BytesIO
()
self
.
_rbufsize
=
rbufsize
def
read
(
self
,
size
):
# From _fileobj.read in socket.py in the Python 2.6.5 standard library,
# with the following modifications:
# - omit the size <= 0 branch
# - seek back to start rather than 0 in case some buffer has been
# consumed.
# - use SEEK_END instead of the magic number.
# Copyright (c) 2001-2010 Python Software Foundation; All Rights
# Reserved
# Licensed under the Python Software Foundation License.
# TODO: see if buffer is more efficient than cBytesIO.
assert
size
>
0
# Our use of BytesIO rather than lists of string objects returned by
# recv() minimizes memory usage and fragmentation that occurs when
# rbufsize is large compared to the typical return value of recv().
buf
=
self
.
_rbuf
start
=
buf
.
tell
()
buf
.
seek
(
0
,
SEEK_END
)
# buffer may have been partially consumed by recv()
buf_len
=
buf
.
tell
()
-
start
if
buf_len
>=
size
:
# Already have size bytes in our buffer? Extract and return.
buf
.
seek
(
start
)
rv
=
buf
.
read
(
size
)
self
.
_rbuf
=
BytesIO
()
self
.
_rbuf
.
write
(
buf
.
read
())
self
.
_rbuf
.
seek
(
0
)
return
rv
self
.
_rbuf
=
BytesIO
()
# reset _rbuf. we consume it via buf.
while
True
:
left
=
size
-
buf_len
# recv() will malloc the amount of memory given as its
# parameter even though it often returns much less data
# than that. The returned data string is short lived
# as we copy it into a BytesIO and free it. This avoids
# fragmentation issues on many platforms.
data
=
self
.
_recv
(
left
)
if
not
data
:
break
n
=
len
(
data
)
if
n
==
size
and
not
buf_len
:
# Shortcut. Avoid buffer data copies when:
# - We have no data in our buffer.
# AND
# - Our call to recv returned exactly the
# number of bytes we were asked to read.
return
data
if
n
==
left
:
buf
.
write
(
data
)
del
data
# explicit free
break
assert
n
<=
left
,
"_recv(
%d
) returned
%d
bytes"
%
(
left
,
n
)
buf
.
write
(
data
)
buf_len
+=
n
del
data
# explicit free
# assert buf_len == buf.tell()
buf
.
seek
(
start
)
return
buf
.
read
()
def
recv
(
self
,
size
):
assert
size
>
0
buf
=
self
.
_rbuf
start
=
buf
.
tell
()
buf
.
seek
(
0
,
SEEK_END
)
buf_len
=
buf
.
tell
()
buf
.
seek
(
start
)
left
=
buf_len
-
start
if
not
left
:
# only read from the wire if our read buffer is exhausted
data
=
self
.
_recv
(
self
.
_rbufsize
)
if
len
(
data
)
==
size
:
# shortcut: skip the buffer if we read exactly size bytes
return
data
buf
=
BytesIO
()
buf
.
write
(
data
)
buf
.
seek
(
0
)
del
data
# explicit free
self
.
_rbuf
=
buf
return
buf
.
read
(
size
)
def
extract_capabilities
(
text
):
"""Extract a capabilities list from a string, if present.
:param text: String to extract from
:return: Tuple with text with capabilities removed and list of capabilities
"""
if
b
"
\0
"
not
in
text
:
return
text
,
[]
text
,
capabilities
=
text
.
rstrip
()
.
split
(
b
"
\0
"
)
return
(
text
,
capabilities
.
strip
()
.
split
(
b
" "
))
def
extract_want_line_capabilities
(
text
):
"""Extract a capabilities list from a want line, if present.
Note that want lines have capabilities separated from the rest of the line
by a space instead of a null byte. Thus want lines have the form:
want obj-id cap1 cap2 ...
:param text: Want line to extract from
:return: Tuple with text with capabilities removed and list of capabilities
"""
split_text
=
text
.
rstrip
()
.
split
(
b
" "
)
if
len
(
split_text
)
<
3
:
return
text
,
[]
return
(
b
" "
.
join
(
split_text
[:
2
]),
split_text
[
2
:])
def
ack_type
(
capabilities
):
"""Extract the ack type from a capabilities list."""
if
b
'multi_ack_detailed'
in
capabilities
:
return
MULTI_ACK_DETAILED
elif
b
'multi_ack'
in
capabilities
:
return
MULTI_ACK
return
SINGLE_ACK
class
BufferedPktLineWriter
(
object
):
"""Writer that wraps its data in pkt-lines and has an independent buffer.
Consecutive calls to write() wrap the data in a pkt-line and then buffers
it until enough lines have been written such that their total length
(including length prefix) reach the buffer size.
"""
def
__init__
(
self
,
write
,
bufsize
=
65515
):
"""Initialize the BufferedPktLineWriter.
:param write: A write callback for the underlying writer.
:param bufsize: The internal buffer size, including length prefixes.
"""
self
.
_write
=
write
self
.
_bufsize
=
bufsize
self
.
_wbuf
=
BytesIO
()
self
.
_buflen
=
0
def
write
(
self
,
data
):
"""Write data, wrapping it in a pkt-line."""
line
=
pkt_line
(
data
)
line_len
=
len
(
line
)
over
=
self
.
_buflen
+
line_len
-
self
.
_bufsize
if
over
>=
0
:
start
=
line_len
-
over
self
.
_wbuf
.
write
(
line
[:
start
])
self
.
flush
()
else
:
start
=
0
saved
=
line
[
start
:]
self
.
_wbuf
.
write
(
saved
)
self
.
_buflen
+=
len
(
saved
)
def
flush
(
self
):
"""Flush all data from the buffer."""
data
=
self
.
_wbuf
.
getvalue
()
if
data
:
self
.
_write
(
data
)
self
.
_len
=
0
self
.
_wbuf
=
BytesIO
()
class
PktLineParser
(
object
):
"""Packet line parser that hands completed packets off to a callback.
"""
def
__init__
(
self
,
handle_pkt
):
self
.
handle_pkt
=
handle_pkt
self
.
_readahead
=
BytesIO
()
def
parse
(
self
,
data
):
"""Parse a fragment of data and call back for any completed packets.
"""
self
.
_readahead
.
write
(
data
)
buf
=
self
.
_readahead
.
getvalue
()
if
len
(
buf
)
<
4
:
return
while
len
(
buf
)
>=
4
:
size
=
int
(
buf
[:
4
],
16
)
if
size
==
0
:
self
.
handle_pkt
(
None
)
buf
=
buf
[
4
:]
elif
size
<=
len
(
buf
):
self
.
handle_pkt
(
buf
[
4
:
size
])
buf
=
buf
[
size
:]
else
:
break
self
.
_readahead
=
BytesIO
()
self
.
_readahead
.
write
(
buf
)
def
get_tail
(
self
):
"""Read back any unused data."""
return
self
.
_readahead
.
getvalue
()
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Jun 4 2025, 7:37 PM (10 w, 6 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3399273
Attached To
rPPDW python3-dulwich packaging
Event Timeline
Log In to Comment