diff --git a/swh/loader/mercurial/chunked_reader.py b/swh/loader/mercurial/chunked_reader.py --- a/swh/loader/mercurial/chunked_reader.py +++ b/swh/loader/mercurial/chunked_reader.py @@ -29,8 +29,7 @@ self._file.seek(self._offset, 0) # seek back to original position def _chunk_size(self, first_time=False): - """Unpack the next bytes from the - file to get the next file chunk size. + """Unpack the next bytes from the file to get the next file chunk size. """ size = struct.unpack(self._size_pattern, self._file.read(self._size_bytes))[0] diff --git a/swh/loader/mercurial/objects.py b/swh/loader/mercurial/objects.py --- a/swh/loader/mercurial/objects.py +++ b/swh/loader/mercurial/objects.py @@ -255,14 +255,17 @@ if the primary RAM-based storage area is filled to the designated capacity. Storage is occupied in three phases: + 1) The most recent key/value pair is always held, regardless of other - factors, until the next entry replaces it. + factors, until the next entry replaces it. + 2) Stored key/value pairs are pushed into a randomly accessible - expanding buffer in memory with a stored size function, maximum size - value, and special hinting about which keys to store for how long - optionally declared at instantiation. + expanding buffer in memory with a stored size function, maximum size + value, and special hinting about which keys to store for how long + optionally declared at instantiation. + 3) The in-memory buffer pickles into a randomly accessible disk-backed - secondary buffer when it becomes full. + secondary buffer when it becomes full. Occupied space is calculated by default as whatever the len() function returns on the values being stored. This can be changed by passing in a new @@ -278,14 +281,15 @@ def __init__(self, max_size=None, cache_hints=None, size_function=None, filename=None): - """args: - max_size: integer value indicating the maximum size of the part - of storage held in memory - cache_hints: dict of key/int pairs as described in the class - description - size_function: callback function that accepts one parameter and - returns one int, which should probably be the - calculated size of the parameter + """ + args: + max_size: integer value indicating the maximum size of the part + of storage held in memory + cache_hints: dict of key/int pairs as described in the class + description + size_function: callback function that accepts one parameter and + returns one int, which should probably be the calculated + size of the parameter """ self._max_size = max_size or SelectiveCache.DEFAULT_SIZE self._disk = None