Changeset View
Changeset View
Standalone View
Standalone View
swh/objstorage/objstorage_pathslicing.py
Show First 20 Lines • Show All 125 Lines • ▼ Show 20 Lines | def __init__(self, root, slicing): | ||||
max_endchar = max(map(lambda bound: bound.stop, self.bounds)) | max_endchar = max(map(lambda bound: bound.stop, self.bounds)) | ||||
if ID_HASH_LENGTH < max_endchar: | if ID_HASH_LENGTH < max_endchar: | ||||
raise ValueError( | raise ValueError( | ||||
'Algorithm %s has too short hash for slicing to char %d' | 'Algorithm %s has too short hash for slicing to char %d' | ||||
% (ID_HASH_ALGO, max_endchar) | % (ID_HASH_ALGO, max_endchar) | ||||
) | ) | ||||
def __contains__(self, obj_id):
    """Check whether the given object is present in the storage.

    Args:
        obj_id: identifier (binary checksum) of the object to look up.

    Returns:
        True iff the object is present in the storage.
    """
    hex_obj_id = hashutil.hash_to_hex(obj_id)
    return os.path.exists(self._obj_path(hex_obj_id))
def __iter__(self): | def __iter__(self): | ||||
"""iterate over the object identifiers currently available in the storage | """iterate over the object identifiers currently available in the storage | ||||
Warning: with the current implementation of the object storage, this | Warning: with the current implementation of the object storage, this | ||||
Show All 16 Lines | class PathSlicingObjStorage(ObjStorage): | ||||
def __len__(self):
    """Compute the number of objects available in the storage.

    Warning: this currently relies on ``__iter__``, whose warning about
    bad performance applies here as well.

    Returns:
        number of objects contained in the storage.
    """
    count = 0
    for _ in self:
        count += 1
    return count
def _obj_dir(self, hex_obj_id): | def _obj_dir(self, hex_obj_id): | ||||
""" Compute the storage directory of an object. | """ Compute the storage directory of an object. | ||||
See also: PathSlicingObjStorage::_obj_path | See also: PathSlicingObjStorage::_obj_path | ||||
Show All 15 Lines | def _obj_path(self, hex_obj_id): | ||||
hex_obj_id: object id as hexlified string. | hex_obj_id: object id as hexlified string. | ||||
Returns: | Returns: | ||||
Path to the actual object corresponding to the given id. | Path to the actual object corresponding to the given id. | ||||
""" | """ | ||||
return os.path.join(self._obj_dir(hex_obj_id), hex_obj_id) | return os.path.join(self._obj_dir(hex_obj_id), hex_obj_id) | ||||
def add(self, bytes, obj_id=None, check_presence=True):
    """Add a new object to the object storage.

    Args:
        bytes: content of the object to be added to the storage.
        obj_id: checksum of ``bytes`` computed with the ``ID_HASH_ALGO``
            algorithm. When given, ``obj_id`` is trusted to match
            ``bytes``; if missing, it is computed on the fly.
        check_presence: whether to check for the object before writing,
            skipping the write if it is already stored.

    Returns:
        the id of the object in the storage.
    """
    if obj_id is None:
        # No checksum provided; compute it on the fly.
        hasher = hashutil._new_hash(ID_HASH_ALGO, len(bytes))
        hasher.update(bytes)
        obj_id = hasher.digest()

    if check_presence and obj_id in self:
        # Object already stored; return its id immediately.
        return obj_id

    hex_obj_id = hashutil.hash_to_hex(obj_id)
    with _write_obj_file(hex_obj_id, self) as f:
        f.write(bytes)

    return obj_id
def restore(self, bytes, obj_id=None):
    """Restore a content that has been corrupted.

    This method is identical to ``add``, except that it does not check
    whether the object id is already present in the storage before
    writing, so an existing (corrupted) copy is overwritten.

    Args:
        bytes: content of the object to be added to the storage.
        obj_id: checksum of ``bytes`` computed with the ``ID_HASH_ALGO``
            algorithm. When given, ``obj_id`` is trusted to match
            ``bytes``; if missing, it is computed on the fly.

    Returns:
        the id of the object in the storage.
    """
    return self.add(bytes, obj_id, check_presence=False)
def get(self, obj_id):
    """Retrieve the content of a given object.

    Args:
        obj_id: object id.

    Returns:
        the content of the requested object, as bytes.

    Raises:
        ObjNotFoundError: if the requested object is missing.
    """
    if obj_id not in self:
        raise ObjNotFoundError(obj_id)

    # Open the object file and return its content as bytes.
    hex_obj_id = hashutil.hash_to_hex(obj_id)
    with _read_obj_file(hex_obj_id, self) as f:
        return f.read()
def check(self, obj_id): | def check(self, obj_id): | ||||
""" Perform an integrity check for a given object. | """ Perform an integrity check for a given object. | ||||
Verify that the file object is in place and that the gziped content | See base class [ObjStorage]. | ||||
matches the object id. | |||||
Args: | |||||
obj_id: object id. | |||||
Raises: | |||||
ObjNotFoundError: if the requested object is missing. | |||||
Error: if the request object is corrupted. | |||||
""" | """ | ||||
if obj_id not in self: | if obj_id not in self: | ||||
raise ObjNotFoundError(obj_id) | raise ObjNotFoundError(obj_id) | ||||
hex_obj_id = hashutil.hash_to_hex(obj_id) | hex_obj_id = hashutil.hash_to_hex(obj_id) | ||||
try: | try: | ||||
with gzip.open(self._obj_path(hex_obj_id)) as f: | with gzip.open(self._obj_path(hex_obj_id)) as f: | ||||
Show All 21 Lines | def check(self, obj_id): | ||||
) | ) | ||||
except (OSError, IOError): | except (OSError, IOError): | ||||
# IOError is for compatibility with older python versions | # IOError is for compatibility with older python versions | ||||
raise Error('Corrupt object %s is not a gzip file' % obj_id) | raise Error('Corrupt object %s is not a gzip file' % obj_id) | ||||
def get_random(self, batch_size):
    """Get random ids of existing contents.

    Used to obtain random ids on which to perform content integrity
    verifications.

    Args:
        batch_size (int): number of ids to produce.

    Yields:
        ids of contents present in the current object storage.
    """
def get_random_content(self, batch_size): | def get_random_content(self, batch_size): | ||||
""" Get a batch of content inside a single directory. | """ Get a batch of content inside a single directory. | ||||
Returns: | Returns: | ||||
a tuple (batch size, batch). | a tuple (batch size, batch). | ||||
""" | """ | ||||
dirs = [] | dirs = [] | ||||
Show All 17 Lines |
Same comment about these docstrings that I made in D94. If they add nothing wrt parent class, please remove them.