Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/archiver/director.py
Show First 20 Lines • Show All 70 Lines • ▼ Show 20 Lines | class ArchiverDirector(config.SWHConfig): | ||||
def get_unarchived_content_batch(self): | def get_unarchived_content_batch(self): | ||||
""" Create batch of contents that needs to be archived | """ Create batch of contents that needs to be archived | ||||
Yields: | Yields: | ||||
batch of sha1 that corresponds to contents that needs more archive | batch of sha1 that corresponds to contents that needs more archive | ||||
copies. | copies. | ||||
""" | """ | ||||
contents = [] | contents = [] | ||||
for content in self._get_unarchived_content(): | for content in self._get_unarchived_content_id(): | ||||
contents.append(content) | contents.append(content) | ||||
if len(contents) > self.config['batch_max_size']: | if len(contents) > self.config['batch_max_size']: | ||||
yield contents | yield contents | ||||
contents = [] | contents = [] | ||||
if len(contents) > 0: | if len(contents) > 0: | ||||
yield contents | yield contents | ||||
def _get_unarchived_content(self): | def _get_unarchived_content_id(self): | ||||
zack: there seem to be a couple of naming inconsistencies in this function:
# the plural, which is… | |||||
""" Get all the content ids in the db that needs more copies | |||||
Yields: | |||||
sha1 of contents that needs to be archived. | |||||
""" | |||||
for content_id, present, _ongoing in self._get_all_contents(): | |||||
if len(present) < self.config['retention_policy']: | |||||
yield content_id | |||||
else: | |||||
continue | |||||
def _get_all_contents(self): | |||||
""" Get batchs from the archiver db and yield it as continous stream | """ Get batchs from the archiver db and yield it as continous stream | ||||
Content returned are those that need to have more copies. | |||||
Yields: | Yields: | ||||
Datas about a content as a tuple | Datas about a content as a tuple | ||||
(content_id, present_copies, ongoing_copies) where ongoing_copies | (content_id, present_copies, ongoing_copies) where ongoing_copies | ||||
is a dict mapping copy to mtime. | is a dict mapping copy to mtime. | ||||
""" | """ | ||||
last_object = b'' | last_object = b'' | ||||
while True: | while True: | ||||
archiver_contents = list( | archiver_contents = list( | ||||
self.archiver_storage.content_archive_get_copies(last_object) | self.archiver_storage.content_archive_get_unarchived_copies( | ||||
last_content=last_object, | |||||
retention_policy=self.config['retention_policy'] | |||||
) | |||||
) | ) | ||||
if not archiver_contents: | if not archiver_contents: | ||||
return | return | ||||
for content in archiver_contents: | for content_id, presents, oingoings in archiver_contents: | ||||
last_object = content[0] | last_object = content_id | ||||
yield content | yield content_id | ||||
def launch(): | def launch(): | ||||
archiver = ArchiverDirector() | archiver = ArchiverDirector() | ||||
archiver.run() | archiver.run() | ||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
launch() | launch() |
there seem to be a couple of naming inconsistencies in this function:
so perhaps it should be renamed to _get_unarchived_content_id (?)
Note: I haven't checked the complete version of the file, so it might need a more general sweep to make the naming uniform.