Changeset View
Changeset View
Standalone View
Standalone View
swh/archiver/storage.py
Show All 30 Lines | def __init__(self, dbconn): | ||||
else: | else: | ||||
self._db = ArchiverDb.connect(dbconn) | self._db = ArchiverDb.connect(dbconn) | ||||
except psycopg2.OperationalError as e: | except psycopg2.OperationalError as e: | ||||
raise StorageDBError(e) | raise StorageDBError(e) | ||||
def get_db(self): | def get_db(self): | ||||
return self._db | return self._db | ||||
@db_transaction_generator | @db_transaction_generator() | ||||
def archive_ls(self, db=None, cur=None): | def archive_ls(self, db=None, cur=None): | ||||
""" Get all the archives registered on the server. | """ Get all the archives registered on the server. | ||||
Yields: | Yields: | ||||
a tuple (server_id, server_url) for each archive server. | a tuple (server_id, server_url) for each archive server. | ||||
""" | """ | ||||
yield from db.archive_ls(cur) | yield from db.archive_ls(cur) | ||||
@db_transaction | @db_transaction() | ||||
def content_archive_get(self, content_id, db=None, cur=None): | def content_archive_get(self, content_id, db=None, cur=None): | ||||
""" Get the archival status of a content. | """ Get the archival status of a content. | ||||
Retrieve from the database the archival status of the given content | Retrieve from the database the archival status of the given content | ||||
Args: | Args: | ||||
content_id: the sha1 of the content | content_id: the sha1 of the content | ||||
Yields: | Yields: | ||||
A tuple (content_id, present_copies, ongoing_copies), where | A tuple (content_id, present_copies, ongoing_copies), where | ||||
ongoing_copies is a dict mapping copy to mtime. | ongoing_copies is a dict mapping copy to mtime. | ||||
""" | """ | ||||
return db.content_archive_get(content_id, cur) | return db.content_archive_get(content_id, cur) | ||||
@db_transaction_generator | @db_transaction_generator() | ||||
def content_archive_get_copies(self, last_content=None, limit=1000, | def content_archive_get_copies(self, last_content=None, limit=1000, | ||||
db=None, cur=None): | db=None, cur=None): | ||||
""" Get the list of copies for `limit` contents starting after | """ Get the list of copies for `limit` contents starting after | ||||
`last_content`. | `last_content`. | ||||
Args: | Args: | ||||
last_content: sha1 of the last content retrieved. May be None | last_content: sha1 of the last content retrieved. May be None | ||||
to start at the beginning. | to start at the beginning. | ||||
limit: number of contents to retrieve. Can be None to retrieve all | limit: number of contents to retrieve. Can be None to retrieve all | ||||
objects (will be slow). | objects (will be slow). | ||||
Yields: | Yields: | ||||
A tuple (content_id, present_copies, ongoing_copies), where | A tuple (content_id, present_copies, ongoing_copies), where | ||||
ongoing_copies is a dict mapping copy to mtime. | ongoing_copies is a dict mapping copy to mtime. | ||||
""" | """ | ||||
yield from db.content_archive_get_copies(last_content, limit, cur) | yield from db.content_archive_get_copies(last_content, limit, cur) | ||||
@db_transaction_generator | @db_transaction_generator() | ||||
def content_archive_get_unarchived_copies( | def content_archive_get_unarchived_copies( | ||||
self, retention_policy, last_content=None, | self, retention_policy, last_content=None, | ||||
limit=1000, db=None, cur=None): | limit=1000, db=None, cur=None): | ||||
""" Get the list of copies for `limit` contents starting after | """ Get the list of copies for `limit` contents starting after | ||||
`last_content`. Yields only copies with number of present | `last_content`. Yields only copies with number of present | ||||
smaller than `retention policy`. | smaller than `retention policy`. | ||||
Args: | Args: | ||||
last_content: sha1 of the last content retrieved. May be None | last_content: sha1 of the last content retrieved. May be None | ||||
to start at the beginning. | to start at the beginning. | ||||
retention_policy: number of required present copies | retention_policy: number of required present copies | ||||
limit: number of contents to retrieve. Can be None to retrieve all | limit: number of contents to retrieve. Can be None to retrieve all | ||||
objects (will be slow). | objects (will be slow). | ||||
Yields: | Yields: | ||||
A tuple (content_id, present_copies, ongoing_copies), where | A tuple (content_id, present_copies, ongoing_copies), where | ||||
ongoing_copies is a dict mapping copy to mtime. | ongoing_copies is a dict mapping copy to mtime. | ||||
""" | """ | ||||
yield from db.content_archive_get_unarchived_copies( | yield from db.content_archive_get_unarchived_copies( | ||||
retention_policy, last_content, limit, cur) | retention_policy, last_content, limit, cur) | ||||
@db_transaction_generator | @db_transaction_generator() | ||||
def content_archive_get_missing(self, content_ids, backend_name, db=None, | def content_archive_get_missing(self, content_ids, backend_name, db=None, | ||||
cur=None): | cur=None): | ||||
"""Retrieve missing sha1s from source_name. | """Retrieve missing sha1s from source_name. | ||||
Args: | Args: | ||||
content_ids ([sha1s]): list of sha1s to test | content_ids ([sha1s]): list of sha1s to test | ||||
source_name (str): Name of the backend to check for content | source_name (str): Name of the backend to check for content | ||||
Yields: | Yields: | ||||
missing sha1s from backend_name | missing sha1s from backend_name | ||||
""" | """ | ||||
db.mktemp_content_archive(cur) | db.mktemp_content_archive(cur) | ||||
db.copy_to(content_ids, 'tmp_content_archive', ['content_id'], cur) | db.copy_to(content_ids, 'tmp_content_archive', ['content_id'], cur) | ||||
for content_id in db.content_archive_get_missing(backend_name, cur): | for content_id in db.content_archive_get_missing(backend_name, cur): | ||||
yield content_id[0] | yield content_id[0] | ||||
@db_transaction_generator | @db_transaction_generator() | ||||
def content_archive_get_unknown(self, content_ids, db=None, cur=None): | def content_archive_get_unknown(self, content_ids, db=None, cur=None): | ||||
"""Retrieve unknown sha1s from content_archive. | """Retrieve unknown sha1s from content_archive. | ||||
Args: | Args: | ||||
content_ids ([sha1s]): list of sha1s to test | content_ids ([sha1s]): list of sha1s to test | ||||
Yields: | Yields: | ||||
Unknown sha1s from content_archive | Unknown sha1s from content_archive | ||||
""" | """ | ||||
db.mktemp_content_archive(cur) | db.mktemp_content_archive(cur) | ||||
db.copy_to(content_ids, 'tmp_content_archive', ['content_id'], cur) | db.copy_to(content_ids, 'tmp_content_archive', ['content_id'], cur) | ||||
for content_id in db.content_archive_get_unknown(cur): | for content_id in db.content_archive_get_unknown(cur): | ||||
yield content_id[0] | yield content_id[0] | ||||
@db_transaction | @db_transaction() | ||||
def content_archive_update(self, content_id, archive_id, | def content_archive_update(self, content_id, archive_id, | ||||
new_status=None, db=None, cur=None): | new_status=None, db=None, cur=None): | ||||
""" Update the status of an archive content and set its mtime to now | """ Update the status of an archive content and set its mtime to now | ||||
Change the mtime of an archived content for the given archive and set | Change the mtime of an archived content for the given archive and set | ||||
it's mtime to the current time. | it's mtime to the current time. | ||||
Args: | Args: | ||||
content_id (str): content sha1 | content_id (str): content sha1 | ||||
archive_id (str): name of the archive | archive_id (str): name of the archive | ||||
new_status (str): one of 'missing', 'present' or 'ongoing'. | new_status (str): one of 'missing', 'present' or 'ongoing'. | ||||
this status will replace the previous one. If not given, | this status will replace the previous one. If not given, | ||||
the function only change the mtime of the content for the | the function only change the mtime of the content for the | ||||
given archive. | given archive. | ||||
""" | """ | ||||
db.content_archive_update(content_id, archive_id, new_status, cur) | db.content_archive_update(content_id, archive_id, new_status, cur) | ||||
@db_transaction | @db_transaction() | ||||
def content_archive_add( | def content_archive_add( | ||||
self, content_ids, sources_present, db=None, cur=None): | self, content_ids, sources_present, db=None, cur=None): | ||||
"""Insert a new entry in db about content_id. | """Insert a new entry in db about content_id. | ||||
Args: | Args: | ||||
content_ids ([bytes|str]): content identifiers | content_ids ([bytes|str]): content identifiers | ||||
sources_present ([str]): List of source names where | sources_present ([str]): List of source names where | ||||
contents are present | contents are present | ||||
▲ Show 20 Lines • Show All 189 Lines • Show Last 20 Lines |