Add a 'failed' cooking status to the vault, with failure notifications.

Summary of the changes carried by this patch:
 * sql: extend the cook_status enum with 'failed'.
 * backend: split the notification e-mail subject/body templates into
   success and failure variants; cook_request() deletes a failed bundle
   entry before creating the task again; the scheduler backend is only
   instantiated when config['scheduling_db'] is not None.
 * cookers: when assembling the bundle raises, record the 'failed' status
   and the error text as progress message; notifications are sent in
   both the success and the failure case.
 * tests: cover a failed cook and the failure e-mail; the test
   configuration sets 'scheduling_db' to None.

Review fixes applied on top of the original patch:
 * cookers/base.py: use str(e) instead of e.message, an attribute that
   exceptions do not provide on Python 3.
 * backend.py: the DELETE in cook_request() filters on the same
   vault_bundle columns (type, object_id) as the SELECT in
   send_all_notifications().
 * backend.py: send_notification() defaults to status='done', so the
   direct notification sent by cook_request() for an already cooked
   bundle uses the success template rather than the failure one.
 * tests: drop a leftover print() and fix a typo in a comment.

diff --git a/sql/swh-vault-schema.sql b/sql/swh-vault-schema.sql
--- a/sql/swh-vault-schema.sql
+++ b/sql/swh-vault-schema.sql
@@ -13,7 +13,7 @@
 create type cook_type as enum ('directory', 'revision_gitfast');
 comment on type cook_type is 'Type of the requested bundle';
 
-create type cook_status as enum ('new', 'pending', 'done');
+create type cook_status as enum ('new', 'pending', 'done', 'failed');
 comment on type cook_status is 'Status of the cooking';
 
 create table vault_bundle (
diff --git a/swh/vault/backend.py b/swh/vault/backend.py
--- a/swh/vault/backend.py
+++ b/swh/vault/backend.py
@@ -21,8 +21,10 @@
 
 NOTIF_EMAIL_FROM = ('"Software Heritage Vault" '
                     '')
-NOTIF_EMAIL_SUBJECT = ("Bundle ready: {obj_type} {short_id}")
-NOTIF_EMAIL_BODY = """
+NOTIF_EMAIL_SUBJECT_SUCCESS = ("Bundle ready: {obj_type} {short_id}")
+NOTIF_EMAIL_SUBJECT_FAILURE = ("Bundle failed: {obj_type} {short_id}")
+
+NOTIF_EMAIL_BODY_SUCCESS = """
 You have requested the following bundle from the Software Heritage
 Vault:
 
@@ -40,6 +42,23 @@
 The Software Heritage Developers
 """
 
+NOTIF_EMAIL_BODY_FAILURE = """
+You have requested the following bundle from the Software Heritage
+Vault:
+
+Object Type: {obj_type}
+Object ID: {hex_id}
+
+This bundle could not be cooked for the following reason:
+
+{progress_msg}
+
+Please contact us for more information about this problem.
+
+--\x20
+The Software Heritage Developers
+"""
+
 
 class NotFoundExc(Exception):
     """Bundle was not found."""
@@ -85,8 +104,9 @@
         self.db = None
         self.reconnect()
         self.smtp_server = smtplib.SMTP('localhost', 25)
-        self.scheduler = SchedulerBackend(
-            scheduling_db=self.config['scheduling_db'])
+        if self.config['scheduling_db'] is not None:
+            self.scheduler = SchedulerBackend(
+                scheduling_db=self.config['scheduling_db'])
 
     def reconnect(self):
         """Reconnect to the database."""
@@ -188,13 +208,27 @@
         """Main entry point for cooking requests. This starts a cooking
         task if needed, and add the given e-mail to the notify list"""
         info = self.task_info(obj_type, obj_id)
+
+        # If there's a failed bundle entry, delete it first.
+        if info is not None and info['task_status'] == 'failed':
+            cursor.execute('''DELETE FROM vault_bundle
+                              WHERE type = %s AND object_id = %s''',
+                           (obj_type, obj_id))
+            self.commit()
+            info = None
+
+        # If there's no bundle entry, create the task.
         if info is None:
             self.create_task(obj_type, obj_id, sticky)
+
         if email is not None:
+            # If the task is already done, send the email directly
             if info is not None and info['task_status'] == 'done':
                 self.send_notification(None, email, obj_type, obj_id)
+            # Else, add it to the notification queue
             else:
                 self.add_notif_email(obj_type, obj_id, email)
+
         info = self.task_info(obj_type, obj_id)
         return info
 
@@ -250,16 +284,19 @@
         """Send all the e-mails in the notification list of a bundle"""
         obj_id = hashutil.hash_to_bytes(obj_id)
         cursor.execute('''
-            SELECT vault_notif_email.id AS id, email
+            SELECT vault_notif_email.id AS id, email, task_status, progress_msg
             FROM vault_notif_email
             INNER JOIN vault_bundle ON bundle_id = vault_bundle.id
             WHERE vault_bundle.type = %s AND vault_bundle.object_id = %s''',
             (obj_type, obj_id))
         for d in cursor:
-            self.send_notification(d['id'], d['email'], obj_type, obj_id)
+            self.send_notification(d['id'], d['email'], obj_type, obj_id,
+                                   status=d['task_status'],
+                                   progress_msg=d['progress_msg'])
 
     @autocommit
-    def send_notification(self, n_id, email, obj_type, obj_id, cursor=None):
+    def send_notification(self, n_id, email, obj_type, obj_id,
+                          status='done', progress_msg=None, cursor=None):
         """Send the notification of a bundle to a specific e-mail"""
         hex_id = hashutil.hash_to_hex(obj_id)
         short_id = hex_id[:7]
@@ -273,11 +310,20 @@
         url = ('https://archive.softwareheritage.org/api/1/vault/{}/{}/'
                'raw'.format(obj_type, hex_id))
 
-        text = NOTIF_EMAIL_BODY.strip()
-        text = text.format(obj_type=obj_type, hex_id=hex_id, url=url)
-        msg = MIMEText(text)
-        msg['Subject'] = (NOTIF_EMAIL_SUBJECT
-                          .format(obj_type=obj_type, short_id=short_id))
+        if status == 'done':
+            text = NOTIF_EMAIL_BODY_SUCCESS.strip()
+            text = text.format(obj_type=obj_type, hex_id=hex_id, url=url)
+            msg = MIMEText(text)
+            msg['Subject'] = (NOTIF_EMAIL_SUBJECT_SUCCESS
+                              .format(obj_type=obj_type, short_id=short_id))
+        else:
+            text = NOTIF_EMAIL_BODY_FAILURE.strip()
+            text = text.format(obj_type=obj_type, hex_id=hex_id,
+                               progress_msg=progress_msg)
+            msg = MIMEText(text)
+            msg['Subject'] = (NOTIF_EMAIL_SUBJECT_FAILURE
+                              .format(obj_type=obj_type, short_id=short_id))
+
         msg['From'] = NOTIF_EMAIL_FROM
         msg['To'] = email
 
diff --git a/swh/vault/cookers/base.py b/swh/vault/cookers/base.py
--- a/swh/vault/cookers/base.py
+++ b/swh/vault/cookers/base.py
@@ -85,12 +85,17 @@
         content_iter = self.prepare_bundle()
 
         # TODO: use proper content streaming
-        bundle = b''.join(content_iter)
-        self.backend.put_bundle(self.CACHE_TYPE_KEY, self.obj_id, bundle)
-
-        self.backend.set_status(self.obj_type, self.obj_id, 'done')
-        self.backend.set_progress(self.obj_type, self.obj_id, None)
-        self.backend.send_notif(self.obj_type, self.obj_id)
+        try:
+            bundle = b''.join(content_iter)
+        except Exception as e:
+            self.backend.set_status(self.obj_type, self.obj_id, 'failed')
+            self.backend.set_progress(self.obj_type, self.obj_id, str(e))
+        else:
+            self.backend.put_bundle(self.CACHE_TYPE_KEY, self.obj_id, bundle)
+            self.backend.set_status(self.obj_type, self.obj_id, 'done')
+            self.backend.set_progress(self.obj_type, self.obj_id, None)
+        finally:
+            self.backend.send_notif(self.obj_type, self.obj_id)
 
 
 SKIPPED_MESSAGE = (b'This content has not been retrieved in the '
diff --git a/swh/vault/tests/test_backend.py b/swh/vault/tests/test_backend.py
--- a/swh/vault/tests/test_backend.py
+++ b/swh/vault/tests/test_backend.py
@@ -46,6 +46,12 @@
         self.vault_backend.set_status(obj_type, obj_id, 'done')
         return obj_id, content
 
+    def fail_cook(self, obj_type, obj_id, failure_reason):
+        with self.mock_cooking():
+            self.vault_backend.create_task(obj_type, obj_id)
+        self.vault_backend.set_status(obj_type, obj_id, 'failed')
+        self.vault_backend.set_progress(obj_type, obj_id, failure_reason)
+
 
 TEST_TYPE = 'revision_gitfast'
 TEST_HEX_ID = '4a4b9771542143cf070386f86b4b92d42966bdbc'
@@ -282,3 +288,33 @@
         for i in r:
             self.assertEqual(self.vault_backend.is_available(
                 TEST_TYPE, inserted[i][0]), i in should_be_still_here)
+
+    def test_fail_cook_simple(self):
+        self.fail_cook(TEST_TYPE, TEST_OBJ_ID, 'error42')
+        self.assertFalse(self.vault_backend.is_available(TEST_TYPE,
+                                                         TEST_OBJ_ID))
+        info = self.vault_backend.task_info(TEST_TYPE, TEST_OBJ_ID)
+        self.assertEqual(info['progress_msg'], 'error42')
+
+    def test_send_failure_email(self):
+        with self.mock_cooking():
+            self.vault_backend.cook_request(TEST_TYPE, TEST_OBJ_ID,
+                                            email='a@example.com')
+
+        self.vault_backend.set_status(TEST_TYPE, TEST_OBJ_ID, 'failed')
+        self.vault_backend.set_progress(TEST_TYPE, TEST_OBJ_ID, 'test error')
+
+        with patch.object(self.vault_backend, 'smtp_server') as m:
+            self.vault_backend.send_all_notifications(TEST_TYPE, TEST_OBJ_ID)
+
+        e = [k[0][0] for k in m.send_message.call_args_list][0]
+        self.assertEqual(e['To'], 'a@example.com')
+
+        self.assertIn('info@softwareheritage.org', e['From'])
+        self.assertIn(TEST_TYPE, e['Subject'])
+        self.assertIn(TEST_HEX_ID[:5], e['Subject'])
+        self.assertIn('fail', e['Subject'])
+        self.assertIn(TEST_TYPE, str(e))
+        self.assertIn(TEST_HEX_ID[:5], str(e))
+        self.assertIn('test error', str(e))
+        self.assertIn('--\x20\n', str(e))  # Well-formatted signature
diff --git a/swh/vault/tests/vault_testing.py b/swh/vault/tests/vault_testing.py
--- a/swh/vault/tests/vault_testing.py
+++ b/swh/vault/tests/vault_testing.py
@@ -41,7 +41,6 @@
         self.cache_root = tempfile.TemporaryDirectory('vault-cache-')
         self.vault_config = {
             'storage': self.storage_config,
-            'db': 'postgresql:///' + self.TEST_VAULT_DB_NAME,
             'cache': {
                 'cls': 'pathslicing',
                 'args': {
@@ -49,7 +48,9 @@
                     'slicing': '0:1/1:5',
                     'allow_delete': True,
                 }
-            }
+            },
+            'db': 'postgresql:///' + self.TEST_VAULT_DB_NAME,
+            'scheduling_db': None,
         }
 
         self.vault_backend = VaultBackend(self.vault_config)