Changeset View
Changeset View
Standalone View
Standalone View
swh/scheduler/cli/task.py
Show First 20 Lines • Show All 492 Lines • ▼ Show 20 Lines | """Archive task/task_run whose (task_type is 'oneshot' and task_status | ||||
'disabled'). | 'disabled'). | ||||
With --dry-run flag set (default), only list those. | With --dry-run flag set (default), only list those. | ||||
""" | """ | ||||
from swh.core.utils import grouper | from swh.core.utils import grouper | ||||
from swh.scheduler.backend_es import SWHElasticSearchClient | from swh.scheduler.backend_es import SWHElasticSearchClient | ||||
config = ctx.obj['config'] | |||||
scheduler = ctx.obj['scheduler'] | scheduler = ctx.obj['scheduler'] | ||||
if not scheduler: | if not scheduler: | ||||
raise ValueError('Scheduler class (local/remote) must be instantiated') | raise ValueError('Scheduler class (local/remote) must be instantiated') | ||||
es_client = SWHElasticSearchClient() | es_client = SWHElasticSearchClient(**config) | ||||
logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) | logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) | ||||
log = logging.getLogger('swh.scheduler.cli.archive') | log = logging.getLogger('swh.scheduler.cli.archive') | ||||
logging.getLogger('urllib3').setLevel(logging.WARN) | logging.getLogger('urllib3').setLevel(logging.WARN) | ||||
logging.getLogger('elasticsearch').setLevel(logging.WARN) | logging.getLogger('elasticsearch').setLevel(logging.WARN) | ||||
if dry_run: | if dry_run: | ||||
log.info('**DRY-RUN** (only reading db)') | log.info('**DRY-RUN** (only reading db)') | ||||
if not cleanup: | if not cleanup: | ||||
log.info('**NO CLEANUP**') | log.info('**NO CLEANUP**') | ||||
now = arrow.utcnow() | now = arrow.utcnow() | ||||
# Default to archive tasks from a rolling month starting the week | # Default to archive tasks from a rolling month starting the week | ||||
# prior to the current one | # prior to the current one | ||||
if not before: | if not before: | ||||
before = now.shift(weeks=-1).format('YYYY-MM-DD') | before = now.shift(weeks=-1).format('YYYY-MM-DD') | ||||
if not after: | if not after: | ||||
after = now.shift(weeks=-1).shift(months=-1).format('YYYY-MM-DD') | after = now.shift(weeks=-1).shift(months=-1).format('YYYY-MM-DD') | ||||
log.debug('index: %s; cleanup: %s; period: [%s ; %s]' % ( | log.debug('index: %s; cleanup: %s; period: [%s ; %s]' % ( | ||||
not dry_run, not dry_run and cleanup, after, before)) | not dry_run, not dry_run and cleanup, after, before)) | ||||
def group_by_index_name(data, es_client=es_client): | def group_by_index_name(data, es_client=es_client): | ||||
vlorentz: I know it's off-topic, but this function should just be named `get_index_name`... it doesn't do any grouping. | |||||
"""Given a data record, determine the index's name through its ending | """Given a data record, determine the index's name through its ending | ||||
date. This varies greatly depending on the task_run's | date. This varies greatly depending on the task_run's | ||||
status. | status. | ||||
""" | """ | ||||
date = data.get('started') | date = data.get('started') | ||||
if not date: | if not date: | ||||
date = data['scheduled'] | date = data['scheduled'] | ||||
Show All 35 Lines |
I know it's off-topic, but this function should just be named `get_index_name`... it doesn't do any grouping.