diff --git a/swh/lister/core/lister_base.py b/swh/lister/core/lister_base.py --- a/swh/lister/core/lister_base.py +++ b/swh/lister/core/lister_base.py @@ -384,20 +384,6 @@ return sql_repo - def origin_dict(self, origin_type, origin_url, **kwargs): - """Return special dict format for the origins list - - Args: - origin_type (string) - origin_url (string) - Returns: - the same information in a different form - """ - return { - 'type': origin_type, - 'url': origin_url, - } - def task_dict(self, origin_type, origin_url, **kwargs): """Return special dict format for the tasks list @@ -452,45 +438,34 @@ injected_repos[m['uid']] = self.db_inject_repo(m) return injected_repos - def create_missing_origins_and_tasks(self, models_list, injected_repos): - """Find any newly created db entries that don't yet have tasks or - origin objects assigned. + def schedule_missing_tasks(self, models_list, injected_repos): + """Find any newly created db entries that have not been scheduled + yet. Args: - models_list: a list of dicts mapping keys in the db model for - each repo - injected_repos: dict of uid:sql_repo pairs that have just - been created + models_list ([Model]): List of dicts mapping keys in the db model + for each repo + injected_repos (dict): Dict of uid:sql_repo pairs that have just + been created + Returns: Nothing. Modifies injected_repos. 
+ """ - origins = {} tasks = {} - def _origin_key(m): - _type = m.get('origin_type', m.get('type')) - _url = m.get('origin_url', m.get('url')) - return '%s-%s' % (_type, _url) - def _task_key(m): - return '%s-%s' % (m['type'], - json.dumps(m['arguments'], sort_keys=True)) + return '%s-%s' % ( + m['type'], + json.dumps(m['arguments'], sort_keys=True) + ) for m in models_list: ir = injected_repos[m['uid']] - if not ir.origin_id: - origin_dict = self.origin_dict(**m) - origins[_origin_key(m)] = (ir, m, origin_dict) if not ir.task_id: task_dict = self.task_dict(**m) tasks[_task_key(task_dict)] = (ir, m, task_dict) - new_origins = self.storage.origin_add( - (origin_dicts for (_, _, origin_dicts) in origins.values())) - for origin in new_origins: - ir, m, _ = origins[_origin_key(origin)] - ir.origin_id = origin['id'] - new_tasks = self.scheduler.create_tasks( (task_dicts for (_, _, task_dicts) in tasks.values())) for task in new_tasks: @@ -519,7 +494,7 @@ # inject into local db injected = self.inject_repo_data_into_db(models_list) # queue workers - self.create_missing_origins_and_tasks(models_list, injected) + self.schedule_missing_tasks(models_list, injected) return response, injected def save_response(self, response): diff --git a/swh/lister/core/simple_lister.py b/swh/lister/core/simple_lister.py --- a/swh/lister/core/simple_lister.py +++ b/swh/lister/core/simple_lister.py @@ -51,7 +51,7 @@ # inject into local db injected = self.inject_repo_data_into_db(models) # queue workers - self.create_missing_origins_and_tasks(models, injected) + self.schedule_missing_tasks(models, injected) all_injected.append(injected) # flush self.db_session.commit() diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py --- a/swh/lister/core/tests/test_lister.py +++ b/swh/lister/core/tests/test_lister.py @@ -164,8 +164,8 @@ if k not in ['last_seen', 'task_id', 'origin_id', 'id']: self.assertIn(k, di) - def disable_storage_and_scheduler(self, fl): - 
fl.create_missing_origins_and_tasks = Mock(return_value=None) + def disable_scheduler(self, fl): + fl.schedule_missing_tasks = Mock(return_value=None) def disable_db(self, fl): fl.winnow_models = Mock(return_value=[]) @@ -176,7 +176,7 @@ http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() - self.disable_storage_and_scheduler(fl) + self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=1, max_bound=1) # stores no results @@ -185,7 +185,7 @@ http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() - self.disable_storage_and_scheduler(fl) + self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=self.first_index, max_bound=self.first_index) @@ -194,7 +194,7 @@ http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() - self.disable_storage_and_scheduler(fl) + self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=self.first_index) @@ -222,7 +222,7 @@ }) self.init_db(db, fl.MODEL) - self.disable_storage_and_scheduler(fl) + self.disable_scheduler(fl) fl.run(min_bound=self.first_index) diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py --- a/swh/lister/debian/lister.py +++ b/swh/lister/debian/lister.py @@ -119,7 +119,7 @@ """Generate the Package entries that didn't previously exist. Contrary to SWHListerBase, we don't actually insert the data in - database. `create_missing_origins_and_tasks` does it once we have the + database. `schedule_missing_tasks` does it once we have the origin and task identifiers. 
""" by_name_version = {} @@ -173,7 +173,7 @@ self.db_session.add_all(added_packages) return added_packages - def create_missing_origins_and_tasks(self, models_list, added_packages): + def schedule_missing_tasks(self, models_list, added_packages): """We create tasks at the end of the full snapshot processing""" return diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -97,7 +97,7 @@ self.max_index = models_list[0]['indexable'] models_list = self.filter_before_inject(models_list) injected = self.inject_repo_data_into_db(models_list) - self.create_missing_origins_and_tasks(models_list, injected) + self.schedule_missing_tasks(models_list, injected) return self.max_index def run(self, min_bound=None, max_bound=None):