diff --git a/swh/loader/core/loader.py b/swh/loader/core/loader.py
--- a/swh/loader/core/loader.py
+++ b/swh/loader/core/loader.py
@@ -237,6 +237,125 @@
         return origin_visit
 
     @retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
+    def send_tool(self, tool_name, tool_version, tool_configuration):
+        """Register a tool in the storage and return its identifier.
+
+        Args:
+            tool_name: name of the tool
+            tool_version: version of the tool
+            tool_configuration: configuration of the tool
+
+        Returns:
+            the value returned by ``storage.indexer_configuration_add``,
+            used by callers as the tool identifier
+
+        """
+        log_id = str(uuid.uuid4())
+        self.log.debug(
+            'Creating tool with name %s version %s configuration %s',
+            tool_name, tool_version, tool_configuration,
+            extra={
+                'swh_type': 'storage_send_start',
+                'swh_content_type': 'indexer_configuration',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+
+        tool_id = self.storage.indexer_configuration_add(
+            tool_name, tool_version, tool_configuration)
+        self.log.debug(
+            'Done creating tool with name %s version %s and '
+            'configuration %s',
+            tool_name, tool_version, tool_configuration,
+            extra={
+                'swh_type': 'storage_send_end',
+                'swh_content_type': 'indexer_configuration',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+        return tool_id
+
+    @retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
+    def send_provider(self, provider_name, provider_type, provider_url,
+                      metadata):
+        """Register a metadata provider in the storage and return its id.
+
+        Args:
+            provider_name: name of the provider
+            provider_type: type of the provider
+            provider_url: url of the provider
+            metadata: metadata attached to the provider
+
+        Returns:
+            the value returned by ``storage.metadata_provider_add``,
+            used by callers as the provider identifier
+
+        """
+        log_id = str(uuid.uuid4())
+        self.log.debug(
+            'Creating provider_metadata with name %s type %s url %s and '
+            'metadata %s',
+            provider_name, provider_type, provider_url, metadata,
+            extra={
+                'swh_type': 'storage_send_start',
+                'swh_content_type': 'metadata_provider',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+
+        provider_id = self.storage.metadata_provider_add(
+            provider_name, provider_type, provider_url, metadata)
+        self.log.debug(
+            'Done creating provider_metadata with name %s type %s url %s '
+            'and metadata %s',
+            provider_name, provider_type, provider_url, metadata,
+            extra={
+                'swh_type': 'storage_send_end',
+                'swh_content_type': 'metadata_provider',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+        return provider_id
+
+    @retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
+    def send_origin_metadata(self, origin_id, visit_date, provider_id,
+                             tool_id, metadata):
+        """Store metadata about an origin in the storage.
+
+        Args:
+            origin_id: identifier of the origin
+            visit_date: date passed to the storage with the metadata
+            provider_id: identifier of the metadata provider
+            tool_id: identifier of the tool
+            metadata: the metadata to store
+
+        """
+        log_id = str(uuid.uuid4())
+        self.log.debug(
+            'Creating origin_metadata for origin %s at time %s with '
+            'provider_id %s and tool_id %s',
+            origin_id, visit_date, provider_id, tool_id,
+            extra={
+                'swh_type': 'storage_send_start',
+                'swh_content_type': 'origin_metadata',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+
+        self.storage.origin_metadata_add(
+            origin_id, visit_date, provider_id, tool_id, metadata)
+        self.log.debug(
+            'Done creating origin_metadata for origin %s at time %s with '
+            'provider %s and tool %s',
+            origin_id, visit_date, provider_id, tool_id,
+            extra={
+                'swh_type': 'storage_send_end',
+                'swh_content_type': 'origin_metadata',
+                'swh_num': 1,
+                'swh_id': log_id
+            })
+
+    @retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
     def update_origin_visit(self, origin_id, visit, status):
         log_id = str(uuid.uuid4())
         self.log.debug(
@@ -620,6 +739,37 @@
         if self.config['send_occurrences']:
             self.send_batch_occurrences(occurrences)
 
+    def prepare_metadata(self):
+        """First step for origin_metadata insertion.
+
+        Resolve the provider_id and the tool_id by fetching them from
+        the storage, creating the tool and the provider on the fly when
+        the storage does not return them. The resolved identifiers are
+        stored in ``self.origin_metadata['tool']['tool_id']`` and
+        ``self.origin_metadata['provider']['provider_id']``.
+
+        """
+        origin_metadata = self.origin_metadata
+
+        tool = origin_metadata['tool']
+        # NOTE(review): assumes the storage lookup returns the tool id, or
+        # a falsy value when the tool is unknown -- confirm against storage
+        tool_id = self.storage.indexer_configuration_get(tool)
+        if not tool_id:
+            tool_id = self.send_tool(
+                tool['tool_name'], tool['tool_version'],
+                tool['tool_configuration'])
+        self.origin_metadata['tool']['tool_id'] = tool_id
+
+        provider = origin_metadata['provider']
+        # NOTE(review): same assumption as for the tool lookup above
+        provider_id = self.storage.metadata_provider_get_by(provider)
+        if not provider_id:
+            provider_id = self.send_provider(
+                provider['provider_name'], provider['provider_type'],
+                provider['provider_url'], provider['metadata'])
+        self.origin_metadata['provider']['provider_id'] = provider_id
+
     @abstractmethod
     def cleanup(self):
         """Last step executed by the loader.
@@ -666,6 +816,16 @@
         """
         pass
 
+    def store_metadata(self):
+        """Store fetched metadata in the database.
+
+        The default implementation does nothing; loaders that collect
+        origin metadata override it. For more information, see
+        implementation in :class:`DepositLoader`.
+
+        """
+        pass
+
     def load_status(self):
         """Detailed loading status.
 
@@ -743,6 +903,7 @@
                 if not more_data_to_fetch:
                     break
 
+            self.store_metadata()
             self.close_fetch_history_success(fetch_history_id)
             self.update_origin_visit(
                 self.origin_id, self.visit, status=self.visit_status())