Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/loader.py
Show First 20 Lines • Show All 210 Lines • ▼ Show 20 Lines | def send_origin_visit(self, visit_date: Union[str, datetime.datetime], | ||||
'swh_content_type': 'origin_visit', | 'swh_content_type': 'origin_visit', | ||||
'swh_num': 1, | 'swh_num': 1, | ||||
'swh_id': log_id | 'swh_id': log_id | ||||
}) | }) | ||||
return origin_visit | return origin_visit | ||||
@retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
def send_tool(self, tool: Dict[str, Any]) -> Any:
    """Register ``tool`` in the storage and return its identifier.

    Args:
        tool: tool description. Its ``name``, ``version`` and
            ``configuration`` entries are read for logging, and the
            whole dict is handed to ``storage.tool_add``.

    Returns:
        The ``id`` entry of the first element returned by
        ``storage.tool_add``.
        NOTE(review): presumably an integer id -- confirm against the
        storage API and narrow the annotation accordingly.

    """
    # One id shared by the start/end log records so they can be paired.
    log_id = str(uuid.uuid4())
    self.log.debug(
        'Creating tool with name %s version %s configuration %s' % (
            tool['name'], tool['version'], tool['configuration']),
        extra={
            'swh_type': 'storage_send_start',
            'swh_content_type': 'tool',
            'swh_num': 1,
            'swh_id': log_id
        })

    # tool_add takes a list of tools; exactly one is sent, so the
    # first returned entry is the one that matters.
    tools = self.storage.tool_add([tool])
    tool_id = tools[0]['id']

    self.log.debug(
        'Done creating tool with name %s version %s and configuration %s' % (  # noqa
            tool['name'], tool['version'], tool['configuration']),
        extra={
            'swh_type': 'storage_send_end',
            'swh_content_type': 'tool',
            'swh_num': 1,
            'swh_id': log_id
        })
    return tool_id
@retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
def send_provider(self, provider: Dict[str, Any]) -> Any:
    """Resolve the identifier of a metadata provider, creating it if needed.

    The storage is first queried with ``metadata_provider_get_by``; when
    that returns a truthy value containing an ``id`` entry, that id is
    reused. Otherwise the provider is created through
    ``metadata_provider_add``.

    Args:
        provider: provider description. The ``provider_name``,
            ``provider_type`` and ``provider_url`` entries are always
            read; ``metadata`` is read only when the provider has to be
            created.

    Returns:
        The provider identifier, either the existing ``id`` or the value
        returned by ``storage.metadata_provider_add``.
        NOTE(review): presumably an integer id -- confirm against the
        storage API and narrow the annotation accordingly.

    """
    # One id shared by the start/end log records so they can be paired.
    log_id = str(uuid.uuid4())
    self.log.debug(
        'Creating metadata_provider with name %s type %s url %s' % (
            provider['provider_name'], provider['provider_type'],
            provider['provider_url']),
        extra={
            'swh_type': 'storage_send_start',
            'swh_content_type': 'metadata_provider',
            'swh_num': 1,
            'swh_id': log_id
        })

    # FIXME: align metadata_provider_add with indexer_configuration_add
    _provider = self.storage.metadata_provider_get_by(provider)
    if _provider and 'id' in _provider:
        # Already known to the storage: reuse its id.
        provider_id = _provider['id']
    else:
        provider_id = self.storage.metadata_provider_add(
            provider['provider_name'],
            provider['provider_type'],
            provider['provider_url'],
            provider['metadata'])

    self.log.debug(
        'Done creating metadata_provider with name %s type %s url %s' % (
            provider['provider_name'], provider['provider_type'],
            provider['provider_url']),
        extra={
            'swh_type': 'storage_send_end',
            'swh_content_type': 'metadata_provider',
            'swh_num': 1,
            'swh_id': log_id
        })
    return provider_id
@retry(retry_on_exception=retry_loading, stop_max_attempt_number=3)
def send_origin_metadata(self, visit_date, provider_id,
                         tool_id, metadata):
    """Store ``metadata`` for the current origin and log around the call.

    The origin url (``self.origin['url']``), ``visit_date``,
    ``provider_id``, ``tool_id`` and ``metadata`` are forwarded, in that
    order, to ``storage.origin_metadata_add``. A debug record is emitted
    before and after the storage call; both carry the same ``swh_id``.

    """
    log_id = str(uuid.uuid4())
    origin_url = self.origin['url']
    details = (origin_url, visit_date, provider_id, tool_id)

    def log_extra(swh_type):
        # Structured fields attached to both the start and end records.
        return {
            'swh_type': swh_type,
            'swh_content_type': 'origin_metadata',
            'swh_num': 1,
            'swh_id': log_id,
        }

    start_template = (
        'Creating origin_metadata for origin %s at time %s '
        'with provider_id %s and tool_id %s')
    self.log.debug(start_template % details,
                   extra=log_extra('storage_send_start'))

    self.storage.origin_metadata_add(
        origin_url, visit_date, provider_id, tool_id, metadata)

    end_template = (
        'Done Creating origin_metadata for origin %s at time %s '
        'with provider %s and tool %s')
    self.log.debug(end_template % details,
                   extra=log_extra('storage_send_end'))
@retry(retry_on_exception=retry_loading, stop_max_attempt_number=3) | |||||
def update_origin_visit(self, status: str) -> None: | def update_origin_visit(self, status: str) -> None: | ||||
log_id = str(uuid.uuid4()) | log_id = str(uuid.uuid4()) | ||||
self.log.debug( | self.log.debug( | ||||
'Updating origin_visit for origin %s with status %s' % ( | 'Updating origin_visit for origin %s with status %s' % ( | ||||
self.origin['url'], status), | self.origin['url'], status), | ||||
extra={ | extra={ | ||||
'swh_type': 'storage_send_start', | 'swh_type': 'storage_send_start', | ||||
'swh_content_type': 'origin_visit', | 'swh_content_type': 'origin_visit', | ||||
▲ Show 20 Lines • Show All 136 Lines • ▼ Show 20 Lines | def flush(self) -> None: | ||||
Bypass the maybe_load_* methods which awaits threshold reached | Bypass the maybe_load_* methods which awaits threshold reached | ||||
signal. We actually want to store those as we are done | signal. We actually want to store those as we are done | ||||
loading. | loading. | ||||
""" | """ | ||||
if hasattr(self.storage, 'flush'): | if hasattr(self.storage, 'flush'): | ||||
self.storage.flush() | self.storage.flush() | ||||
def prepare_metadata(self) -> None:
    """Resolve the tool and provider identifiers for origin_metadata.

    Reads the ``tool`` and ``provider`` entries of
    ``self.origin_metadata``, sends each one through ``send_tool`` /
    ``send_provider`` and records the returned ids back into
    ``self.origin_metadata`` under ``tool_id`` and ``provider_id``.

    Any exception raised while handling one of them is logged with
    ``self.log.exception`` and re-raised; the tool is handled first, so
    a tool failure prevents the provider from being processed.

    """
    metadata = self.origin_metadata

    tool_description = metadata['tool']
    try:
        self.origin_metadata['tool']['tool_id'] = self.send_tool(
            tool_description)
    except Exception:
        self.log.exception('Problem when storing new tool')
        raise

    # Looked up only after the tool step succeeded, as in the original.
    provider_description = metadata['provider']
    try:
        self.origin_metadata['provider']['provider_id'] = (
            self.send_provider(provider_description))
    except Exception:
        self.log.exception('Problem when storing new provider')
        raise
@abstractmethod
def cleanup(self) -> None:
    """Last step executed by the loader.

    Declared abstract; this base declaration performs no work itself.

    """
@abstractmethod | @abstractmethod | ||||
▲ Show 20 Lines • Show All 237 Lines • Show Last 20 Lines |