Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/indexer.py
Show First 20 Lines • Show All 137 Lines • ▼ Show 20 Lines | DEFAULT_CONFIG = { | ||||
'args': { | 'args': { | ||||
'url': 'http://localhost:5003/', | 'url': 'http://localhost:5003/', | ||||
} | } | ||||
}) | }) | ||||
} | } | ||||
ADDITIONAL_CONFIG = {} | ADDITIONAL_CONFIG = {} | ||||
def __init__(self): | USE_TOOLS = True | ||||
def __init__(self, config=None, **kw): | |||||
"""Prepare and check that the indexer is ready to run. | """Prepare and check that the indexer is ready to run. | ||||
""" | """ | ||||
super().__init__() | super().__init__() | ||||
if config is not None: | |||||
self.config = config | |||||
else: | |||||
config_keys = ('base_filename', 'config_filename', | |||||
'additional_configs', 'global_config') | |||||
config_args = {k: v for k, v in kw.items() if k in config_keys} | |||||
self.config = self.parse_config_file(**config_args) | |||||
self.prepare() | self.prepare() | ||||
self.check() | self.check() | ||||
def prepare(self): | def prepare(self): | ||||
"""Prepare the indexer's needed runtime configuration. | """Prepare the indexer's needed runtime configuration. | ||||
Without this step, the indexer cannot possibly run. | Without this step, the indexer cannot possibly run. | ||||
""" | """ | ||||
# HACK to deal with edge case (e.g revision metadata indexer) | |||||
if not hasattr(self, 'config'): | |||||
self.config = self.parse_config_file( | |||||
additional_configs=[self.ADDITIONAL_CONFIG]) | |||||
config_storage = self.config.get('storage') | config_storage = self.config.get('storage') | ||||
if config_storage: | if config_storage: | ||||
self.storage = get_storage(**config_storage) | self.storage = get_storage(**config_storage) | ||||
objstorage = self.config['objstorage'] | objstorage = self.config['objstorage'] | ||||
self.objstorage = get_objstorage(objstorage['cls'], objstorage['args']) | self.objstorage = get_objstorage(objstorage['cls'], | ||||
objstorage['args']) | |||||
idx_storage = self.config[INDEXER_CFG_KEY] | idx_storage = self.config[INDEXER_CFG_KEY] | ||||
self.idx_storage = get_indexer_storage(**idx_storage) | self.idx_storage = get_indexer_storage(**idx_storage) | ||||
_log = logging.getLogger('requests.packages.urllib3.connectionpool') | _log = logging.getLogger('requests.packages.urllib3.connectionpool') | ||||
_log.setLevel(logging.WARN) | _log.setLevel(logging.WARN) | ||||
self.log = logging.getLogger('swh.indexer') | self.log = logging.getLogger('swh.indexer') | ||||
self.tools = list(self.register_tools(self.config['tools'])) | |||||
def check(self, *, check_tools=True): | if self.USE_TOOLS: | ||||
self.tools = list(self.register_tools( | |||||
self.config.get('tools', []))) | |||||
self.results = [] | |||||
@property | |||||
def tool(self): | |||||
return self.tools[0] | |||||
def check(self): | |||||
"""Check the indexer's configuration is ok before proceeding. | """Check the indexer's configuration is ok before proceeding. | ||||
If ok, does nothing. If not raise error. | If ok, does nothing. If not raise error. | ||||
""" | """ | ||||
if check_tools and not self.tools: | if self.USE_TOOLS and not self.tools: | ||||
raise ValueError('Tools %s is unknown, cannot continue' % | raise ValueError('Tools %s is unknown, cannot continue' % | ||||
self.tools) | self.tools) | ||||
def _prepare_tool(self, tool): | def _prepare_tool(self, tool): | ||||
"""Prepare the tool dict to be compliant with the storage api. | """Prepare the tool dict to be compliant with the storage api. | ||||
""" | """ | ||||
return {'tool_%s' % key: value for key, value in tool.items()} | return {'tool_%s' % key: value for key, value in tool.items()} | ||||
Show All 40 Lines | def index(self, id, data): | ||||
Returns: | Returns: | ||||
dict: a dict that makes sense for the | dict: a dict that makes sense for the | ||||
:meth:`.persist_index_computations` method. | :meth:`.persist_index_computations` method. | ||||
""" | """ | ||||
pass | pass | ||||
def filter(self, ids): | |||||
"""Filter missing ids for that particular indexer. | |||||
Args: | |||||
ids ([bytes]): list of ids | |||||
Yields: | |||||
iterator of missing ids | |||||
""" | |||||
yield from ids | |||||
@abc.abstractmethod | @abc.abstractmethod | ||||
def persist_index_computations(self, results, policy_update): | def persist_index_computations(self, results, policy_update): | ||||
"""Persist the computation resulting from the index. | """Persist the computation resulting from the index. | ||||
Args: | Args: | ||||
results ([result]): List of results. One result is the | results ([result]): List of results. One result is the | ||||
result of the index function. | result of the index function. | ||||
▲ Show 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | class ContentIndexer(BaseIndexer): | ||||
To work on indexer range, use the :class:`ContentRangeIndexer` | To work on indexer range, use the :class:`ContentRangeIndexer` | ||||
instead. | instead. | ||||
Note: :class:`ContentIndexer` is not an instantiable object. To | Note: :class:`ContentIndexer` is not an instantiable object. To | ||||
use it, one should inherit from this class and override the | use it, one should inherit from this class and override the | ||||
methods mentioned in the :class:`BaseIndexer` class. | methods mentioned in the :class:`BaseIndexer` class. | ||||
""" | """ | ||||
@abc.abstractmethod | |||||
def filter(self, ids): | |||||
"""Filter missing ids for that particular indexer. | |||||
Args: | |||||
ids ([bytes]): list of ids | |||||
Yields: | |||||
iterator of missing ids | |||||
""" | |||||
pass | |||||
def run(self, ids, policy_update, | def run(self, ids, policy_update, | ||||
next_step=None, **kwargs): | next_step=None, **kwargs): | ||||
"""Given a list of ids: | """Given a list of ids: | ||||
- retrieve the content from the storage | - retrieve the content from the storage | ||||
- execute the indexing computations | - execute the indexing computations | ||||
- store the results (according to policy_update) | - store the results (according to policy_update) | ||||
▲ Show 20 Lines • Show All 285 Lines • Show Last 20 Lines |