Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/core/indexing_lister.py
# Copyright (C) 2015-2017 the Software Heritage developers | # Copyright (C) 2015-2019 the Software Heritage developers | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import abc | import abc | ||||
import logging | import logging | ||||
from itertools import count | from itertools import count | ||||
import dateutil | import dateutil | ||||
from sqlalchemy import func | from sqlalchemy import func | ||||
from .lister_transports import ListerHttpTransport | from .lister_transports import ListerHttpTransport | ||||
from .lister_base import ListerBase | from .lister_base import ListerBase | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
class IndexingLister(ListerBase): | class IndexingLister(ListerBase): | ||||
vlorentz: This attribute (`flush_packet_db`) should have a short docstring | |||||
flush_packet_db = 20 | |||||
"""Lister* intermediate class for any service that follows the pattern: | """Lister* intermediate class for any service that follows the pattern: | ||||
- The service must report at least one stable unique identifier, known | - The service must report at least one stable unique identifier, known | ||||
herein as the UID value, for every listed repository. | herein as the UID value, for every listed repository. | ||||
- If the service splits the list of repositories into sublists, it must | - If the service splits the list of repositories into sublists, it must | ||||
report at least one stable and sorted index identifier for every listed | report at least one stable and sorted index identifier for every listed | ||||
repository, known herein as the indexable value, which can be used as | repository, known herein as the indexable value, which can be used as | ||||
part of the service endpoint query to request a sublist beginning from | part of the service endpoint query to request a sublist beginning from | ||||
▲ Show 20 Lines • Show All 190 Lines • ▼ Show 20 Lines | def run(self, min_bound=None, max_bound=None): | ||||
logger.info('stopping after index %s, no next link found', | logger.info('stopping after index %s, no next link found', | ||||
index) | index) | ||||
return | return | ||||
index = next_index | index = next_index | ||||
logger.debug('Index: %s', index) | logger.debug('Index: %s', index) | ||||
yield i | yield i | ||||
for i in ingest_indexes(): | for i in ingest_indexes(): | ||||
if (i % 20) == 0: | if (i % self.flush_packet_db) == 0: | ||||
logger.debug('Flushing updates at index %s', i) | logger.debug('Flushing updates at index %s', i) | ||||
self.db_session.commit() | self.db_session.commit() | ||||
self.db_session = self.mk_session() | self.db_session = self.mk_session() | ||||
self.db_session.commit() | self.db_session.commit() | ||||
self.db_session = self.mk_session() | self.db_session = self.mk_session() | ||||
class IndexingHttpLister(ListerHttpTransport, IndexingLister): | class IndexingHttpLister(ListerHttpTransport, IndexingLister): | ||||
"""Convenience class for ensuring right lookup and init order | """Convenience class for ensuring right lookup and init order | ||||
when combining IndexingLister and ListerHttpTransport.""" | when combining IndexingLister and ListerHttpTransport.""" | ||||
def __init__(self, api_baseurl=None, override_config=None): | def __init__(self, api_baseurl=None, override_config=None): | ||||
ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) | ListerHttpTransport.__init__(self, api_baseurl=api_baseurl) | ||||
IndexingLister.__init__(self, override_config=override_config) | IndexingLister.__init__(self, override_config=override_config) |
Inline comment (vlorentz) on `flush_packet_db`: this attribute should have a short docstring