Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/core/loader.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
import logging | import logging | ||||
import os | import os | ||||
import psycopg2 | import psycopg2 | ||||
import requests | import requests | ||||
import traceback | import traceback | ||||
import uuid | import uuid | ||||
from abc import ABCMeta, abstractmethod | from abc import ABCMeta, abstractmethod | ||||
from retrying import retry | from retrying import retry | ||||
from typing import Any, Dict, Optional, Tuple | |||||
from . import converters | from . import converters | ||||
from swh.core import config | from swh.core import config | ||||
from swh.storage import get_storage, HashCollision | from swh.storage import get_storage, HashCollision | ||||
from .queue import QueuePerSizeAndNbUniqueElements | from .queue import QueuePerSizeAndNbUniqueElements | ||||
from .queue import QueuePerNbUniqueElements | from .queue import QueuePerNbUniqueElements | ||||
▲ Show 20 Lines • Show All 82 Lines • ▼ Show 20 Lines | class BufferedLoader(config.SWHConfig, metaclass=ABCMeta): | ||||
You can take a look at some example classes: | You can take a look at some example classes: | ||||
- :class:`BaseSvnLoader` | - :class:`BaseSvnLoader` | ||||
- :class:`TarLoader` | - :class:`TarLoader` | ||||
- :class:`DirLoader` | - :class:`DirLoader` | ||||
- :class:`DebianLoader` | - :class:`DebianLoader` | ||||
""" | """ | ||||
CONFIG_BASE_FILENAME = None | CONFIG_BASE_FILENAME = None # type: Optional[str] | ||||
DEFAULT_CONFIG = { | DEFAULT_CONFIG = { | ||||
'storage': ('dict', { | 'storage': ('dict', { | ||||
'cls': 'remote', | 'cls': 'remote', | ||||
'args': { | 'args': { | ||||
'url': 'http://localhost:5002/', | 'url': 'http://localhost:5002/', | ||||
} | } | ||||
}), | }), | ||||
Show All 10 Lines | DEFAULT_CONFIG = { | ||||
# Number of contents | # Number of contents | ||||
'content_packet_size': ('int', 10000), | 'content_packet_size': ('int', 10000), | ||||
# packet of 100 MiB of contents | # packet of 100 MiB of contents | ||||
'content_packet_size_bytes': ('int', 100 * 1024 * 1024), | 'content_packet_size_bytes': ('int', 100 * 1024 * 1024), | ||||
'directory_packet_size': ('int', 25000), | 'directory_packet_size': ('int', 25000), | ||||
'revision_packet_size': ('int', 100000), | 'revision_packet_size': ('int', 100000), | ||||
'release_packet_size': ('int', 100000), | 'release_packet_size': ('int', 100000), | ||||
'occurrence_packet_size': ('int', 100000), | 'occurrence_packet_size': ('int', 100000), | ||||
} | } # type: Dict[str, Tuple[str, Any]] | ||||
ADDITIONAL_CONFIG = {} | ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] | ||||
def __init__(self, logging_class=None, config=None): | def __init__(self, logging_class=None, config=None): | ||||
if config: | if config: | ||||
self.config = config | self.config = config | ||||
else: | else: | ||||
self.config = self.parse_config_file( | self.config = self.parse_config_file( | ||||
additional_configs=[self.ADDITIONAL_CONFIG]) | additional_configs=[self.ADDITIONAL_CONFIG]) | ||||
▲ Show 20 Lines • Show All 748 Lines • ▼ Show 20 Lines | class UnbufferedLoader(BufferedLoader): | ||||
UnbufferedLoader loaders are able to load all the data in one go. For | UnbufferedLoader loaders are able to load all the data in one go. For | ||||
example, the loader defined in swh-loader-git | example, the loader defined in swh-loader-git | ||||
:class:`BulkUpdater`. | :class:`BulkUpdater`. | ||||
For other loaders (stateful ones, e.g. :class:`SWHSvnLoader`), | For other loaders (stateful ones, e.g. :class:`SWHSvnLoader`), | ||||
inherit directly from :class:`BufferedLoader`. | inherit directly from :class:`BufferedLoader`. | ||||
""" | """ | ||||
ADDITIONAL_CONFIG = {} | ADDITIONAL_CONFIG = {} # type: Dict[str, Tuple[str, Any]] | ||||
def __init__(self, logging_class=None, config=None): | def __init__(self, logging_class=None, config=None): | ||||
super().__init__(logging_class=logging_class, config=config) | super().__init__(logging_class=logging_class, config=config) | ||||
self.visit_date = None # possibly overridden in self.prepare method | self.visit_date = None # possibly overridden in self.prepare method | ||||
def cleanup(self): | def cleanup(self): | ||||
"""Clean up an eventual state installed for computations.""" | """Clean up an eventual state installed for computations.""" | ||||
pass | pass | ||||
▲ Show 20 Lines • Show All 65 Lines • Show Last 20 Lines |