Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
"""Loader in charge of injecting either new or existing svn mirrors to | """Loader in charge of injecting either new or existing svn mirrors to | ||||
swh-storage. | swh-storage. | ||||
""" | """ | ||||
import os | import os | ||||
import pty | import pty | ||||
import re | import re | ||||
import shutil | import shutil | ||||
import tempfile | import tempfile | ||||
from mmap import mmap, ACCESS_WRITE | from mmap import mmap, ACCESS_WRITE | ||||
from subprocess import Popen | from subprocess import Popen | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from swh.model.identifiers import identifier_to_bytes, revision_identifier | from swh.model.identifiers import identifier_to_bytes, revision_identifier | ||||
from swh.model.identifiers import snapshot_identifier | from swh.model.identifiers import snapshot_identifier | ||||
from swh.loader.core.loader import BufferedLoader | from swh.loader.core.converters import content_for_storage | ||||
from swh.loader.core.loader import BaseLoader | |||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.storage.algos.snapshot import snapshot_get_all_branches | from swh.storage.algos.snapshot import snapshot_get_all_branches | ||||
from . import svn, converters | from . import svn, converters | ||||
from .utils import ( | from .utils import ( | ||||
init_svn_repo_from_dump, init_svn_repo_from_archive_dump, | init_svn_repo_from_dump, init_svn_repo_from_archive_dump, | ||||
OutputStream | OutputStream | ||||
) | ) | ||||
Show All 21 Lines | return { | ||||
} | } | ||||
} | } | ||||
} | } | ||||
TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.svn.' | TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.svn.' | ||||
class SvnLoader(BufferedLoader): | class SvnLoader(BaseLoader): | ||||
ardumont: BufferedLoader was renamed to BaseLoader (BufferedLoader still exists as an alias); the behavior is the same, minus the retry logic and the send_* indirections. | |||||
"""Swh svn loader. | """Swh svn loader. | ||||
The repository is either remote or local. The loader deals with | The repository is either remote or local. The loader deals with | ||||
update on an already previously loaded repository. | update on an already previously loaded repository. | ||||
""" | """ | ||||
CONFIG_BASE_FILENAME = 'loader/svn' | CONFIG_BASE_FILENAME = 'loader/svn' | ||||
▲ Show 20 Lines • Show All 465 Lines • ▼ Show 20 Lines | Local repository not cleaned up for investigation: %s''' % ( | ||||
def store_data(self): | def store_data(self): | ||||
"""We store the data accumulated in internal instance variable. If | """We store the data accumulated in internal instance variable. If | ||||
the iteration over the svn revisions is done, we create the | the iteration over the svn revisions is done, we create the | ||||
snapshot and flush to storage the data. | snapshot and flush to storage the data. | ||||
This also resets the internal instance variable state. | This also resets the internal instance variable state. | ||||
""" | """ | ||||
self.send_contents(self._contents) | contents = [ | ||||
self.send_directories(self._directories) | content_for_storage(c, max_content_size=self.max_content_size, | ||||
self.send_revisions(self._revisions) | origin_url=self.origin['url']) | ||||
for c in self._contents | |||||
] | |||||
self.storage.content_add(contents) | |||||
self.storage.directory_add(self._directories) | |||||
self.storage.revision_add(self._revisions) | |||||
if self.done: # finish line, snapshot! | if self.done: # finish line, snapshot! | ||||
self.generate_and_load_snapshot(revision=self._last_revision, | self.generate_and_load_snapshot(revision=self._last_revision, | ||||
snapshot=self._snapshot) | snapshot=self._snapshot) | ||||
self.flush() | self.flush() | ||||
self._contents = [] | self._contents = [] | ||||
self._directories = [] | self._directories = [] | ||||
Show All 13 Lines | def generate_and_load_snapshot(self, revision=None, snapshot=None): | ||||
if revision: # Priority to the revision | if revision: # Priority to the revision | ||||
snap = build_swh_snapshot(revision['id']) | snap = build_swh_snapshot(revision['id']) | ||||
snap['id'] = identifier_to_bytes(snapshot_identifier(snap)) | snap['id'] = identifier_to_bytes(snapshot_identifier(snap)) | ||||
elif snapshot: # Fallback to prior snapshot | elif snapshot: # Fallback to prior snapshot | ||||
snap = snapshot | snap = snapshot | ||||
else: | else: | ||||
return None | return None | ||||
self.log.debug('snapshot: %s' % snap) | self.log.debug('snapshot: %s' % snap) | ||||
self.send_snapshot(snap) | self.storage.snapshot_add([snap]) | ||||
def load_status(self): | def load_status(self): | ||||
return { | return { | ||||
'status': self._load_status, | 'status': self._load_status, | ||||
} | } | ||||
def visit_status(self): | def visit_status(self): | ||||
return self._visit_status | return self._visit_status | ||||
▲ Show 20 Lines • Show All 182 Lines • Show Last 20 Lines |
BufferedLoader was renamed to BaseLoader (BufferedLoader still exists as an alias); the behavior is the same, minus the retry logic and the send_* indirections.