Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
Show All 25 Lines | from swh.model.model import ( | ||||
Revision, | Revision, | ||||
Snapshot, | Snapshot, | ||||
SnapshotBranch, | SnapshotBranch, | ||||
TargetType, | TargetType, | ||||
) | ) | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.core.utils import clean_dangling_folders | from swh.loader.core.utils import clean_dangling_folders | ||||
from swh.storage.algos.snapshot import snapshot_get_all_branches | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from . import svn, converters | from . import svn, converters | ||||
from .utils import ( | from .utils import ( | ||||
init_svn_repo_from_dump, | init_svn_repo_from_dump, | ||||
init_svn_repo_from_archive_dump, | init_svn_repo_from_archive_dump, | ||||
OutputStream, | OutputStream, | ||||
) | ) | ||||
from .exception import SvnLoaderUneventful | from .exception import SvnLoaderUneventful | ||||
▲ Show 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | def swh_revision_hash_tree_at_svn_revision(self, revision): | ||||
The hash tree directory as bytes. | The hash tree directory as bytes. | ||||
""" | """ | ||||
local_dirname, local_url = self.svnrepo.export_temporary(revision) | local_dirname, local_url = self.svnrepo.export_temporary(revision) | ||||
h = from_disk.Directory.from_disk(path=local_url).hash | h = from_disk.Directory.from_disk(path=local_url).hash | ||||
self.svnrepo.clean_fs(local_dirname) | self.svnrepo.clean_fs(local_dirname) | ||||
return h | return h | ||||
def swh_latest_snapshot_revision(self, origin_url, previous_swh_revision=None): | def swh_latest_snapshot_revision( | ||||
self, origin_url: str, previous_swh_revision: Optional[bytes] = None | |||||
): | |||||
"""Look for latest snapshot revision and returns it if any. | """Look for latest snapshot revision and returns it if any. | ||||
Args: | Args: | ||||
origin_url (str): Origin identifier | origin_url: Origin identifier | ||||
previous_swh_revision: (optional) id of a possible | previous_swh_revision: id of a possible previous swh revision | ||||
previous swh revision | |||||
Returns: | Returns: | ||||
dict: The latest known point in time. Dict with keys: | dict: The latest known point in time. Dict with keys: | ||||
'revision': latest visited revision | 'revision': latest visited revision | ||||
'snapshot': latest snapshot | 'snapshot': latest snapshot | ||||
If None is found, return an empty dict. | If nothing matching criteria is found, return an empty dict. | ||||
""" | """ | ||||
storage = self.storage | storage = self.storage | ||||
if not previous_swh_revision: # check latest snapshot's revision | |||||
ardumont: rahhh, come on...
(apparently missing test for that part...) | |||||
Done Inline Actions The current test scaffolding prevents this from running there. ardumont: The current test scaffolding prevents this from running there.
This needs more work. | |||||
Done Inline Actionstbc, that's for another diff heh. ardumont: tbc, that's for another diff heh. | |||||
visit = storage.origin_visit_get_latest(origin_url, require_snapshot=True) | latest_snapshot_d = {} | ||||
if visit: | if not previous_swh_revision: | ||||
latest_snap = snapshot_get_all_branches(storage, visit["snapshot"]) | latest_snapshot = snapshot_get_latest(storage, origin_url) | ||||
if latest_snap: | if not latest_snapshot: | ||||
branches = latest_snap.get("branches") | return {} | ||||
latest_snapshot_d = latest_snapshot.to_dict() | |||||
branches = latest_snapshot.branches | |||||
if not branches: | if not branches: | ||||
return {} | return {} | ||||
branch = branches.get(DEFAULT_BRANCH) | branch = branches.get(DEFAULT_BRANCH) | ||||
if not branch: | if not branch: | ||||
return {} | return {} | ||||
target_type = branch["target_type"] | target_type = branch.target_type.value | ||||
if target_type != "revision": | if target_type != "revision": | ||||
return {} | return {} | ||||
previous_swh_revision = branch["target"] | previous_swh_revision = branch.target | ||||
else: | |||||
return {} | |||||
else: | |||||
return {} | |||||
if isinstance(previous_swh_revision, dict): | if isinstance(previous_swh_revision, dict): | ||||
swh_id = previous_swh_revision["id"] | swh_id = previous_swh_revision["id"] | ||||
Not Done Inline ActionsWill that ever happen now? olasd: Will that ever happen now? | |||||
Done Inline Actions I have actually mistyped the method... Yes, I think that can still happen... That parameter is coming from the constructor and I did not touch that part. ardumont: I have actually mistyped the method...
For `previous_swh_revision`, the type should be… | |||||
Done Inline Actions(Thus why i want to rework those among other things ;) ardumont: https://forge.softwareheritage.org/source/swh-loader… | |||||
else: | else: | ||||
swh_id = previous_swh_revision | swh_id = previous_swh_revision | ||||
revs = list(storage.revision_get([swh_id])) | revs = list(storage.revision_get([swh_id])) | ||||
if revs: | if revs: | ||||
return {"snapshot": latest_snap, "revision": revs[0]} | return {"snapshot": latest_snapshot_d, "revision": revs[0]} | ||||
return {} | return {} | ||||
def build_swh_revision(self, rev, commit, dir_id, parents): | def build_swh_revision(self, rev, commit, dir_id, parents): | ||||
"""Build the swh revision dictionary. | """Build the swh revision dictionary. | ||||
This adds: | This adds: | ||||
- the `'synthetic`' flag to true | - the `'synthetic`' flag to true | ||||
▲ Show 20 Lines • Show All 224 Lines • ▼ Show 20 Lines | ]: | ||||
if nextrev: | if nextrev: | ||||
revision_parents[nextrev] = [swh_revision.id] | revision_parents[nextrev] = [swh_revision.id] | ||||
yield _contents, _skipped_contents, _directories, swh_revision | yield _contents, _skipped_contents, _directories, swh_revision | ||||
def prepare_origin_visit(self, *args, **kwargs): | def prepare_origin_visit(self, *args, **kwargs): | ||||
self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url) | self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url) | ||||
self.last_visit = self.storage.origin_visit_get_latest(self.origin_url) | |||||
def prepare(self, *args, **kwargs): | def prepare(self, *args, **kwargs): | ||||
if self.swh_revision: | if self.swh_revision: | ||||
self.last_known_swh_revision = self.swh_revision | self.last_known_swh_revision = self.swh_revision | ||||
else: | else: | ||||
self.last_known_swh_revision = None | self.last_known_swh_revision = None | ||||
self.latest_snapshot = self.swh_latest_snapshot_revision( | self.latest_snapshot = self.swh_latest_snapshot_revision( | ||||
▲ Show 20 Lines • Show All 349 Lines • Show Last 20 Lines |
rahhh, come on...
(apparently missing test for that part...)