Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/svn/loader.py
Show First 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | ): | ||||
self._visit_status = "full" | self._visit_status = "full" | ||||
self._load_status = "uneventful" | self._load_status = "uneventful" | ||||
self.visit_date = visit_date | self.visit_date = visit_date | ||||
self.destination_path = destination_path | self.destination_path = destination_path | ||||
self.start_from_scratch = start_from_scratch | self.start_from_scratch = start_from_scratch | ||||
self.swh_revision = swh_revision | self.swh_revision = swh_revision | ||||
self.max_content_length = self.config["max_content_size"] | self.max_content_length = self.config["max_content_size"] | ||||
self.snapshot = None | self.snapshot = None | ||||
self.last_visit = None | |||||
def pre_cleanup(self): | def pre_cleanup(self): | ||||
"""Cleanup potential dangling files from prior runs (e.g. OOM killed | """Cleanup potential dangling files from prior runs (e.g. OOM killed | ||||
tasks) | tasks) | ||||
""" | """ | ||||
clean_dangling_folders( | clean_dangling_folders( | ||||
self.temp_directory, | self.temp_directory, | ||||
Show All 26 Lines | def swh_revision_hash_tree_at_svn_revision(self, revision): | ||||
The hash tree directory as bytes. | The hash tree directory as bytes. | ||||
""" | """ | ||||
local_dirname, local_url = self.svnrepo.export_temporary(revision) | local_dirname, local_url = self.svnrepo.export_temporary(revision) | ||||
h = from_disk.Directory.from_disk(path=local_url).hash | h = from_disk.Directory.from_disk(path=local_url).hash | ||||
self.svnrepo.clean_fs(local_dirname) | self.svnrepo.clean_fs(local_dirname) | ||||
return h | return h | ||||
def swh_latest_snapshot_revision(self, origin_url, previous_swh_revision=None): | def swh_latest_snapshot_revision( | ||||
self, origin_url: str, previous_swh_revision: Optional[bytes] = None | |||||
): | |||||
"""Look for latest snapshot revision and returns it if any. | """Look for latest snapshot revision and returns it if any. | ||||
Args: | Args: | ||||
origin_url (str): Origin identifier | origin_url: Origin identifier | ||||
previous_swh_revision: (optional) id of a possible | previous_swh_revision: id of a possible previous swh revision | ||||
previous swh revision | |||||
Returns: | Returns: | ||||
dict: The latest known point in time. Dict with keys: | dict: The latest known point in time. Dict with keys: | ||||
'revision': latest visited revision | 'revision': latest visited revision | ||||
'snapshot': latest snapshot | 'snapshot': latest snapshot | ||||
If None is found, return an empty dict. | If nothing matching criteria is found, return an empty dict. | ||||
""" | """ | ||||
storage = self.storage | storage = self.storage | ||||
if not previous_swh_revision: # check latest snapshot's revision | if not previous_swh_revision: # check latest snapshot's revision | ||||
visit = storage.origin_visit_get_latest(origin_url, require_snapshot=True) | if self.last_visit is None: | ||||
if visit: | return {} | ||||
latest_snap = snapshot_get_all_branches(storage, visit["snapshot"]) | visit_id = self.last_visit.visit | ||||
assert visit_id is not None | |||||
visit_status = storage.origin_visit_status_get_latest( | |||||
origin_url, visit_id, require_snapshot=True | |||||
) | |||||
if visit_status: | |||||
latest_snap = snapshot_get_all_branches(storage, visit_status.snapshot) | |||||
if latest_snap: | if latest_snap: | ||||
branches = latest_snap.get("branches") | branches = latest_snap.get("branches") | ||||
if not branches: | if not branches: | ||||
return {} | return {} | ||||
branch = branches.get(DEFAULT_BRANCH) | branch = branches.get(DEFAULT_BRANCH) | ||||
if not branch: | if not branch: | ||||
return {} | return {} | ||||
target_type = branch["target_type"] | target_type = branch["target_type"] | ||||
ardumont: rahhh, come on...
(apparently missing test for that part...) | |||||
Done Inline Actions: The current test scaffolding prevents this from running there. ardumont: The current test scaffolding prevents this from running there.
This needs more work. | |||||
Done Inline Actions: tbc, that's for another diff heh. ardumont: tbc, that's for another diff heh. | |||||
if target_type != "revision": | if target_type != "revision": | ||||
return {} | return {} | ||||
previous_swh_revision = branch["target"] | previous_swh_revision = branch["target"] | ||||
else: | else: | ||||
return {} | return {} | ||||
else: | else: | ||||
return {} | return {} | ||||
if isinstance(previous_swh_revision, dict): | if isinstance(previous_swh_revision, dict): | ||||
swh_id = previous_swh_revision["id"] | swh_id = previous_swh_revision["id"] | ||||
Not Done Inline Actions: Will that ever happen now? olasd: Will that ever happen now? | |||||
Done Inline Actions: I actually mistyped the method... Yes, I think that can still happen... That parameter comes from the constructor and I did not touch that part. ardumont: I actually mistyped the method...
For `previous_swh_revision`, the type should be… | |||||
Done Inline Actions: (This is why I want to rework those, among other things ;) ardumont: https://forge.softwareheritage.org/source/swh-loader… | |||||
else: | else: | ||||
swh_id = previous_swh_revision | swh_id = previous_swh_revision | ||||
revs = list(storage.revision_get([swh_id])) | revs = list(storage.revision_get([swh_id])) | ||||
if revs: | if revs: | ||||
return {"snapshot": latest_snap, "revision": revs[0]} | return {"snapshot": latest_snap, "revision": revs[0]} | ||||
return {} | return {} | ||||
▲ Show 20 Lines • Show All 230 Lines • ▼ Show 20 Lines | ]: | ||||
if nextrev: | if nextrev: | ||||
revision_parents[nextrev] = [swh_revision.id] | revision_parents[nextrev] = [swh_revision.id] | ||||
yield _contents, _skipped_contents, _directories, swh_revision | yield _contents, _skipped_contents, _directories, swh_revision | ||||
def prepare_origin_visit(self, *args, **kwargs): | def prepare_origin_visit(self, *args, **kwargs): | ||||
self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url) | self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url) | ||||
self.last_visit = self.storage.origin_visit_get_latest(self.origin_url) | |||||
def prepare(self, *args, **kwargs): | def prepare(self, *args, **kwargs): | ||||
if self.swh_revision: | if self.swh_revision: | ||||
self.last_known_swh_revision = self.swh_revision | self.last_known_swh_revision = self.swh_revision | ||||
else: | else: | ||||
self.last_known_swh_revision = None | self.last_known_swh_revision = None | ||||
self.latest_snapshot = self.swh_latest_snapshot_revision( | self.latest_snapshot = self.swh_latest_snapshot_revision( | ||||
▲ Show 20 Lines • Show All 349 Lines • Show Last 20 Lines |
rahhh, come on...
(apparently missing test for that part...)