Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 274 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
return {'status': 'failed'} | return {'status': 'failed'} | ||||
try: | try: | ||||
last_snapshot = self.last_snapshot() | last_snapshot = self.last_snapshot() | ||||
logger.debug('last snapshot: %s', last_snapshot) | logger.debug('last snapshot: %s', last_snapshot) | ||||
known_artifacts = self.known_artifacts(last_snapshot) | known_artifacts = self.known_artifacts(last_snapshot) | ||||
logger.debug('known artifacts: %s', known_artifacts) | logger.debug('known artifacts: %s', known_artifacts) | ||||
# Retrieve the default release version (the "latest" one) | |||||
default_version = self.get_default_version() | |||||
logger.debug('default version: %s', default_version) | |||||
for version in self.get_versions(): # for each | for version in self.get_versions(): # for each | ||||
logger.debug('version: %s', version) | logger.debug('version: %s', version) | ||||
tmp_revisions[version] = [] | tmp_revisions[version] = [] | ||||
# `p_` stands for `package_` | # `p_` stands for `package_` | ||||
for branch_name, p_info in self.get_package_info(version): | for branch_name, p_info in self.get_package_info(version): | ||||
logger.debug('package_info: %s', p_info) | logger.debug('package_info: %s', p_info) | ||||
revision_id = self.resolve_revision_from( | revision_id = self.resolve_revision_from( | ||||
known_artifacts, p_info['raw']) | known_artifacts, p_info['raw']) | ||||
if revision_id is None: | if revision_id is None: | ||||
(revision_id, loaded) = \ | (revision_id, loaded) = \ | ||||
self._load_revision(p_info, origin) | self._load_revision(p_info, origin) | ||||
if loaded: | if loaded: | ||||
status_load = 'eventful' | status_load = 'eventful' | ||||
else: | else: | ||||
status_visit = 'partial' | status_visit = 'partial' | ||||
if revision_id is None: | if revision_id is None: | ||||
continue | continue | ||||
tmp_revisions[version].append((branch_name, revision_id)) | tmp_revisions[version].append((branch_name, revision_id)) | ||||
snapshot = self._load_snapshot(default_version, tmp_revisions) | |||||
if hasattr(self.storage, 'flush'): | |||||
self.storage.flush() | |||||
except Exception: | except Exception: | ||||
logger.exception('Fail to load %s' % self.url) | logger.exception('Fail to load %s' % self.url) | ||||
status_visit = 'partial' | status_visit = 'partial' | ||||
status_load = 'failed' | status_load = 'failed' | ||||
finally: | finally: | ||||
lewo: I think we could be in trouble if this method raises an exception. So, maybe we could instead… | |||||
Done Inline Actions: Let's make the functional loader resilient to this, and not the loader one (I think a try/except within its own implementation would do). The same goes for the default version, by the way. ardumont: Let's make the functional loader resilient to this, and not the loader one.
Since there is only… | |||||
# Retrieve the default release version (the "latest" one) | |||||
Done Inline Actions^ D2769 makes me wonder... ardumont: ^ D2769 makes me wonder... | |||||
Done Inline Actions: Never mind that comment now; it's no longer relevant. ardumont: Never mind that comment now; it's no longer relevant. | |||||
default_version = self.get_default_version() | |||||
logger.debug('default version: %s', default_version) | |||||
extra_branches = self.extra_branches() | |||||
logger.debug('extra branches: %s', extra_branches) | |||||
snapshot = self._load_snapshot( | |||||
default_version, tmp_revisions, extra_branches) | |||||
if hasattr(self.storage, 'flush'): | |||||
self.storage.flush() | |||||
self.storage.origin_visit_update( | self.storage.origin_visit_update( | ||||
origin=self.url, visit_id=visit.visit, status=status_visit, | origin=self.url, visit_id=visit.visit, status=status_visit, | ||||
snapshot=snapshot and snapshot.id) | snapshot=snapshot and snapshot.id) | ||||
result = { | result: Dict[str, Any] = { | ||||
'status': status_load, | 'status': status_load, | ||||
} # type: Dict[str, Any] | } | ||||
if snapshot: | if snapshot: | ||||
result['snapshot_id'] = hash_to_hex(snapshot.id) | result['snapshot_id'] = hash_to_hex(snapshot.id) | ||||
return result | return result | ||||
def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | ||||
"""Does all the loading of a revision itself: | """Does all the loading of a revision itself: | ||||
* downloads a package and uncompresses it | * downloads a package and uncompresses it | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | ||||
logger.debug('Revision: %s', revision) | logger.debug('Revision: %s', revision) | ||||
self.storage.revision_add([revision]) | self.storage.revision_add([revision]) | ||||
return (revision.id, True) | return (revision.id, True) | ||||
def _load_snapshot( | def _load_snapshot( | ||||
self, default_version: str, | self, default_version: str, | ||||
revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot: | revisions: Dict[str, List[Tuple[str, bytes]]], | ||||
"""Build snapshot out of the current revisions stored. Then load it in | extra_branches: Dict[bytes, Mapping[str, Any]] | ||||
Done Inline Actions: It'd be great if the types (revisions, extra_branches) were a tad more unified here. ardumont: It'd be great if the types (revisions, extra_branches) were a tad more unified here.
But let's keep… | |||||
the storage. | ) -> Optional[Snapshot]: | ||||
"""Build snapshot out of the current revisions stored and extra branches. | |||||
Then load it in the storage. | |||||
""" | """ | ||||
logger.debug('revisions: %s', revisions) | logger.debug('revisions: %s', revisions) | ||||
Not Done Inline Actions: Aren't there some edge cases where generating an empty snapshot is the right thing to do? olasd: Aren't there some edge cases where generating an empty snapshot is the right thing to do? | |||||
Done Inline Actions: No idea. That would simplify things, but I don't know which is better. ardumont: No idea.
That would simplify things, but I don't know which is better. | |||||
# Build and load the snapshot | # Build and load the snapshot | ||||
branches = {} # type: Dict[bytes, Mapping[str, Any]] | branches = {} # type: Dict[bytes, Mapping[str, Any]] | ||||
for version, branch_name_revisions in revisions.items(): | for version, branch_name_revisions in revisions.items(): | ||||
if version == default_version and \ | if version == default_version and \ | ||||
len(branch_name_revisions) == 1: | len(branch_name_revisions) == 1: | ||||
# only 1 branch (no ambiguity), we can create an alias | # only 1 branch (no ambiguity), we can create an alias | ||||
# branch 'HEAD' | # branch 'HEAD' | ||||
branch_name, _ = branch_name_revisions[0] | branch_name, _ = branch_name_revisions[0] | ||||
# except for some corner case (deposit) | # except for some corner case (deposit) | ||||
if branch_name != 'HEAD': | if branch_name != 'HEAD': | ||||
branches[b'HEAD'] = { | branches[b'HEAD'] = { | ||||
'target_type': 'alias', | 'target_type': 'alias', | ||||
'target': branch_name.encode('utf-8'), | 'target': branch_name.encode('utf-8'), | ||||
} | } | ||||
for branch_name, target in branch_name_revisions: | for branch_name, target in branch_name_revisions: | ||||
branches[branch_name.encode('utf-8')] = { | branches[branch_name.encode('utf-8')] = { | ||||
'target_type': 'revision', | 'target_type': 'revision', | ||||
'target': target, | 'target': target, | ||||
} | } | ||||
# Deal with extra-branches | # Deal with extra-branches | ||||
for name, branch_target in self.extra_branches().items(): | for name, branch_target in extra_branches.items(): | ||||
if name in branches: | if name in branches: | ||||
logger.error("Extra branch '%s' has been ignored", | logger.error("Extra branch '%s' has been ignored", | ||||
name) | name) | ||||
else: | else: | ||||
branches[name] = branch_target | branches[name] = branch_target | ||||
snapshot_data = { | snapshot_data = { | ||||
'branches': branches | 'branches': branches | ||||
} | } | ||||
logger.debug('snapshot: %s', snapshot_data) | logger.debug('snapshot: %s', snapshot_data) | ||||
snapshot = Snapshot.from_dict(snapshot_data) | snapshot = Snapshot.from_dict(snapshot_data) | ||||
logger.debug('snapshot: %s', snapshot) | logger.debug('snapshot: %s', snapshot) | ||||
self.storage.snapshot_add([snapshot]) | self.storage.snapshot_add([snapshot]) | ||||
return snapshot | return snapshot |
I think we could be in trouble if this method raises an exception. So maybe we could instead get extra_branches at the beginning of the load method, before loading any revisions: that way we are sure to have extra_branches, and any exception raised by this method can be caught.