Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 101 Lines • ▼ Show 20 Lines | def build_revision( | ||||
SWH data dict | SWH data dict | ||||
""" | """ | ||||
raise NotImplementedError('build_revision') | raise NotImplementedError('build_revision') | ||||
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||
"""Retrieve the latest release version if any. | """Retrieve the latest release version if any. | ||||
This method *must* not raise exception. | |||||
Returns: | Returns: | ||||
Latest version | Latest version | ||||
""" | """ | ||||
return '' | return '' | ||||
def last_snapshot(self) -> Optional[Snapshot]: | def last_snapshot(self) -> Optional[Snapshot]: | ||||
"""Retrieve the last snapshot | """Retrieve the last snapshot | ||||
▲ Show 20 Lines • Show All 91 Lines • ▼ Show 20 Lines | def uncompress(self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], | ||||
for a_path, _ in dl_artifacts: | for a_path, _ in dl_artifacts: | ||||
uncompress(a_path, dest=uncompressed_path) | uncompress(a_path, dest=uncompressed_path) | ||||
return uncompressed_path | return uncompressed_path | ||||
def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: | def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: | ||||
"""Return an extra dict of branches that are used to update the set of | """Return an extra dict of branches that are used to update the set of | ||||
branches. | branches. | ||||
This method *must* not raise exception. | |||||
""" | """ | ||||
return {} | return {} | ||||
def load(self) -> Dict: | def load(self) -> Dict: | ||||
"""Load for a specific origin the associated contents. | """Load for a specific origin the associated contents. | ||||
for each package version of the origin | for each package version of the origin | ||||
▲ Show 20 Lines • Show All 50 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
return {'status': 'failed'} | return {'status': 'failed'} | ||||
try: | try: | ||||
last_snapshot = self.last_snapshot() | last_snapshot = self.last_snapshot() | ||||
logger.debug('last snapshot: %s', last_snapshot) | logger.debug('last snapshot: %s', last_snapshot) | ||||
known_artifacts = self.known_artifacts(last_snapshot) | known_artifacts = self.known_artifacts(last_snapshot) | ||||
logger.debug('known artifacts: %s', known_artifacts) | logger.debug('known artifacts: %s', known_artifacts) | ||||
# Retrieve the default release version (the "latest" one) | |||||
default_version = self.get_default_version() | |||||
logger.debug('default version: %s', default_version) | |||||
for version in self.get_versions(): # for each | for version in self.get_versions(): # for each | ||||
logger.debug('version: %s', version) | logger.debug('version: %s', version) | ||||
tmp_revisions[version] = [] | tmp_revisions[version] = [] | ||||
# `p_` stands for `package_` | # `p_` stands for `package_` | ||||
for branch_name, p_info in self.get_package_info(version): | for branch_name, p_info in self.get_package_info(version): | ||||
logger.debug('package_info: %s', p_info) | logger.debug('package_info: %s', p_info) | ||||
revision_id = self.resolve_revision_from( | revision_id = self.resolve_revision_from( | ||||
known_artifacts, p_info['raw']) | known_artifacts, p_info['raw']) | ||||
if revision_id is None: | if revision_id is None: | ||||
(revision_id, loaded) = \ | (revision_id, loaded) = \ | ||||
self._load_revision(p_info, origin) | self._load_revision(p_info, origin) | ||||
if loaded: | if loaded: | ||||
status_load = 'eventful' | status_load = 'eventful' | ||||
else: | else: | ||||
status_visit = 'partial' | status_visit = 'partial' | ||||
if revision_id is None: | if revision_id is None: | ||||
continue | continue | ||||
tmp_revisions[version].append((branch_name, revision_id)) | tmp_revisions[version].append((branch_name, revision_id)) | ||||
snapshot = self._load_snapshot(default_version, tmp_revisions) | |||||
if hasattr(self.storage, 'flush'): | |||||
self.storage.flush() | |||||
except Exception: | except Exception: | ||||
logger.exception('Fail to load %s' % self.url) | logger.exception('Fail to load %s' % self.url) | ||||
status_visit = 'partial' | status_visit = 'partial' | ||||
status_load = 'failed' | status_load = 'failed' | ||||
finally: | finally: | ||||
lewo: I think we could be in trouble if this method raises an exception. So, maybe we could instead… | |||||
Done Inline Actions Let's make the functional loader resilient to this and not the loader one (I think with a try/except within its own implementation). The same goes for the default version, by the way. ardumont: Let's make the functional loader resilient to this and not the loader one.
Since there is only… | |||||
# Retrieve the default release version (the "latest" one) | |||||
Done Inline Actions^ D2769 makes me wonder... ardumont: ^ D2769 makes me wonder... | |||||
Done Inline Actionsnvm that comment now, it's no longer relevant. ardumont: nvm that comment now, it's no longer relevant. | |||||
default_version = self.get_default_version() | |||||
logger.debug('default version: %s', default_version) | |||||
extra_branches = self.extra_branches() | |||||
logger.debug('extra branches: %s', extra_branches) | |||||
snapshot = self._load_snapshot( | |||||
default_version, tmp_revisions, extra_branches) | |||||
if hasattr(self.storage, 'flush'): | |||||
self.storage.flush() | |||||
self.storage.origin_visit_update( | self.storage.origin_visit_update( | ||||
origin=self.url, visit_id=visit.visit, status=status_visit, | origin=self.url, visit_id=visit.visit, status=status_visit, | ||||
snapshot=snapshot and snapshot.id) | snapshot=snapshot and snapshot.id) | ||||
result = { | result: Dict[str, Any] = { | ||||
'status': status_load, | 'status': status_load, | ||||
} # type: Dict[str, Any] | } | ||||
if snapshot: | if snapshot: | ||||
result['snapshot_id'] = hash_to_hex(snapshot.id) | result['snapshot_id'] = hash_to_hex(snapshot.id) | ||||
return result | return result | ||||
def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | ||||
"""Does all the loading of a revision itself: | """Does all the loading of a revision itself: | ||||
* downloads a package and uncompresses it | * downloads a package and uncompresses it | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: | ||||
logger.debug('Revision: %s', revision) | logger.debug('Revision: %s', revision) | ||||
self.storage.revision_add([revision]) | self.storage.revision_add([revision]) | ||||
return (revision.id, True) | return (revision.id, True) | ||||
def _load_snapshot( | def _load_snapshot( | ||||
self, default_version: str, | self, default_version: str, | ||||
revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot: | revisions: Dict[str, List[Tuple[str, bytes]]], | ||||
"""Build snapshot out of the current revisions stored. Then load it in | extra_branches: Dict[bytes, Mapping[str, Any]] | ||||
Done Inline Actions It'd be great if the types (revisions, extra_branches) were a tad unified here. ardumont: It'd be great if the types (revisions, extra_branches) were a tad unified here.
But let's keep… | |||||
the storage. | ) -> Optional[Snapshot]: | ||||
"""Build snapshot out of the current revisions stored and extra branches. | |||||
Then load it in the storage. | |||||
""" | """ | ||||
if not revisions and not extra_branches: | |||||
return None | |||||
olasd Unsubmitted Not Done Inline Actions Aren't there some edge cases where generating an empty snapshot is the right thing to do? olasd: Aren't there some edge cases where generating an empty snapshot is the right thing to do? | |||||
ardumont Author Unsubmitted Done Inline Actions No idea. That would simplify things, but I don't know which is better. ardumont: No idea.
That would simplify things, but I don't know which is better. | |||||
logger.debug('revisions: %s', revisions) | logger.debug('revisions: %s', revisions) | ||||
# Build and load the snapshot | # Build and load the snapshot | ||||
branches = {} # type: Dict[bytes, Mapping[str, Any]] | branches = {} # type: Dict[bytes, Mapping[str, Any]] | ||||
for version, branch_name_revisions in revisions.items(): | for version, branch_name_revisions in revisions.items(): | ||||
if version == default_version and \ | if version == default_version and \ | ||||
len(branch_name_revisions) == 1: | len(branch_name_revisions) == 1: | ||||
# only 1 branch (no ambiguity), we can create an alias | # only 1 branch (no ambiguity), we can create an alias | ||||
# branch 'HEAD' | # branch 'HEAD' | ||||
branch_name, _ = branch_name_revisions[0] | branch_name, _ = branch_name_revisions[0] | ||||
# except for some corner case (deposit) | # except for some corner case (deposit) | ||||
if branch_name != 'HEAD': | if branch_name != 'HEAD': | ||||
branches[b'HEAD'] = { | branches[b'HEAD'] = { | ||||
'target_type': 'alias', | 'target_type': 'alias', | ||||
'target': branch_name.encode('utf-8'), | 'target': branch_name.encode('utf-8'), | ||||
} | } | ||||
for branch_name, target in branch_name_revisions: | for branch_name, target in branch_name_revisions: | ||||
branches[branch_name.encode('utf-8')] = { | branches[branch_name.encode('utf-8')] = { | ||||
'target_type': 'revision', | 'target_type': 'revision', | ||||
'target': target, | 'target': target, | ||||
} | } | ||||
# Deal with extra-branches | # Deal with extra-branches | ||||
for name, branch_target in self.extra_branches().items(): | for name, branch_target in extra_branches.items(): | ||||
if name in branches: | if name in branches: | ||||
logger.error("Extra branch '%s' has been ignored", | logger.error("Extra branch '%s' has been ignored", | ||||
name) | name) | ||||
else: | else: | ||||
branches[name] = branch_target | branches[name] = branch_target | ||||
snapshot_data = { | snapshot_data = { | ||||
'branches': branches | 'branches': branches | ||||
} | } | ||||
logger.debug('snapshot: %s', snapshot_data) | logger.debug('snapshot: %s', snapshot_data) | ||||
snapshot = Snapshot.from_dict(snapshot_data) | snapshot = Snapshot.from_dict(snapshot_data) | ||||
logger.debug('snapshot: %s', snapshot) | logger.debug('snapshot: %s', snapshot) | ||||
self.storage.snapshot_add([snapshot]) | self.storage.snapshot_add([snapshot]) | ||||
return snapshot | return snapshot |
I think we could be in trouble if this method raises an exception. So, maybe we could instead get extra_branches at the beginning of the load method, before loading any revisions: we are then sure to have extra_branches and we are able to catch exceptions of this method.