diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -88,6 +88,8 @@ } == stats origin_visit = next(loader.storage.origin_visit_get(url)) + print(origin_visit) + print(loader.storage.snapshot_get(origin_visit['snapshot'])) assert origin_visit['status'] == 'partial' assert origin_visit['type'] == 'deposit' diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -268,6 +268,20 @@ tmp_revisions = {} # type: Dict[str, List] snapshot = None + def finalize_visit() -> Dict[str, Any]: + if hasattr(self.storage, 'flush'): + self.storage.flush() + self.storage.origin_visit_update( + origin=self.url, visit_id=visit.visit, status=status_visit, + snapshot=snapshot and snapshot.id) + + result: Dict[str, Any] = { + 'status': status_load, + } + if snapshot: + result['snapshot_id'] = hash_to_hex(snapshot.id) + return result + # Prepare origin and origin_visit origin = Origin(url=self.url) try: @@ -275,7 +289,8 @@ visit = self.storage.origin_visit_add( self.url, date=self.visit_date, type=self.visit_type) except Exception: - logger.exception('Failed to create origin/origin_visit:') + logger.exception('Failed to initialize origin_visit for %s', + self.url) return {'status': 'failed'} try: @@ -283,50 +298,68 @@ logger.debug('last snapshot: %s', last_snapshot) known_artifacts = self.known_artifacts(last_snapshot) logger.debug('known artifacts: %s', known_artifacts) + except Exception: + logger.exception('Failed to get previous state for %s', self.url) + status_visit = 'partial' + status_load = 'failed' + return finalize_visit() + + load_exceptions = [] + + for version in self.get_versions(): # for each + logger.debug('version: %s', version) + tmp_revisions[version] = [] + # `p_` stands for `package_` + for branch_name, p_info in self.get_package_info(version): + logger.debug('package_info: %s', p_info) + revision_id = self.resolve_revision_from( + known_artifacts, p_info['raw']) + if revision_id is None: + try: + (revision_id, loaded) = self._load_revision(p_info, + origin) + except Exception as e: + load_exceptions.append(e) + logger.exception('Failed loading branch %s for %s', + branch_name, self.url) + continue + + if loaded: + status_load = 'eventful' - for version in self.get_versions(): # for each - logger.debug('version: %s', version) - tmp_revisions[version] = [] - # `p_` stands for `package_` - for branch_name, p_info in self.get_package_info(version): - logger.debug('package_info: %s', p_info) - revision_id = self.resolve_revision_from( - known_artifacts, p_info['raw']) if revision_id is None: - (revision_id, loaded) = \ - self._load_revision(p_info, origin) - if loaded: - status_load = 'eventful' - else: - status_visit = 'partial' - if revision_id is None: - continue + continue - tmp_revisions[version].append((branch_name, revision_id)) + tmp_revisions[version].append((branch_name, revision_id)) - except Exception: - logger.exception('Fail to load %s' % self.url) + if load_exceptions: status_visit = 'partial' + + if not tmp_revisions: + # We could not load any revisions; fail completely + status_visit = 'failed' status_load = 'failed' - finally: + return finalize_visit() + + try: # Retrieve the default release version (the "latest" one) default_version = self.get_default_version() logger.debug('default version: %s', default_version) + + # Retrieve extra branches extra_branches = self.extra_branches() logger.debug('extra branches: %s', extra_branches) - snapshot = self._load_snapshot( - default_version, tmp_revisions, extra_branches) - if hasattr(self.storage, 'flush'): - self.storage.flush() - self.storage.origin_visit_update( - origin=self.url, visit_id=visit.visit, status=status_visit, - snapshot=snapshot and snapshot.id) - result: Dict[str, Any] = { - 'status': status_load, - } - if snapshot: - result['snapshot_id'] = hash_to_hex(snapshot.id) - return result + + snapshot = self._load_snapshot(default_version, tmp_revisions, + extra_branches) + + except Exception: + logger.exception('Failed to build snapshot for origin %s', + self.url) + status_visit = 'partial' + status_load = 'failed' + + return finalize_visit() def _load_revision(self, p_info, origin) -> Tuple[Optional[Sha1Git], bool]: """Does all the loading of a revision itself: