diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -42,13 +42,7 @@ self.deposit_id = deposit_id self.client = ApiClient(url=config_deposit['url'], auth=config_deposit['auth']) - self._metadata = None - - @property - def metadata(self): - if self._metadata is None: - self._metadata = self.client.metadata_get(self.deposit_id) - return self._metadata + self.metadata: Dict[str, Any] = {} def get_versions(self) -> Sequence[str]: # only 1 branch 'HEAD' with no alias since we only have 1 snapshot @@ -103,7 +97,13 @@ ) def load(self) -> Dict: - # Usual loading + # First making sure the deposit is known prior to trigger a loading + try: + self.metadata = self.client.metadata_get(self.deposit_id) + except ValueError: + logger.error(f'Unknown deposit {self.deposit_id}, ignoring') + return {'status': 'failed'} + # Then usual loading r = super().load() success = r['status'] != 'failed' diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -26,13 +26,15 @@ assert loader.client.base_url == swh_loader_config['deposit']['url'] -def test_deposit_loading_failure_to_fetch_metadata(swh_config): - """Error during fetching artifact ends us with failed/partial visit +def test_deposit_loading_unknown_deposit( + swh_config, requests_mock_datadir): + """Loading an unknown deposit should fail + no origin, no visit, no snapshot """ # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' url = 'some-url' - unknown_deposit_id = 666 + unknown_deposit_id = 667 loader = DepositLoader(url, unknown_deposit_id) # does not exist actual_load_status = loader.load() @@ -43,8 +45,8 @@ assert { 'content': 0, 'directory': 0, - 'origin': 1, - 'origin_visit': 1, + 'origin': 0, + 'origin_visit': 0, 'person': 0, 'release': 0, 'revision': 0, @@ -52,10 +54,6 @@ 'snapshot': 0, } == stats - origin_visit = next(loader.storage.origin_visit_get(url)) - assert origin_visit['status'] == 'partial' - assert origin_visit['type'] == 'deposit' - requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[ 'https://deposit.softwareheritage.org/1/private/666/raw/', diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -280,10 +280,6 @@ known_artifacts = self.known_artifacts(last_snapshot) logger.debug('known artifacts: %s', known_artifacts) - # Retrieve the default release version (the "latest" one) - default_version = self.get_default_version() - logger.debug('default version: %s', default_version) - for version in self.get_versions(): # for each logger.debug('version: %s', version) tmp_revisions[version] = [] @@ -304,20 +300,26 @@ tmp_revisions[version].append((branch_name, revision_id)) - snapshot = self._load_snapshot(default_version, tmp_revisions) - if hasattr(self.storage, 'flush'): - self.storage.flush() except Exception: logger.exception('Fail to load %s' % self.url) status_visit = 'partial' status_load = 'failed' finally: + # Retrieve the default release version (the "latest" one) + default_version = self.get_default_version() + logger.debug('default version: %s', default_version) + extra_branches = self.extra_branches() + logger.debug('extra branches: %s', extra_branches) + snapshot = self._load_snapshot( + default_version, tmp_revisions, extra_branches) + if hasattr(self.storage, 'flush'): + self.storage.flush() self.storage.origin_visit_update( origin=self.url, visit_id=visit.visit, status=status_visit, snapshot=snapshot and snapshot.id) - result = { + result: Dict[str, Any] = { 'status': status_load, - } # type: Dict[str, Any] + } if snapshot: result['snapshot_id'] = hash_to_hex(snapshot.id) return result @@ -402,9 +404,11 @@ def _load_snapshot( self, default_version: str, - revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot: - """Build snapshot out of the current revisions stored. Then load it in - the storage. + revisions: Dict[str, List[Tuple[str, bytes]]], + extra_branches: Dict[bytes, Mapping[str, Any]] + ) -> Optional[Snapshot]: + """Build snapshot out of the current revisions stored and extra branches. + Then load it in the storage. """ logger.debug('revisions: %s', revisions) @@ -430,7 +434,7 @@ } # Deal with extra-branches - for name, branch_target in self.extra_branches().items(): + for name, branch_target in extra_branches.items(): if name in branches: logger.error("Extra branch '%s' has been ignored", name) diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -233,14 +233,16 @@ # {visit: partial, status: uneventful, no snapshot} -def test_release_with_traceback(swh_config): +def test_release_with_traceback(swh_config, requests_mock_datadir): url = 'https://pypi.org/project/0805nexter' - with patch('swh.loader.package.pypi.loader.PyPILoader.get_default_version', - side_effect=ValueError('Problem')): + with patch('swh.loader.package.pypi.loader.PyPILoader.last_snapshot', + side_effect=ValueError('Fake problem to fail the visit')): loader = PyPILoader(url) actual_load_status = loader.load() - assert actual_load_status == {'status': 'failed'} + assert actual_load_status['status'] == 'failed' + assert actual_load_status[ + 'snapshot_id'] == '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e' stats = get_stats(loader.storage) @@ -253,7 +255,7 @@ 'release': 0, 'revision': 0, 'skipped_content': 0, - 'snapshot': 0, + 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url))