diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -103,7 +103,13 @@ ) def load(self) -> Dict: - # Usual loading + # First making sure the deposit is known prior to trigger a loading + try: + self.metadata + except ValueError: + logger.error(f'Unknown deposit {self.deposit_id}, ignoring') + return {'status': 'failed'} + # Then usual loading r = super().load() success = r['status'] != 'failed' diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -26,13 +26,15 @@ assert loader.client.base_url == swh_loader_config['deposit']['url'] -def test_deposit_loading_failure_to_fetch_metadata(swh_config): - """Error during fetching artifact ends us with failed/partial visit +def test_deposit_loading_unknown_deposit( + swh_config, requests_mock_datadir): + """Loading an unknown deposit should fail + no origin, no visit, no snapshot """ # private api url form: 'https://deposit.s.o/1/private/hal/666/raw/' url = 'some-url' - unknown_deposit_id = 666 + unknown_deposit_id = 667 loader = DepositLoader(url, unknown_deposit_id) # does not exist actual_load_status = loader.load() @@ -43,8 +45,8 @@ assert { 'content': 0, 'directory': 0, - 'origin': 1, - 'origin_visit': 1, + 'origin': 0, + 'origin_visit': 0, 'person': 0, 'release': 0, 'revision': 0, @@ -52,10 +54,6 @@ 'snapshot': 0, } == stats - origin_visit = next(loader.storage.origin_visit_get(url)) - assert origin_visit['status'] == 'partial' - assert origin_visit['type'] == 'deposit' - requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[ 'https://deposit.softwareheritage.org/1/private/666/raw/', diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -280,10 +280,6 @@ known_artifacts = self.known_artifacts(last_snapshot) logger.debug('known artifacts: %s', known_artifacts) - # Retrieve the default release version (the "latest" one) - default_version = self.get_default_version() - logger.debug('default version: %s', default_version) - for version in self.get_versions(): # for each logger.debug('version: %s', version) tmp_revisions[version] = [] @@ -304,20 +300,21 @@ tmp_revisions[version].append((branch_name, revision_id)) - snapshot = self._load_snapshot(default_version, tmp_revisions) - if hasattr(self.storage, 'flush'): - self.storage.flush() except Exception: logger.exception('Fail to load %s' % self.url) status_visit = 'partial' status_load = 'failed' finally: + if tmp_revisions: + snapshot = self._load_snapshot(tmp_revisions) + if hasattr(self.storage, 'flush'): + self.storage.flush() self.storage.origin_visit_update( origin=self.url, visit_id=visit.visit, status=status_visit, snapshot=snapshot and snapshot.id) - result = { + result: Dict[str, Any] = { 'status': status_load, - } # type: Dict[str, Any] + } if snapshot: result['snapshot_id'] = hash_to_hex(snapshot.id) return result @@ -401,12 +398,14 @@ return (revision.id, True) def _load_snapshot( - self, default_version: str, - revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot: + self, revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot: """Build snapshot out of the current revisions stored. Then load it in the storage. """ + # Retrieve the default release version (the "latest" one) + default_version = self.get_default_version() + logger.debug('default version: %s', default_version) logger.debug('revisions: %s', revisions) # Build and load the snapshot branches = {} # type: Dict[bytes, Mapping[str, Any]] diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -233,10 +233,10 @@ # {visit: partial, status: uneventful, no snapshot} -def test_release_with_traceback(swh_config): +def test_release_with_traceback(swh_config, ): url = 'https://pypi.org/project/0805nexter' - with patch('swh.loader.package.pypi.loader.PyPILoader.get_default_version', - side_effect=ValueError('Problem')): + with patch('swh.loader.package.pypi.loader.PyPILoader.last_snapshot', + side_effect=ValueError('Fake problem to fail the visit')): loader = PyPILoader(url) actual_load_status = loader.load()