Page MenuHomeSoftware Heritage

D2860.id10202.diff
No OneTemporary

D2860.id10202.diff

diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py
--- a/swh/loader/package/deposit/loader.py
+++ b/swh/loader/package/deposit/loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -42,13 +42,7 @@
self.deposit_id = deposit_id
self.client = ApiClient(url=config_deposit['url'],
auth=config_deposit['auth'])
- self._metadata = None
-
- @property
- def metadata(self):
- if self._metadata is None:
- self._metadata = self.client.metadata_get(self.deposit_id)
- return self._metadata
+ self.metadata: Dict[str, Any] = {}
def get_versions(self) -> Sequence[str]:
# only 1 branch 'HEAD' with no alias since we only have 1 snapshot
@@ -103,7 +97,13 @@
)
def load(self) -> Dict:
- # Usual loading
+ # First making sure the deposit is known prior to trigger a loading
+ try:
+ self.metadata = self.client.metadata_get(self.deposit_id)
+ except ValueError:
+ logger.error(f'Unknown deposit {self.deposit_id}, ignoring')
+ return {'status': 'failed'}
+ # Then usual loading
r = super().load()
success = r['status'] != 'failed'
diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py
--- a/swh/loader/package/deposit/tests/test_deposit.py
+++ b/swh/loader/package/deposit/tests/test_deposit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -26,13 +26,15 @@
assert loader.client.base_url == swh_loader_config['deposit']['url']
-def test_deposit_loading_failure_to_fetch_metadata(swh_config):
- """Error during fetching artifact ends us with failed/partial visit
+def test_deposit_loading_unknown_deposit(
+ swh_config, requests_mock_datadir):
+ """Loading an unknown deposit should fail
+ no origin, no visit, no snapshot
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = 'some-url'
- unknown_deposit_id = 666
+ unknown_deposit_id = 667
loader = DepositLoader(url, unknown_deposit_id) # does not exist
actual_load_status = loader.load()
@@ -43,8 +45,8 @@
assert {
'content': 0,
'directory': 0,
- 'origin': 1,
- 'origin_visit': 1,
+ 'origin': 0,
+ 'origin_visit': 0,
'person': 0,
'release': 0,
'revision': 0,
@@ -52,10 +54,6 @@
'snapshot': 0,
} == stats
- origin_visit = next(loader.storage.origin_visit_get(url))
- assert origin_visit['status'] == 'partial'
- assert origin_visit['type'] == 'deposit'
-
requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[
'https://deposit.softwareheritage.org/1/private/666/raw/',
diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py
--- a/swh/loader/package/loader.py
+++ b/swh/loader/package/loader.py
@@ -280,10 +280,6 @@
known_artifacts = self.known_artifacts(last_snapshot)
logger.debug('known artifacts: %s', known_artifacts)
- # Retrieve the default release version (the "latest" one)
- default_version = self.get_default_version()
- logger.debug('default version: %s', default_version)
-
for version in self.get_versions(): # for each
logger.debug('version: %s', version)
tmp_revisions[version] = []
@@ -304,20 +300,26 @@
tmp_revisions[version].append((branch_name, revision_id))
- snapshot = self._load_snapshot(default_version, tmp_revisions)
- if hasattr(self.storage, 'flush'):
- self.storage.flush()
except Exception:
logger.exception('Fail to load %s' % self.url)
status_visit = 'partial'
status_load = 'failed'
finally:
+ # Retrieve the default release version (the "latest" one)
+ default_version = self.get_default_version()
+ logger.debug('default version: %s', default_version)
+ extra_branches = self.extra_branches()
+ logger.debug('extra branches: %s', extra_branches)
+ snapshot = self._load_snapshot(
+ default_version, tmp_revisions, extra_branches)
+ if hasattr(self.storage, 'flush'):
+ self.storage.flush()
self.storage.origin_visit_update(
origin=self.url, visit_id=visit.visit, status=status_visit,
snapshot=snapshot and snapshot.id)
- result = {
+ result: Dict[str, Any] = {
'status': status_load,
- } # type: Dict[str, Any]
+ }
if snapshot:
result['snapshot_id'] = hash_to_hex(snapshot.id)
return result
@@ -402,9 +404,11 @@
def _load_snapshot(
self, default_version: str,
- revisions: Dict[str, List[Tuple[str, bytes]]]) -> Snapshot:
- """Build snapshot out of the current revisions stored. Then load it in
- the storage.
+ revisions: Dict[str, List[Tuple[str, bytes]]],
+ extra_branches: Dict[bytes, Mapping[str, Any]]
+ ) -> Optional[Snapshot]:
+ """Build snapshot out of the current revisions stored and extra branches.
+ Then load it in the storage.
"""
logger.debug('revisions: %s', revisions)
@@ -430,7 +434,7 @@
}
# Deal with extra-branches
- for name, branch_target in self.extra_branches().items():
+ for name, branch_target in extra_branches.items():
if name in branches:
logger.error("Extra branch '%s' has been ignored",
name)
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -233,14 +233,16 @@
# {visit: partial, status: uneventful, no snapshot}
-def test_release_with_traceback(swh_config):
+def test_release_with_traceback(swh_config, requests_mock_datadir):
url = 'https://pypi.org/project/0805nexter'
- with patch('swh.loader.package.pypi.loader.PyPILoader.get_default_version',
- side_effect=ValueError('Problem')):
+ with patch('swh.loader.package.pypi.loader.PyPILoader.last_snapshot',
+ side_effect=ValueError('Fake problem to fail the visit')):
loader = PyPILoader(url)
actual_load_status = loader.load()
- assert actual_load_status == {'status': 'failed'}
+ assert actual_load_status['status'] == 'failed'
+ assert actual_load_status[
+ 'snapshot_id'] == '1a8893e6a86f444e8be8e7bda6cb34fb1735a00e'
stats = get_stats(loader.storage)
@@ -253,7 +255,7 @@
'release': 0,
'revision': 0,
'skipped_content': 0,
- 'snapshot': 0,
+ 'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:23 PM (6 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3215751

Event Timeline