Changeset View
Standalone View
swh/loader/package/debian/loader.py
Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | class DebianLoader(PackageLoader): | ||||
def get_package_info(self, version: str) -> Generator[ | def get_package_info(self, version: str) -> Generator[ | ||||
Tuple[str, Mapping[str, Any]], None, None]: | Tuple[str, Mapping[str, Any]], None, None]: | ||||
meta = self.packages[version] | meta = self.packages[version] | ||||
p_info = meta.copy() | p_info = meta.copy() | ||||
p_info['raw'] = meta | p_info['raw'] = meta | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_package_artifacts: Dict, artifact_metadata: Dict) \ | self, known_package_artifacts: Mapping, | ||||
artifact_metadata: Mapping[str, Any]) \ | |||||
-> Optional[bytes]: | -> Optional[bytes]: | ||||
try: | |||||
ardumont: docstring is in the main class [1]
i do not know how we can deal with that properly without… | |||||
artifacts_to_fetch = artifact_metadata['files'] | artifacts_to_fetch = artifact_metadata['files'] | ||||
logger.debug('k_p_artifacts: %s', known_package_artifacts) | logger.debug('k_p_artifacts: %s', known_package_artifacts) | ||||
logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) | logger.debug('artifacts_to_fetch: %s', artifacts_to_fetch) | ||||
for rev_id, known_artifacts in known_package_artifacts.items(): | for rev_id, known_artifacts in known_package_artifacts.items(): | ||||
logger.debug('Revision: %s', rev_id) | logger.debug('Revision: %s', rev_id) | ||||
logger.debug('Associated known_artifacts: %s', known_artifacts) | logger.debug('Associated known_artifacts: %s', known_artifacts) | ||||
Not Done Inline Actionsthese lines are backwards olasd: these lines are backwards | |||||
known_artifacts = known_artifacts['extrinsic']['raw']['files'] | known_artifacts = known_artifacts['extrinsic']['raw']['files'] | ||||
rev_found = True | rev_found = False | ||||
Done Inline Actionsthis i found backward... ardumont: this i found backward... | |||||
for a_name, k_artifact in known_artifacts.items(): | for a_name, k_artifact in known_artifacts.items(): | ||||
artifact_to_fetch = artifacts_to_fetch.get(a_name) | artifact_to_fetch = artifacts_to_fetch.get(a_name) | ||||
logger.debug('artifact_to_fetch: %s', artifact_to_fetch) | logger.debug('artifact_to_fetch: %s', artifact_to_fetch) | ||||
if artifact_to_fetch is None: | if artifact_to_fetch is None: | ||||
# as soon as we do not see an artifact, we consider we need | # as soon as we do not see an artifact, we consider we | ||||
# to check the other revision | # need to check the other revision | ||||
rev_found = False | continue | ||||
if k_artifact['sha256'] != artifact_to_fetch['sha256']: | if k_artifact['sha256'] == artifact_to_fetch['sha256']: | ||||
# Hash is different, we consider we need to check the other | # Hash is different, we consider we need to check the | ||||
# revisions | # other revisions | ||||
rev_found = False | rev_found = True | ||||
Not Done Inline ActionsIt looks like what we want is to match the artifacts of the current package with all packages referenced in the previous snapshot in turn. I _think_ that the logic in this loop is inverted, but I have a hard time comprehending it. The comments are backwards too :( The logic would be much clearer if we did the comparison using set logic:
We could even make these sets ordered by filename, which lets us turn them into (hashable) tuples, which lets us build a dict mapping ((filename, (hash, hash, size)), ...) to revision_ids directly when fetching the previous snapshot, making this function a plain dict lookup. olasd: It looks like what we want is to match the artifacts of the current package with all packages… | |||||
break | |||||
if rev_found: | if rev_found: | ||||
logger.debug('Existing revision %s found for new artifacts.', | logger.debug( | ||||
'Existing revision %s found for new artifacts.', | |||||
rev_id) | rev_id) | ||||
return rev_id | return rev_id | ||||
except KeyError as e: | |||||
logger.warning( | |||||
Not Done Inline ActionsThis try block feels overly broad. Could you just do if 'files' not in artifact_metadata: logger.warning(...) return None # or even artifact_metadata.get('revision_id') at the beginning of the function instead? olasd: This try block feels overly broad.
Could you just do
```lang=python
if 'files' not in… | |||||
Done Inline Actionsthat's not enough that can explode a bit later. ardumont: that's not enough
that can explode a bit later. | |||||
Not Done Inline ActionsI agree with @olasd that this try block is too long. There's a high risk of catching unrelated error. vlorentz: I agree with @olasd that this try block is too long. There's a high risk of catching unrelated… | |||||
Done Inline Actionsi agree the try is long. So it should fail only for that. ardumont: i agree the try is long.
But here, all this shenanigans is to solve a revision from metadata… | |||||
Done Inline Actions
as in: 8d073f5dc6c8b7a322618c35'}}, 'version': '0.3.9-2', 'revision_id': None}} Dec 19 14:58:02 worker01 python3[557]: [2019-12-19 14:58:02,443: ERROR/ForkPoolWorker-1] Fail to load deb://Debian/packages/freshplayerplugin Traceback (most recent call last): File "/usr/lib/python3/dist-packages/swh/loader/package/loader.py", line 286, in load known_artifacts, p_info['raw']) File "/usr/lib/python3/dist-packages/swh/loader/package/debian/loader.py", line 104, in resolve_revision_from known_artifacts = known_artifacts['extrinsic']['raw']['files'] KeyError: 'extrinsic' ardumont: > that can explode a bit later
as in:
```
8d073f5dc6c8b7a322618c35'}}, 'version': '0.3.9-2'… | |||||
Not Done Inline ActionsOne more argument for parsing these only once when fetching the old snapshot (see my other inline comment). ;) olasd: One more argument for parsing these only once when fetching the old snapshot (see my other… | |||||
'Older revision format detected, skipping. Reason: %s', e) | |||||
logger.debug('No existing revision found for the new artifacts.') | logger.debug('No existing revision found for the new artifacts.') | ||||
return None | return None | ||||
def download_package(self, p_info: Mapping[str, Any], | def download_package(self, p_info: Mapping[str, Any], | ||||
tmpdir: str) -> List[Tuple[str, Mapping]]: | tmpdir: str) -> List[Tuple[str, Mapping]]: | ||||
"""Contrary to other package loaders (1 package, 1 artifact), | """Contrary to other package loaders (1 package, 1 artifact), | ||||
`a_metadata` represents the package's datafiles set to fetch: | `a_metadata` represents the package's datafiles set to fetch: | ||||
- <package-version>.orig.tar.gz | - <package-version>.orig.tar.gz | ||||
▲ Show 20 Lines • Show All 270 Lines • Show Last 20 Lines |
docstring is in the main class [1]
i do not know how we can deal with that properly without replicating stuff.
[1] https://forge.softwareheritage.org/source/swh-loader-core/browse/master/swh/loader/package/loader.py$0-158