Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/debian/loader.py
# Copyright (C) 2017-2021 The Software Heritage developers | # Copyright (C) 2017-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import email.utils | import email.utils | ||||
import logging | import logging | ||||
from os import path | from os import path | ||||
import re | import re | ||||
import subprocess | import subprocess | ||||
from typing import ( | from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple | ||||
Any, | |||||
Dict, | |||||
FrozenSet, | |||||
Iterator, | |||||
List, | |||||
Mapping, | |||||
Optional, | |||||
Sequence, | |||||
Tuple, | |||||
) | |||||
import attr | import attr | ||||
from dateutil.parser import parse as parse_date | from dateutil.parser import parse as parse_date | ||||
from debian.changelog import Changelog | from debian.changelog import Changelog | ||||
from debian.deb822 import Dsc | from debian.deb822 import Dsc | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import download, release_name | from swh.loader.package.utils import download, release_name | ||||
▲ Show 20 Lines • Show All 218 Lines • ▼ Show 20 Lines | ) -> Optional[bytes]: | ||||
visit) and the new artifact to fetch, try to solve the corresponding | visit) and the new artifact to fetch, try to solve the corresponding | ||||
revision. | revision. | ||||
""" | """ | ||||
artifacts_to_fetch = p_info.files | artifacts_to_fetch = p_info.files | ||||
if not artifacts_to_fetch: | if not artifacts_to_fetch: | ||||
return None | return None | ||||
def to_set(data: DebianPackageInfo) -> FrozenSet[Tuple[str, str, int]]: | new_dsc_files = [ | ||||
return frozenset( | file for (name, file) in p_info.files.items() if name.endswith(".dsc") | ||||
(name, meta.sha256, meta.size) for name, meta in data.files.items() | ] | ||||
if len(new_dsc_files) != 1: | |||||
raise ValueError( | |||||
f"Expected exactly one new .dsc file for package {p_info.name}, " | |||||
f"got {len(new_dsc_files)}" | |||||
) | ) | ||||
# what we want to avoid downloading back if we have them already | new_dsc_sha256 = new_dsc_files[0].sha256 | ||||
set_new_artifacts = to_set(p_info) | |||||
known_artifacts_revision_id = {} | |||||
for rev_id, known_artifacts in known_package_artifacts.items(): | for rev_id, known_artifacts in known_package_artifacts.items(): | ||||
extrinsic = known_artifacts.get("extrinsic") | extrinsic = known_artifacts.get("extrinsic") | ||||
if not extrinsic: | if not extrinsic: | ||||
continue | continue | ||||
s = to_set(DebianPackageInfo.from_metadata(extrinsic["raw"], url=p_info.url)) | known_p_info = DebianPackageInfo.from_metadata(extrinsic["raw"], url=p_info.url) | ||||
known_artifacts_revision_id[s] = rev_id | dsc = [ | ||||
file for (name, file) in known_p_info.files.items() if name.endswith(".dsc") | |||||
] | |||||
if len(dsc) != 1: | |||||
raise ValueError( | |||||
f"Expected exactly one known .dsc file for package {p_info.name}, " | |||||
f"got {len(dsc)}" | |||||
) | |||||
return known_artifacts_revision_id.get(set_new_artifacts) | if new_dsc_sha256 == dsc[0].sha256: | ||||
return rev_id | |||||
return None | |||||
def uid_to_person(uid: str) -> Dict[str, str]: | def uid_to_person(uid: str) -> Dict[str, str]: | ||||
"""Convert an uid to a person suitable for insertion. | """Convert an uid to a person suitable for insertion. | ||||
Args: | Args: | ||||
uid: an uid of the form "Name <email@ddress>" | uid: an uid of the form "Name <email@ddress>" | ||||
▲ Show 20 Lines • Show All 212 Lines • Show Last 20 Lines |