Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/debian/loader.py
Show All 11 Lines | |||||
import attr | import attr | ||||
from dateutil.parser import parse as parse_date | from dateutil.parser import parse as parse_date | ||||
from debian.changelog import Changelog | from debian.changelog import Changelog | ||||
from debian.deb822 import Dsc | from debian.deb822 import Dsc | ||||
from swh.loader.package.loader import BasePackageInfo, PackageLoader | from swh.loader.package.loader import BasePackageInfo, PackageLoader | ||||
from swh.loader.package.utils import download, release_name | from swh.loader.package.utils import download, release_name | ||||
from swh.model.hashutil import hash_to_bytes | |||||
from swh.model.model import ( | from swh.model.model import ( | ||||
Person, | Person, | ||||
Revision, | Revision, | ||||
RevisionType, | RevisionType, | ||||
Sha1Git, | Sha1Git, | ||||
TimestampWithTimezone, | TimestampWithTimezone, | ||||
) | ) | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
UPLOADERS_SPLIT = re.compile(r"(?<=\>)\s*,\s*") | UPLOADERS_SPLIT = re.compile(r"(?<=\>)\s*,\s*") | ||||
class DscCountError(ValueError): | |||||
"""Raised when an unexpected number of .dsc files is seen""" | |||||
pass | |||||
@attr.s | @attr.s | ||||
class DebianFileMetadata: | class DebianFileMetadata: | ||||
md5sum = attr.ib(type=str) | md5sum = attr.ib(type=str) | ||||
name = attr.ib(type=str) | name = attr.ib(type=str) | ||||
"""Filename""" | """Filename""" | ||||
sha256 = attr.ib(type=str) | sha256 = attr.ib(type=str) | ||||
size = attr.ib(type=int) | size = attr.ib(type=int) | ||||
uri = attr.ib(type=str) | uri = attr.ib(type=str) | ||||
Show All 28 Lines | def from_metadata(cls, a_metadata: Dict[str, Any], url: str) -> "DebianPackageInfo": | ||||
files={ | files={ | ||||
file_name: DebianFileMetadata(**file_metadata) | file_name: DebianFileMetadata(**file_metadata) | ||||
for (file_name, file_metadata) in a_metadata.get("files", {}).items() | for (file_name, file_metadata) in a_metadata.get("files", {}).items() | ||||
}, | }, | ||||
name=a_metadata["name"], | name=a_metadata["name"], | ||||
version=a_metadata["version"], | version=a_metadata["version"], | ||||
) | ) | ||||
def extid(self) -> Optional[bytes]: | |||||
dsc_files = [ | |||||
file for (name, file) in self.files.items() if name.endswith(".dsc") | |||||
] | |||||
if len(dsc_files) != 1: | |||||
raise DscCountError( | |||||
f"Expected exactly one .dsc file for package {self.name}, " | |||||
f"got {len(dsc_files)}" | |||||
) | |||||
return hash_to_bytes(dsc_files[0].sha256) | |||||
@attr.s | @attr.s | ||||
class IntrinsicPackageMetadata: | class IntrinsicPackageMetadata: | ||||
"""Metadata extracted from a package's .dsc file.""" | """Metadata extracted from a package's .dsc file.""" | ||||
name = attr.ib(type=str) | name = attr.ib(type=str) | ||||
version = attr.ib(type=str) | version = attr.ib(type=str) | ||||
changelog = attr.ib(type=DebianPackageChangelog) | changelog = attr.ib(type=DebianPackageChangelog) | ||||
▲ Show 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | def get_versions(self) -> Sequence[str]: | ||||
""" | """ | ||||
return list(self.packages.keys()) | return list(self.packages.keys()) | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: | ||||
meta = self.packages[version] | meta = self.packages[version] | ||||
p_info = DebianPackageInfo.from_metadata(meta, url=self.url) | p_info = DebianPackageInfo.from_metadata(meta, url=self.url) | ||||
yield release_name(version), p_info | yield release_name(version), p_info | ||||
def known_artifact_to_extid(self, known_artifact: Dict) -> Optional[bytes]: | |||||
sha256 = _artifact_to_dsc_sha256(known_artifact, url=self.url) | |||||
if sha256 is None: | |||||
return None | |||||
return hash_to_bytes(sha256) | |||||
def resolve_revision_from( | def resolve_revision_from( | ||||
self, known_package_artifacts: Mapping, p_info: DebianPackageInfo | self, known_artifacts: Dict, p_info: DebianPackageInfo, | ||||
) -> Optional[bytes]: | ) -> Optional[bytes]: | ||||
return resolve_revision_from(known_package_artifacts, p_info) | try: | ||||
return super().resolve_revision_from(known_artifacts, p_info) | |||||
except DscCountError: | |||||
# known_artifacts are corrupted, ignore them instead of crashing | |||||
return None | |||||
def download_package( | def download_package( | ||||
self, p_info: DebianPackageInfo, tmpdir: str | self, p_info: DebianPackageInfo, tmpdir: str | ||||
) -> List[Tuple[str, Mapping]]: | ) -> List[Tuple[str, Mapping]]: | ||||
"""Contrary to other package loaders (1 package, 1 artifact), | """Contrary to other package loaders (1 package, 1 artifact), | ||||
`p_info.files` represents the package's datafiles set to fetch: | `p_info.files` represents the package's datafiles set to fetch: | ||||
- <package-version>.orig.tar.gz | - <package-version>.orig.tar.gz | ||||
- <package-version>.dsc | - <package-version>.dsc | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | ) -> Optional[Revision]: | ||||
"provider": dsc_url, | "provider": dsc_url, | ||||
"when": self.visit_date.isoformat(), | "when": self.visit_date.isoformat(), | ||||
"raw": p_info.raw_info, | "raw": p_info.raw_info, | ||||
}, | }, | ||||
}, | }, | ||||
) | ) | ||||
def resolve_revision_from( | |||||
known_package_artifacts: Mapping, p_info: DebianPackageInfo | |||||
) -> Optional[bytes]: | |||||
"""Given known package artifacts (resolved from the snapshot of previous | |||||
visit) and the new artifact to fetch, try to solve the corresponding | |||||
revision. | |||||
""" | |||||
artifacts_to_fetch = p_info.files | |||||
if not artifacts_to_fetch: | |||||
return None | |||||
new_dsc_files = [ | |||||
file for (name, file) in p_info.files.items() if name.endswith(".dsc") | |||||
] | |||||
if len(new_dsc_files) != 1: | |||||
raise ValueError( | |||||
f"Expected exactly one new .dsc file for package {p_info.name}, " | |||||
f"got {len(new_dsc_files)}" | |||||
) | |||||
new_dsc_sha256 = new_dsc_files[0].sha256 | |||||
for rev_id, known_artifacts in known_package_artifacts.items(): | |||||
if new_dsc_sha256 == _artifact_to_dsc_sha256(known_artifacts, p_info.url): | |||||
return rev_id | |||||
return None | |||||
def _artifact_to_dsc_sha256(known_artifacts: Dict, url: str) -> Optional[str]: | def _artifact_to_dsc_sha256(known_artifacts: Dict, url: str) -> Optional[str]: | ||||
extrinsic = known_artifacts.get("extrinsic") | extrinsic = known_artifacts.get("extrinsic") | ||||
if not extrinsic: | if not extrinsic: | ||||
return None | return None | ||||
known_p_info = DebianPackageInfo.from_metadata(extrinsic["raw"], url=url) | known_p_info = DebianPackageInfo.from_metadata(extrinsic["raw"], url=url) | ||||
dsc = [file for (name, file) in known_p_info.files.items() if name.endswith(".dsc")] | dsc = [file for (name, file) in known_p_info.files.items() if name.endswith(".dsc")] | ||||
if len(dsc) != 1: | if len(dsc) != 1: | ||||
raise ValueError( | raise DscCountError( | ||||
f"Expected exactly one known .dsc file for package {known_p_info.name}, " | f"Expected exactly one known .dsc file for package {known_p_info.name}, " | ||||
f"got {len(dsc)}" | f"got {len(dsc)}" | ||||
) | ) | ||||
return dsc[0].sha256 | return dsc[0].sha256 | ||||
def uid_to_person(uid: str) -> Dict[str, str]: | def uid_to_person(uid: str) -> Dict[str, str]: | ||||
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | Returns: | ||||
Tuple of dsc file's uri, dsc's full disk path | Tuple of dsc file's uri, dsc's full disk path | ||||
""" | """ | ||||
dsc_name = None | dsc_name = None | ||||
dsc_url = None | dsc_url = None | ||||
for filename, fileinfo in p_info.files.items(): | for filename, fileinfo in p_info.files.items(): | ||||
if filename.endswith(".dsc"): | if filename.endswith(".dsc"): | ||||
if dsc_name: | if dsc_name: | ||||
raise ValueError( | raise DscCountError( | ||||
"Package %s_%s references several dsc files." | "Package %s_%s references several dsc files." | ||||
% (p_info.name, p_info.version) | % (p_info.name, p_info.version) | ||||
) | ) | ||||
dsc_url = fileinfo.uri | dsc_url = fileinfo.uri | ||||
dsc_name = filename | dsc_name = filename | ||||
return dsc_url, dsc_name | return dsc_url, dsc_name | ||||
▲ Show 20 Lines • Show All 122 Lines • Show Last 20 Lines |