Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7437733
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
31 KB
Subscribers
None
View Options
diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
index 9b7cdef..395b8d9 100644
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -1,463 +1,466 @@
# Copyright (C) 2017-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import email.utils
import logging
from os import path
import re
import subprocess
from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple
import attr
from dateutil.parser import parse as parse_date
from debian.changelog import Changelog
from debian.deb822 import Dsc
from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID
from swh.loader.package.utils import download, release_name
from swh.model.hashutil import hash_to_bytes
from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Separator between entries of a comma-separated list of "Name <email>" uids:
# a comma (with optional surrounding whitespace) is only a separator when the
# match is immediately preceded by a closing ">".
UPLOADERS_SPLIT = re.compile(r"(?<=\>)\s*,\s*")
# Type tag returned by DebianPackageInfo.extid() next to the .dsc sha256 digest.
EXTID_TYPE = "dsc-sha256"
class DscCountError(ValueError):
    """Error for a package referencing an unexpected number of .dsc files."""
@attr.s
class DebianFileMetadata:
    """Metadata of a single file referenced by a Debian source package."""

    # Filename
    name = attr.ib(type=str)
    sha256 = attr.ib(type=str)
    size = attr.ib(type=int)
    # URL of this specific file
    uri = attr.ib(type=str)

    # Neither md5sum nor sha1 is always available, so both are optional.
    md5sum = attr.ib(type=str, default="")
    sha1 = attr.ib(type=str, default="")

    # Some of the DSC files imported in swh apparently had a Checksums-SHA512
    # field which got recorded in the archive. Current versions of dpkg-source
    # don't seem to generate them, but keep the field available for
    # future-proofing.
    sha512 = attr.ib(type=str, default="")
@attr.s
class DebianPackageChangelog:
    """Information extracted from a Debian package changelog."""

    # Dict with fields like model.Person, except the values are str instead of
    # bytes, and 'email' is optional.
    person = attr.ib(type=Dict[str, str])
    # Date of the changelog entry.
    date = attr.ib(type=str)
    # List of (package_name, version) tuples.
    history = attr.ib(type=List[Tuple[str, str]])
@attr.s
class DebianPackageInfo(BasePackageInfo):
    """Package information for one version of a Debian source package."""

    # The metadata dict this instance was built from, kept untouched.
    raw_info = attr.ib(type=Dict[str, Any])
    # Metadata of the files (.deb, .dsc, ...) of the package, keyed by file name.
    files = attr.ib(type=Dict[str, DebianFileMetadata])
    name = attr.ib(type=str)
    # Value of the metadata's "version" key (see from_metadata).
    full_version = attr.ib(type=str)

    @classmethod
    def from_metadata(
        cls, a_metadata: Dict[str, Any], url: str, version: str
    ) -> "DebianPackageInfo":
        """Build a package info out of a raw metadata dict.

        Args:
            a_metadata: dict holding the "name" and "version" keys, and
              optionally a "files" dict mapping file names to the keyword
              arguments of DebianFileMetadata
            url: stored as the ``url`` attribute
            version: stored as the ``version`` attribute

        Returns:
            The new DebianPackageInfo instance

        """
        file_entries = a_metadata.get("files", {})
        files = {
            file_name: DebianFileMetadata(**file_metadata)
            for file_name, file_metadata in file_entries.items()
        }
        return cls(
            url=url,
            filename=None,
            version=version,
            raw_info=a_metadata,
            files=files,
            name=a_metadata["name"],
            full_version=a_metadata["version"],
        )

    def extid(self) -> Optional[PartialExtID]:
        """Compute the extid out of the sha256 of the package's only .dsc file.

        Raises:
            DscCountError: when the package does not hold exactly one .dsc file

        """
        dsc_entries = [
            metadata
            for filename, metadata in self.files.items()
            if filename.endswith(".dsc")
        ]
        dsc_count = len(dsc_entries)
        if dsc_count == 1:
            (dsc_file,) = dsc_entries
            return (EXTID_TYPE, hash_to_bytes(dsc_file.sha256))
        raise DscCountError(
            f"Expected exactly one .dsc file for package {self.name}, "
            f"got {dsc_count}"
        )
@attr.s
class IntrinsicPackageMetadata:
    """Metadata read from a package's own files (.dsc file and changelog)."""

    name = attr.ib(type=str)
    version = attr.ib(type=str)
    changelog = attr.ib(type=DebianPackageChangelog)
    # A list of dicts with fields like model.Person, except the values are str
    # instead of bytes, and 'email' is optional.
    maintainers = attr.ib(type=List[Dict[str, str]])
class DebianLoader(PackageLoader[DebianPackageInfo]):
    """Load debian origins into swh archive.

    """

    visit_type = "deb"

    def __init__(
        self,
        storage: StorageInterface,
        url: str,
        packages: Mapping[str, Any],
        max_content_size: Optional[int] = None,
    ):
        """Debian Loader implementation.

        Args:
            storage: passed as is to the parent loader
            url: Origin url (e.g. deb://Debian/packages/cicero)
            packages: versioned packages and associated artifacts, example::

              {
                'stretch/contrib/0.7.2-3': {
                  'name': 'cicero',
                  'version': '0.7.2-3',
                  'files': {
                    'cicero_0.7.2-3.diff.gz': {
                       'md5sum': 'a93661b6a48db48d59ba7d26796fc9ce',
                       'name': 'cicero_0.7.2-3.diff.gz',
                       'sha256': 'f039c9642fe15c75bed5254315e2a29f...',
                       'size': 3964,
                       'uri': 'http://d.d.o/cicero_0.7.2-3.diff.gz',
                    },
                    'cicero_0.7.2-3.dsc': {
                      'md5sum': 'd5dac83eb9cfc9bb52a15eb618b4670a',
                      'name': 'cicero_0.7.2-3.dsc',
                      'sha256': '35b7f1048010c67adfd8d70e4961aefb...',
                      'size': 1864,
                      'uri': 'http://d.d.o/cicero_0.7.2-3.dsc',
                    },
                    'cicero_0.7.2.orig.tar.gz': {
                      'md5sum': '4353dede07c5728319ba7f5595a7230a',
                      'name': 'cicero_0.7.2.orig.tar.gz',
                      'sha256': '63f40f2436ea9f67b44e2d4bd669dbab...',
                      'size': 96527,
                      'uri': 'http://d.d.o/cicero_0.7.2.orig.tar.gz',
                    }
                  },
                },
                # ...
              }

            max_content_size: passed as is to the parent loader

        """
        super().__init__(storage=storage, url=url, max_content_size=max_content_size)
        self.packages = packages

    def get_versions(self) -> Sequence[str]:
        """Returns the keys of the packages input (e.g.
           stretch/contrib/0.7.2-3, etc...)

        """
        return list(self.packages)

    def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]:
        """Yield the single (release name, package info) pair of a version."""
        p_info = DebianPackageInfo.from_metadata(
            self.packages[version], url=self.url, version=version
        )
        yield release_name(version), p_info

    def download_package(
        self, p_info: DebianPackageInfo, tmpdir: str
    ) -> List[Tuple[str, Mapping]]:
        """Contrary to other package loaders (1 package, 1 artifact),
        `p_info.files` represents the package's datafiles set to fetch:
        - <package-version>.orig.tar.gz
        - <package-version>.dsc
        - <package-version>.diff.gz

        This is delegated to the module-level `download_package` function.

        Returns:
            One (tmpdir, hashes) pair per downloaded file

        """
        hashes_per_file = download_package(p_info, tmpdir)
        logger.debug("all_hashes: %s", hashes_per_file)
        artifacts = [(tmpdir, hashes) for hashes in hashes_per_file.values()]
        logger.debug("res: %s", artifacts)
        return artifacts

    def uncompress(
        self, dl_artifacts: List[Tuple[str, Mapping[str, Any]]], dest: str
    ) -> str:
        """Extract the downloaded source package, delegating to `extract_package`."""
        logger.debug("dl_artifacts: %s", dl_artifacts)
        return extract_package(dl_artifacts, dest=dest)

    def build_release(
        self, p_info: DebianPackageInfo, uncompressed_path: str, directory: Sha1Git,
    ) -> Optional[Release]:
        """Build the synthetic release targeting the extracted package directory.

        The author and date of the release come from the package changelog.

        Raises:
            ValueError: when the package references no .dsc file

        """
        dsc_url, dsc_name = dsc_information(p_info)
        if not dsc_name:
            raise ValueError("dsc name for url %s should not be None" % dsc_url)

        # the .dsc file sits next to the extraction directory
        dsc_path = path.join(path.dirname(uncompressed_path), dsc_name)
        intrinsic_metadata = get_intrinsic_package_metadata(
            p_info, dsc_path, uncompressed_path
        )
        logger.debug("intrinsic_metadata: %s", intrinsic_metadata)
        logger.debug("p_info: %s", p_info)

        message = (
            f"Synthetic release for Debian source package {p_info.name} "
            f"version {p_info.full_version}\n"
        )
        changelog = intrinsic_metadata.changelog
        author = prepare_person(changelog.person)
        date = TimestampWithTimezone.from_iso8601(changelog.date)

        # inspired from swh.loader.debian.converters.package_metadata_to_revision
        return Release(
            name=p_info.version.encode(),
            message=message.encode(),
            author=author,
            date=date,
            target=directory,
            target_type=ObjectType.DIRECTORY,
            synthetic=True,
        )
def uid_to_person(uid: str) -> Dict[str, str]:
    """Convert an uid to a person suitable for insertion.

    Args:
        uid: an uid of the form "Name <email@ddress>"

    Returns:
        a dictionary with the following keys:

        - name: the name associated to the uid, or the whole uid when the uid
          could not be split into both a name and an email
        - email: the mail associated to the uid, or "" in the fallback case
        - fullname: the actual uid input

    """
    logger.debug("uid: %s", uid)

    name, mail = email.utils.parseaddr(uid)
    # Check the parsed address (`mail`), not the `email` module: the module is
    # always truthy, which silently disabled the second half of this condition.
    if name and mail:
        return {"name": name, "email": mail, "fullname": uid}

    # Either part is missing: keep the raw uid as the name.
    return {"name": uid, "email": "", "fullname": uid}
def prepare_person(person: Mapping[str, str]) -> Person:
    """Turn a person dict with str values into a storable Person.

    Args:
        person: A person dict whose values are str

    Returns:
        The Person built out of the utf-8 encoded values

    """
    encoded = {}
    for field, text in person.items():
        encoded[field] = text.encode("utf-8")
    return Person.from_dict(encoded)
def download_package(p_info: DebianPackageInfo, tmpdir: Any) -> Mapping[str, Any]:
    """Download every file of a source package into a temporary directory,
    passing each file's expected sha256 along to the `download` helper.

    Args:
        p_info: Information on a package
        tmpdir: Where to download and extract the files to ingest

    Returns:
        Dict of swh hashes per filename key

    """
    hashes_per_file: Dict[str, Any] = {}
    for filename, fileinfo in p_info.files.items():
        logger.debug("fileinfo: %s", fileinfo)
        expected_hashes = {"sha256": fileinfo.sha256}
        logger.debug("extrinsic_hashes(%s): %s", filename, expected_hashes)
        # only the hashes are of interest here, not the downloaded file's path
        _, computed_hashes = download(
            fileinfo.uri, dest=tmpdir, filename=filename, hashes=expected_hashes
        )
        hashes_per_file[filename] = computed_hashes

    logger.debug("all_hashes: %s", hashes_per_file)
    return hashes_per_file
def dsc_information(p_info: DebianPackageInfo) -> Tuple[Optional[str], Optional[str]]:
    """Look up the .dsc file among the files of a package.

    Args:
        p_info: Package metadata information

    Returns:
        Tuple of dsc file's uri, dsc file's name; both are None when the
        package references no .dsc file

    Raises:
        DscCountError: when the package references more than one .dsc file

    """
    found: Optional[Tuple[str, str]] = None
    for filename, fileinfo in p_info.files.items():
        if not filename.endswith(".dsc"):
            continue
        if found is not None:
            raise DscCountError(
                "Package %s_%s references several dsc files."
                % (p_info.name, p_info.version)
            )
        found = (fileinfo.uri, filename)

    if found is None:
        return None, None
    return found
def extract_package(dl_artifacts: List[Tuple[str, Mapping]], dest: str) -> str:
    """Extract a Debian source package to a given directory.

    Note that after extraction the target directory will be the root of the
    extracted package, rather than containing it.

    Args:
        dl_artifacts: (download directory, hashes) pairs of the downloaded
          files; each hashes mapping holds the file's name under its
          "filename" key. The first .dsc entry found drives the extraction and
          is looked up in the download directory of the first pair.
        dest: directory receiving the "extracted" directory and the
          "extract.log" file holding the dpkg-source output

    Returns:
        Package extraction directory

    Raises:
        ValueError: when there is no artifact at all, when no artifact is a
          .dsc file, or when dpkg-source exits with a non-zero code

    """
    if not dl_artifacts:
        raise ValueError("No artifact provided to extract a Debian source package")

    a_path = dl_artifacts[0][0]
    logger.debug("dl_artifacts: %s", dl_artifacts)
    for _, hashes in dl_artifacts:
        logger.debug("hashes: %s", hashes)
        filename = hashes["filename"]
        if filename.endswith(".dsc"):
            dsc_name = filename
            break
    else:
        # Without this guard, dsc_name would be unbound below.
        raise ValueError("No .dsc file found among the downloaded artifacts")

    dsc_path = path.join(a_path, dsc_name)
    destdir = path.join(dest, "extracted")
    logfile = path.join(dest, "extract.log")
    logger.debug(
        "extract Debian source package %s in %s",
        dsc_path,
        destdir,
        extra={"swh_type": "deb_extract", "swh_dsc": dsc_path, "swh_destdir": destdir},
    )

    cmd = [
        "dpkg-source",
        "--no-copy",
        "--no-check",
        "--ignore-bad-version",
        "-x",
        dsc_path,
        destdir,
    ]

    try:
        with open(logfile, "w") as stdout:
            subprocess.check_call(cmd, stdout=stdout, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # Surface the dpkg-source output in the error, closing the log file
        # once read.
        with open(logfile, "r") as log:
            logdata = log.read()
        raise ValueError(
            "dpkg-source exited with code %s: %s" % (e.returncode, logdata)
        ) from None

    return destdir
def get_intrinsic_package_metadata(
    p_info: DebianPackageInfo, dsc_path: str, extracted_path: str
) -> IntrinsicPackageMetadata:
    """Get the package metadata from the source package at dsc_path,
    extracted in extracted_path.

    Args:
        p_info: the package information
        dsc_path: path to the package's dsc file
        extracted_path: the path where the package got extracted

    Returns:
        An IntrinsicPackageMetadata holding:

        - name, version: taken from p_info
        - changelog: author and date as exposed by the parsed
          debian/changelog, plus the (package_name, package_version) tuples of
          all its blocks but the first one
        - maintainers: the .dsc "Maintainer" followed by its "Uploaders", if any

    """
    with open(dsc_path, "rb") as dsc:
        parsed_dsc = Dsc(dsc)

    # Parse the changelog to retrieve the rest of the package information
    changelog_path = path.join(extracted_path, "debian/changelog")
    with open(changelog_path, "rb") as changelog_file:
        try:
            parsed_changelog = Changelog(changelog_file)
        except UnicodeDecodeError:
            logger.warning(
                "Unknown encoding for changelog %s, falling back to iso",
                changelog_path,
                extra={
                    "swh_type": "deb_changelog_encoding",
                    "swh_name": p_info.name,
                    "swh_version": str(p_info.version),
                    "swh_changelog": changelog_path,
                },
            )

            # need to reset as Changelog scrolls to the end of the file
            changelog_file.seek(0)
            parsed_changelog = Changelog(changelog_file, encoding="iso-8859-15")

    history: List[Tuple[str, str]] = []
    for block in parsed_changelog:
        assert block.package is not None
        history.append((block.package, str(block.version)))

    changelog = DebianPackageChangelog(
        person=uid_to_person(parsed_changelog.author),
        date=parse_date(parsed_changelog.date).isoformat(),
        # the first block is left out of the history
        history=history[1:],
    )

    maintainers = [
        uid_to_person(parsed_dsc["Maintainer"]),
    ]
    # Splitting a missing or empty "Uploaders" field yields [""]: skip blank
    # entries so no empty person ends up in the maintainers.
    maintainers.extend(
        uid_to_person(person)
        for person in UPLOADERS_SPLIT.split(parsed_dsc.get("Uploaders", ""))
        if person.strip()
    )

    return IntrinsicPackageMetadata(
        name=p_info.name,
        version=str(p_info.version),
        changelog=changelog,
        maintainers=maintainers,
    )
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
index 1a61646..6f40e9e 100644
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -1,463 +1,478 @@
# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from copy import deepcopy
+import hashlib
import logging
from os import path
import pytest
+import requests
from swh.loader.package.debian.loader import (
DebianLoader,
DebianPackageChangelog,
DebianPackageInfo,
IntrinsicPackageMetadata,
download_package,
dsc_information,
extract_package,
get_intrinsic_package_metadata,
prepare_person,
uid_to_person,
)
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
ObjectType,
Person,
Release,
Snapshot,
SnapshotBranch,
TargetType,
Timestamp,
TimestampWithTimezone,
)
# Module-level logger, named after this test module.
logger = logging.getLogger(__name__)
# Origin url shared by all the tests below.
URL = "deb://Debian/packages/cicero"
# Metadata of the cicero source package, version 0.7.2-3: one entry per file
# (diff, dsc, orig tarball) with its checksums, size and download uri.
PACKAGE_FILES = {
"name": "cicero",
"version": "0.7.2-3",
"files": {
"cicero_0.7.2-3.diff.gz": {
"md5sum": "a93661b6a48db48d59ba7d26796fc9ce",
"name": "cicero_0.7.2-3.diff.gz",
"sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa
"size": 3964,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.diff.gz", # noqa
},
"cicero_0.7.2-3.dsc": {
"md5sum": "d5dac83eb9cfc9bb52a15eb618b4670a",
"name": "cicero_0.7.2-3.dsc",
"sha256": "35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03", # noqa
"size": 1864,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-3.dsc", # noqa
}, # noqa
"cicero_0.7.2.orig.tar.gz": {
"md5sum": "4353dede07c5728319ba7f5595a7230a",
"name": "cicero_0.7.2.orig.tar.gz",
"sha256": "63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786", # noqa
"size": 96527,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz", # noqa
},
},
}
# Metadata of the same package, version 0.7.2-4: the diff and dsc entries
# differ from PACKAGE_FILES while the orig tarball entry is identical.
PACKAGE_FILES2 = {
"name": "cicero",
"version": "0.7.2-4",
"files": {
"cicero_0.7.2-4.diff.gz": {
"md5sum": "1e7e6fc4a59d57c98082a3af78145734",
"name": "cicero_0.7.2-4.diff.gz",
"sha256": "2e6fa296ee7005473ff58d0971f4fd325617b445671480e9f2cfb738d5dbcd01", # noqa
"size": 4038,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.diff.gz", # noqa
},
"cicero_0.7.2-4.dsc": {
"md5sum": "1a6c8855a73b4282bb31d15518f18cde",
"name": "cicero_0.7.2-4.dsc",
"sha256": "913ee52f7093913420de5cbe95d63cfa817f1a1daf997961149501894e754f8b", # noqa
"size": 1881,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2-4.dsc", # noqa
}, # noqa
"cicero_0.7.2.orig.tar.gz": {
"md5sum": "4353dede07c5728319ba7f5595a7230a",
"name": "cicero_0.7.2.orig.tar.gz",
"sha256": "63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786", # noqa
"size": 96527,
"uri": "http://deb.debian.org/debian/pool/contrib/c/cicero/cicero_0.7.2.orig.tar.gz", # noqa
},
},
}
# Loader input holding a single version of the package.
PACKAGE_PER_VERSION = {
"stretch/contrib/0.7.2-3": PACKAGE_FILES,
}
# Loader input holding two versions of the package.
PACKAGES_PER_VERSION = {
"stretch/contrib/0.7.2-3": PACKAGE_FILES,
"buster/contrib/0.7.2-4": PACKAGE_FILES2,
}
def test_debian_first_visit(swh_storage, requests_mock_datadir):
    """With no prior visit, loading a debian project ends up with 1 snapshot

    """
    snapshot_hex = "ad1367b5470a03857be7c7325a5a8bde698e1800"
    snapshot_id = hash_to_bytes(snapshot_hex)
    release_id = hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf")

    loader = DebianLoader(swh_storage, URL, packages=PACKAGE_PER_VERSION)
    load_status = loader.load()
    assert load_status == {"status": "eventful", "snapshot_id": snapshot_hex}

    assert_last_visit_matches(
        swh_storage, URL, status="full", type="deb", snapshot=snapshot_id,
    )

    # the snapshot holds a single branch, targeting the package's release
    release_branch = SnapshotBranch(target_type=TargetType.RELEASE, target=release_id)
    check_snapshot(
        Snapshot(
            id=snapshot_id,
            branches={b"releases/stretch/contrib/0.7.2-3": release_branch},
        ),
        swh_storage,
    )

    expected_release = Release(
        id=release_id,
        name=b"stretch/contrib/0.7.2-3",
        message=b"Synthetic release for Debian source package cicero version 0.7.2-3\n",
        target=hash_to_bytes("798df511408c53bf842a8e54d4d335537836bdc3"),
        target_type=ObjectType.DIRECTORY,
        synthetic=True,
        author=Person(
            fullname=b"Samuel Thibault <sthibault@debian.org>",
            name=b"Samuel Thibault",
            email=b"sthibault@debian.org",
        ),
        date=TimestampWithTimezone(
            timestamp=Timestamp(seconds=1413730355, microseconds=0),
            offset=120,
            negative_utc=False,
        ),
    )
    assert swh_storage.release_get([release_id])[0] == expected_release

    expected_stats = {
        "content": 42,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,  # all artifacts under 1 release
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    }
    assert expected_stats == get_stats(swh_storage)
def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadir):
    """Loading the same debian project twice: the first visit is eventful, the
    second one is uneventful and ends up with the same snapshot

    """
    snapshot_hex = "ad1367b5470a03857be7c7325a5a8bde698e1800"
    snapshot_id = hash_to_bytes(snapshot_hex)

    loader = DebianLoader(swh_storage, URL, packages=PACKAGE_PER_VERSION)

    # First visit
    first_status = loader.load()
    assert first_status == {"status": "eventful", "snapshot_id": snapshot_hex}

    assert_last_visit_matches(
        swh_storage, URL, status="full", type="deb", snapshot=snapshot_id,
    )

    release_branch = SnapshotBranch(
        target_type=TargetType.RELEASE,
        target=hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf"),
    )
    check_snapshot(
        Snapshot(
            id=snapshot_id,
            branches={b"releases/stretch/contrib/0.7.2-3": release_branch},
        ),
        swh_storage,
    )

    stats_after_first_visit = get_stats(swh_storage)
    assert {
        "content": 42,
        "directory": 2,
        "origin": 1,
        "origin_visit": 1,
        "release": 1,  # all artifacts under 1 release
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,
    } == stats_after_first_visit

    # Second visit, no change in between
    second_status = loader.load()
    assert second_status["status"] == "uneventful"

    assert_last_visit_matches(
        swh_storage, URL, status="full", type="deb", snapshot=snapshot_id,
    )

    stats_after_second_visit = get_stats(swh_storage)
    assert {
        "content": 42,
        "directory": 2,
        "origin": 1,
        "origin_visit": 2,  # a new visit occurred
        "release": 1,
        "revision": 0,
        "skipped_content": 0,
        "snapshot": 1,  # same snapshot across 2 visits
    } == stats_after_second_visit

    fetched_urls = [
        request.url
        for request in requests_mock_datadir.request_history
        if request.url.startswith("http://deb.debian.org")
    ]
    # no package artifact url got requested more than once across the 2 visits
    assert len(fetched_urls) == len(set(fetched_urls))
def test_debian_uid_to_person():
    """A "Name <email>" uid gets split into its name and email parts."""
    uid = "Someone Name <someone@orga.org>"

    expected_person = {
        "name": "Someone Name",
        "email": "someone@orga.org",
        "fullname": uid,
    }

    assert uid_to_person(uid) == expected_person
def test_debian_prepare_person():
    """A person dict of str values is converted into a Person of bytes values."""
    person = {
        "name": "Someone Name",
        "email": "someone@orga.org",
        "fullname": "Someone Name <someone@orga.org>",
    }

    expected_author = Person(
        name=b"Someone Name",
        email=b"someone@orga.org",
        fullname=b"Someone Name <someone@orga.org>",
    )

    assert prepare_person(person) == expected_author
def test_debian_download_package(datadir, tmpdir, requests_mock_datadir):
    """Downloading a package returns the hashes, length and url of each file."""
    tmpdir = str(tmpdir)  # py3.5 work around (LocalPath issue)
    p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3")

    pool_url = "http://deb.debian.org/debian/pool/contrib/c/cicero/"
    # (filename, length, sha1, sha256) of each expected file
    expected_files = [
        (
            "cicero_0.7.2-3.diff.gz",
            3964,
            "0815282053f21601b0ec4adf7a8fe47eace3c0bc",
            "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c",  # noqa
        ),
        (
            "cicero_0.7.2-3.dsc",
            1864,
            "abbec4e8efbbc80278236e1dd136831eac08accd",
            "35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03",  # noqa
        ),
        (
            "cicero_0.7.2.orig.tar.gz",
            96527,
            "a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43",
            "63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786",  # noqa
        ),
    ]
    expected_hashes = {
        filename: {
            "checksums": {"sha1": sha1, "sha256": sha256},
            "filename": filename,
            "length": length,
            "url": pool_url + filename,
        }
        for filename, length, sha1, sha256 in expected_files
    }

    assert download_package(p_info, tmpdir) == expected_hashes
def test_debian_dsc_information_ok():
    """The uri and name of the package's only .dsc file are returned."""
    dsc_metadata = PACKAGE_FILES["files"]["cicero_0.7.2-3.dsc"]
    p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3")

    dsc_url, dsc_name = dsc_information(p_info)

    assert dsc_url == dsc_metadata["uri"]
    assert dsc_name == dsc_metadata["name"]
def test_debian_dsc_information_not_found():
    """Without any .dsc file in the package, both returned values are None."""
    p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3")
    # drop the only dsc file of the package
    p_info.files.pop("cicero_0.7.2-3.dsc")

    dsc_url, dsc_name = dsc_information(p_info)

    assert dsc_url is None
    assert dsc_name is None
def test_debian_dsc_information_missing_md5sum():
    """File metadata without md5sum is accepted, md5sum ending up empty."""
    package_files = deepcopy(PACKAGE_FILES)
    for file_metadata in package_files["files"].values():
        file_metadata.pop("md5sum")

    p_info = DebianPackageInfo.from_metadata(package_files, url=URL, version="0.7.2-3")

    for file_info in p_info.files.values():
        assert not file_info.md5sum
def test_debian_dsc_information_extra_sha1(requests_mock_datadir):
    """File metadata carrying an extra sha1 is accepted, and the sha1 is kept."""
    package_files = deepcopy(PACKAGE_FILES)

    # compute the sha1 of each file out of its content
    for file_metadata in package_files["files"].values():
        content = requests.get(file_metadata["uri"]).content
        file_metadata["sha1"] = hashlib.sha1(content).hexdigest()

    p_info = DebianPackageInfo.from_metadata(package_files, url=URL, version="0.7.2-3")

    for file_info in p_info.files.values():
        assert file_info.sha1
+
+
def test_debian_dsc_information_too_many_dsc_entries():
    """A package referencing two .dsc files is rejected."""
    dsc_filename = "cicero_0.7.2-3.dsc"
    p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3")

    # craft an extra dsc file, reusing the metadata of the existing one
    extra_dsc_filename = dsc_filename.replace("cicero", "ciceroo")
    p_info.files[extra_dsc_filename] = p_info.files[dsc_filename]

    expected_message = "Package %s_%s references several dsc" % (
        PACKAGE_FILES["name"],
        PACKAGE_FILES["version"],
    )
    with pytest.raises(ValueError, match=expected_message):
        dsc_information(p_info)
def test_debian_get_intrinsic_package_metadata(
    requests_mock_datadir, datadir, tmp_path
):
    """Download and extract a package, then check the metadata read from it."""
    tmp_path = str(tmp_path)  # py3.5 compat.
    p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3")
    logger.debug("p_info: %s", p_info)

    # download the package files, then extract the source package
    hashes_per_file = download_package(p_info, tmp_path)
    _, dsc_name = dsc_information(p_info)
    dl_artifacts = [(tmp_path, hashes) for hashes in hashes_per_file.values()]
    extracted_path = extract_package(dl_artifacts, tmp_path)

    # the dsc file sits next to the extraction directory
    dsc_path = path.join(path.dirname(extracted_path), dsc_name)
    actual_package_info = get_intrinsic_package_metadata(
        p_info, dsc_path, extracted_path
    )
    logger.debug("actual_package_info: %s", actual_package_info)

    samuel = {
        "email": "sthibault@debian.org",
        "fullname": "Samuel Thibault <sthibault@debian.org>",
        "name": "Samuel Thibault",
    }
    accessibility_team = {
        "email": "debian-accessibility@lists.debian.org",
        "fullname": "Debian Accessibility Team "
        "<debian-accessibility@lists.debian.org>",
        "name": "Debian Accessibility Team",
    }
    expected_changelog = DebianPackageChangelog(
        date="2014-10-19T16:52:35+02:00",
        history=[
            ("cicero", "0.7.2-2"),
            ("cicero", "0.7.2-1"),
            ("cicero", "0.7-1"),
        ],
        person=dict(samuel),
    )

    assert actual_package_info == IntrinsicPackageMetadata(
        changelog=expected_changelog,
        maintainers=[accessibility_team, dict(samuel)],
        name="cicero",
        version="0.7.2-3",
    )
def test_debian_multiple_packages(swh_storage, requests_mock_datadir):
    """Loading two versions of a package gives a snapshot with one release
    branch per version

    """
    snapshot_hex = "a83fa5c089b048161f0677b9614a4aae96a6ca18"
    snapshot_id = hash_to_bytes(snapshot_hex)

    loader = DebianLoader(swh_storage, URL, packages=PACKAGES_PER_VERSION)
    load_status = loader.load()
    assert load_status == {"status": "eventful", "snapshot_id": snapshot_hex}

    assert_last_visit_matches(
        swh_storage, URL, status="full", type="deb", snapshot=snapshot_id,
    )

    # release id expected behind each branch
    release_per_branch = {
        b"releases/stretch/contrib/0.7.2-3": "73e0ede9c21f7074ad1f9c81a774cfcb9e02addf",
        b"releases/buster/contrib/0.7.2-4": "9f6d8d868514f991af0d9f5d7173aba1236a5a75",
    }
    expected_snapshot = Snapshot(
        id=snapshot_id,
        branches={
            branch: SnapshotBranch(
                target_type=TargetType.RELEASE, target=hash_to_bytes(release_hex),
            )
            for branch, release_hex in release_per_branch.items()
        },
    )

    check_snapshot(expected_snapshot, swh_storage)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, Apr 15, 3:26 AM (9 h, 7 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3277485
Attached To
rDLDBASE Generic VCS/Package Loader
Event Timeline
Log In to Comment