Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/debian/lister.py
# Copyright (C) 2017-2021 The Software Heritage developers | # Copyright (C) 2017-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import bz2 | import bz2 | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||
import gzip | import gzip | ||||
from itertools import product | from itertools import product | ||||
import logging | import logging | ||||
import lzma | import lzma | ||||
import os | |||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple | from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple | ||||
from urllib.parse import urljoin | from urllib.parse import urljoin | ||||
from debian.deb822 import Sources | from debian.deb822 import Sources | ||||
import requests | import requests | ||||
from swh.scheduler.interface import SchedulerInterface | from swh.scheduler.interface import SchedulerInterface | ||||
from swh.scheduler.model import ListedOrigin | from swh.scheduler.model import ListedOrigin | ||||
▲ Show 20 Lines • Show All 171 Lines • ▼ Show 20 Lines | def get_origins_from_page(self, page: DebianPageType) -> Iterator[ListedOrigin]: | ||||
for field_ in src_pkg._multivalued_fields: | for field_ in src_pkg._multivalued_fields: | ||||
if field_.startswith("checksums-"): | if field_.startswith("checksums-"): | ||||
sum_name = field_[len("checksums-") :] | sum_name = field_[len("checksums-") :] | ||||
else: | else: | ||||
sum_name = "md5sum" | sum_name = "md5sum" | ||||
if field_ in src_pkg: | if field_ in src_pkg: | ||||
for entry in src_pkg[field_]: | for entry in src_pkg[field_]: | ||||
name = entry["name"] | name = entry["name"] | ||||
files[name]["name"] = entry["name"] | files[name]["name"] = name | ||||
files[name]["size"] = int(entry["size"], 10) | files[name]["size"] = int(entry["size"], 10) | ||||
files[name][sum_name] = entry[sum_name] | files[name][sum_name] = entry[sum_name] | ||||
files[name]["uri"] = os.path.join( | |||||
self.url, src_pkg["Directory"], name | |||||
) | |||||
# extract package name and version | # extract package name and version | ||||
package_name = src_pkg["Package"] | package_name = src_pkg["Package"] | ||||
package_version = src_pkg["Version"] | package_version = src_pkg["Version"] | ||||
# build origin url | # build origin url | ||||
origin_url = self.origin_url_for_package(package_name) | origin_url = self.origin_url_for_package(package_name) | ||||
# create package version key as expected by the debian loader | # create package version key as expected by the debian loader | ||||
▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines |