Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/debian/lister.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import bz2 | import bz2 | ||||
from collections import defaultdict | from collections import defaultdict | ||||
import datetime | import datetime | ||||
import gzip | import gzip | ||||
import lzma | import lzma | ||||
import logging | import logging | ||||
from debian.deb822 import Sources | from debian.deb822 import Sources | ||||
from sqlalchemy.orm import joinedload, load_only | from sqlalchemy.orm import joinedload, load_only | ||||
from sqlalchemy.schema import CreateTable, DropTable | from sqlalchemy.schema import CreateTable, DropTable | ||||
from typing import Mapping, Optional | from typing import Mapping, Optional, Dict, Any | ||||
from requests import Response | |||||
from swh.lister.debian.models import ( | from swh.lister.debian.models import ( | ||||
AreaSnapshot, Distribution, DistributionSnapshot, Package, | AreaSnapshot, Distribution, DistributionSnapshot, Package, | ||||
TempPackage, | TempPackage, | ||||
) | ) | ||||
from swh.lister.core.lister_base import ListerBase, FetchError | from swh.lister.core.lister_base import ListerBase, FetchError | ||||
from swh.lister.core.lister_transports import ListerHttpTransport | from swh.lister.core.lister_transports import ListerHttpTransport | ||||
Show All 28 Lines | def __init__(self, distribution: str = 'Debian', | ||||
""" | """ | ||||
ListerHttpTransport.__init__(self, url="notused") | ListerHttpTransport.__init__(self, url="notused") | ||||
ListerBase.__init__(self, override_config=override_config) | ListerBase.__init__(self, override_config=override_config) | ||||
self.distribution = override_config.get('distribution', distribution) | self.distribution = override_config.get('distribution', distribution) | ||||
self.date = override_config.get('date', date) or datetime.datetime.now( | self.date = override_config.get('date', date) or datetime.datetime.now( | ||||
tz=datetime.timezone.utc) | tz=datetime.timezone.utc) | ||||
def transport_request(self, identifier): | def transport_request(self, identifier) -> Response: | ||||
"""Subvert ListerHttpTransport.transport_request, to try several | """Subvert ListerHttpTransport.transport_request, to try several | ||||
index URIs in turn. | index URIs in turn. | ||||
The Debian repository format supports several compression algorithms | The Debian repository format supports several compression algorithms | ||||
across the ages, so we try several URIs. | across the ages, so we try several URIs. | ||||
Once we have found a working URI, we break and set `self.decompressor` | Once we have found a working URI, we break and set `self.decompressor` | ||||
to the one that matched. | to the one that matched. | ||||
Show All 19 Lines | def transport_request(self, identifier) -> Response: | ||||
return response | return response | ||||
def request_uri(self, identifier): | def request_uri(self, identifier): | ||||
# In the overridden transport_request, we pass | # In the overridden transport_request, we pass | ||||
# ListerBase.transport_request() the full URI as identifier, so we | # ListerBase.transport_request() the full URI as identifier, so we | ||||
# need to return it here. | # need to return it here. | ||||
return identifier | return identifier | ||||
def request_params(self, identifier): | def request_params(self, identifier) -> Dict[str, Any]: | ||||
# Enable streaming to allow wrapping the response in the decompressor | # Enable streaming to allow wrapping the response in the decompressor | ||||
# in transport_response_simplified. | # in transport_response_simplified. | ||||
params = super().request_params(identifier) | params = super().request_params(identifier) | ||||
params['stream'] = True | params['stream'] = True | ||||
return params | return params | ||||
def transport_response_simplified(self, response): | def transport_response_simplified(self, response): | ||||
"""Decompress and parse the package index fetched in `transport_request`. | """Decompress and parse the package index fetched in `transport_request`. | ||||
▲ Show 20 Lines • Show All 150 Lines • Show Last 20 Lines |