Changeset View
Changeset View
Standalone View
Standalone View
swh/lister/debian/lister.py
# Copyright (C) 2017-2019 The Software Heritage developers | # Copyright (C) 2017-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import bz2 | import bz2 | ||||
from collections import defaultdict | from collections import defaultdict | ||||
import datetime | import datetime | ||||
import gzip | import gzip | ||||
import lzma | import lzma | ||||
import logging | import logging | ||||
from debian.deb822 import Sources | from debian.deb822 import Sources | ||||
from sqlalchemy.orm import joinedload, load_only | from sqlalchemy.orm import joinedload, load_only | ||||
from sqlalchemy.schema import CreateTable, DropTable | from sqlalchemy.schema import CreateTable, DropTable | ||||
from typing import Mapping, Optional | |||||
from swh.lister.debian.models import ( | from swh.lister.debian.models import ( | ||||
AreaSnapshot, Distribution, DistributionSnapshot, Package, | AreaSnapshot, Distribution, DistributionSnapshot, Package, | ||||
TempPackage, | TempPackage, | ||||
) | ) | ||||
from swh.lister.core.lister_base import ListerBase, FetchError | from swh.lister.core.lister_base import ListerBase, FetchError | ||||
from swh.lister.core.lister_transports import ListerHttpTransport | from swh.lister.core.lister_transports import ListerHttpTransport | ||||
Show All 9 Lines | |||||
class DebianLister(ListerHttpTransport, ListerBase): | class DebianLister(ListerHttpTransport, ListerBase): | ||||
MODEL = Package | MODEL = Package | ||||
PATH_TEMPLATE = None | PATH_TEMPLATE = None | ||||
LISTER_NAME = 'debian' | LISTER_NAME = 'debian' | ||||
instance = 'debian' | instance = 'debian' | ||||
def __init__(self, override_config=None): | def __init__(self, distribution: str = 'Debian', | ||||
date: Optional[datetime.datetime] = None, | |||||
override_config: Mapping = {}): | |||||
"""Initialize the debian lister for a given distribution at a given | |||||
date. | |||||
Args: | |||||
distribution: name of the distribution (e.g. "Debian") | |||||
date: date the snapshot is taken (defaults to now if empty) | |||||
override_config: Override configuration (which takes precedence | |||||
over the parameters if provided) | |||||
""" | |||||
ListerHttpTransport.__init__(self, url="notused") | ListerHttpTransport.__init__(self, url="notused") | ||||
ListerBase.__init__(self, override_config=override_config) | ListerBase.__init__(self, override_config=override_config) | ||||
self.distribution = override_config.get('distribution', distribution) | |||||
self.date = override_config.get('date', date) or datetime.datetime.now( | |||||
ardumont: Now we move the weirdness into the constructor, though...
That's to tiptoe around the… | |||||
tz=datetime.timezone.utc) | |||||
def transport_request(self, identifier): | def transport_request(self, identifier): | ||||
"""Subvert ListerHttpTransport.transport_request, to try several | """Subvert ListerHttpTransport.transport_request, to try several | ||||
index URIs in turn. | index URIs in turn. | ||||
The Debian repository format supports several compression algorithms | The Debian repository format supports several compression algorithms | ||||
across the ages, so we try several URIs. | across the ages, so we try several URIs. | ||||
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | class DebianLister(ListerHttpTransport, ListerBase): | ||||
def create_tasks_for_snapshot(self, snapshot): | def create_tasks_for_snapshot(self, snapshot): | ||||
tasks = [ | tasks = [ | ||||
snapshot.task_for_package(name, versions) | snapshot.task_for_package(name, versions) | ||||
for name, versions in snapshot.get_packages().items() | for name, versions in snapshot.get_packages().items() | ||||
] | ] | ||||
return self.scheduler.create_tasks(tasks) | return self.scheduler.create_tasks(tasks) | ||||
def run(self, distribution='Debian', date=None): | def run(self): | ||||
"""Run the lister for a given (distribution, area) tuple. | """Run the lister for a given (distribution, area) tuple. | ||||
Args: | |||||
distribution (str): name of the distribution (e.g. "Debian") | |||||
date (datetime.datetime): date the snapshot is taken (defaults to | |||||
now) | |||||
""" | """ | ||||
distribution = self.db_session\ | distribution = self.db_session\ | ||||
.query(Distribution)\ | .query(Distribution)\ | ||||
.options(joinedload(Distribution.areas))\ | .options(joinedload(Distribution.areas))\ | ||||
.filter(Distribution.name == distribution)\ | .filter(Distribution.name == self.distribution)\ | ||||
.one_or_none() | .one_or_none() | ||||
if not distribution: | if not distribution: | ||||
raise ValueError("Distribution %s is not registered" % | raise ValueError("Distribution %s is not registered" % | ||||
distribution) | self.distribution) | ||||
Done Inline Actions: That actually fixes a current bug in the logs. ardumont: That actually fixes a current bug in the logs.
When the distribution is not registered (without… | |||||
if not distribution.type == 'deb': | if not distribution.type == 'deb': | ||||
raise ValueError("Distribution %s is not a Debian derivative" % | raise ValueError("Distribution %s is not a Debian derivative" % | ||||
distribution) | distribution) | ||||
date = date or datetime.datetime.now(tz=datetime.timezone.utc) | date = self.date | ||||
logger.debug('Creating snapshot for distribution %s on date %s' % | logger.debug('Creating snapshot for distribution %s on date %s' % | ||||
(distribution, date)) | (distribution, date)) | ||||
snapshot = DistributionSnapshot(date=date, distribution=distribution) | snapshot = DistributionSnapshot(date=date, distribution=distribution) | ||||
self.db_session.add(snapshot) | self.db_session.add(snapshot) | ||||
Show All 19 Lines |
Now we move the weirdness into the constructor, though...
That's to tiptoe around get_lister, which initializes the lister (only override_config is passed there).
So here, I first read from override_config (used in the CLI context) and fall back to the constructor parameters if they are passed...
Finally, I fall back on default values if nothing is provided.