# Copyright (C) 2015-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import abc
from datetime import datetime
import logging

from sqlalchemy import Column, DateTime, Integer, String
from sqlalchemy.ext.declarative import DeclarativeMeta, declarative_base

from typing import Type, Union

from .abstractattribute import AbstractAttribute


# Declarative base of the models declared below; `initialize` creates and
# drops tables through its metadata.
SQLBase = declarative_base()

logger = logging.getLogger(__name__)


class ABCSQLMeta(abc.ABCMeta, DeclarativeMeta):
    """Metaclass mixing ``abc.ABCMeta`` with SQLAlchemy's declarative
    metaclass, so that a model can be both at once."""


class ModelBase(SQLBase, metaclass=ABCSQLMeta):
    """Abstract model holding the columns common to listed repositories.

    Subclasses are expected to replace the ``AbstractAttribute``
    placeholders (``__tablename__`` and ``uid``) with concrete values.
    """
    __abstract__ = True
    __tablename__ = \
        AbstractAttribute  # type: Union[Type[AbstractAttribute], str]

    # Placeholder for the primary key column of the concrete model.
    uid = AbstractAttribute(
        'Column(, primary_key=True)'
    )  # type: Union[AbstractAttribute, Column]

    name = Column(String, index=True)
    full_name = Column(String, index=True)
    html_url = Column(String)
    origin_url = Column(String)
    origin_type = Column(String)

    last_seen = Column(DateTime, nullable=False)

    task_id = Column(Integer)

    def __init__(self, **kw):
        # `last_seen` is always stamped with the construction time (naive
        # `datetime.now()`), replacing any value supplied by the caller.
        kw.update(last_seen=datetime.now())
        super().__init__(**kw)


class IndexingModelBase(ModelBase, metaclass=ABCSQLMeta):
    """Abstract model adding an ``indexable`` placeholder to `ModelBase`."""
    __abstract__ = True
    __tablename__ = \
        AbstractAttribute  # type: Union[Type[AbstractAttribute], str]

    # The value used for sorting, segmenting, or api query paging,
    # because uids aren't always sequential.
    indexable = AbstractAttribute(
        'Column(, index=True)'
    )  # type: Union[AbstractAttribute, Column]


def initialize(db_engine, drop_tables=False, **kwargs):
    """Default database initialization function for a lister.

    Typically called from the lister's initialization hook.

    Args:
        db_engine: the SQLAlchemy DB engine.
        drop_tables (bool): if True, tables will be dropped before
            (re)creating them.
        **kwargs: accepted but not used by this function.

    """
    # Every table registered on SQLBase's metadata is affected.
    metadata = SQLBase.metadata

    if drop_tables:
        logger.info('Dropping tables')
        metadata.drop_all(db_engine, checkfirst=True)

    logger.info('Creating tables')
    metadata.create_all(db_engine, checkfirst=True)
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

from typing import Any, List, Mapping


def debian_init(db_engine, lister=None,
                override_conf: Mapping[str, Any] = {},
                distributions: List[str] = ['stretch', 'buster'],
                area_names: List[str] = ['main', 'contrib', 'non-free']):
    """Initialize the debian data model.

    When no 'Debian' distribution row exists yet, create it together with
    one area per (distribution, area name) pair, then commit. When the row
    already exists, nothing is written.

    Args:
        db_engine: SQLAlchemy manipulation database object (not used by
            this function itself)
        lister: Debian lister instance. None by default, in which case a
            DebianLister is instantiated.
        override_conf: Override conf to pass to instantiate a lister
        distributions: Default distribution to build
        area_names: Area names combined with each entry of
            `distributions` to name the areas, as '<distribution>/<area>'

    """
    from swh.lister.debian.models import Distribution, Area

    distribution_name = 'Debian'

    if lister is None:
        from .lister import DebianLister
        lister = DebianLister(distribution=distribution_name,
                              override_config=override_conf)

    session = lister.db_session

    already_there = (
        session.query(Distribution)
        .filter(Distribution.name == distribution_name)
        .one_or_none()
    )
    if already_there:
        # The distribution is already registered: leave the data as is.
        return

    debian = Distribution(
        name=distribution_name,
        type='deb',
        mirror_uri='http://deb.debian.org/debian/')
    session.add(debian)

    session.add_all([
        Area(name='%s/%s' % (dist_name, area_name), distribution=debian)
        for dist_name in distributions
        for area_name in area_names
    ])
    session.commit()


def register() -> Mapping[str, Any]:
    """Return the registration entry of the Debian lister: its model, its
    class, its task module and its initialization function."""
    from .lister import DebianLister

    tasks_module = '%s.tasks' % __name__
    return {
        'models': [DebianLister.MODEL],
        'lister': DebianLister,
        'task_modules': [tasks_module],
        'init': debian_init,
    }
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import binascii
from collections import defaultdict
import datetime

from sqlalchemy import (
    Boolean,
    Column,
    DateTime,
    Enum,
    ForeignKey,
    Integer,
    LargeBinary,
    String,
    Table,
    UniqueConstraint,
)

from typing import Any, Mapping

try:
    from sqlalchemy import JSON
except ImportError:
    # SQLAlchemy < 1.1
    from sqlalchemy.dialects.postgresql import JSONB as JSON

from sqlalchemy.orm import relationship

from swh.lister.core.models import SQLBase


class Distribution(SQLBase):
    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""
    __tablename__ = 'distribution'

    id = Column(Integer, primary_key=True)
    name = Column(String, unique=True, nullable=False)
    # Packaging family; only 'deb' is handled by the URI builders below.
    type = Column(Enum('deb', 'rpm', name='distribution_types'),
                  nullable=False)
    mirror_uri = Column(String, nullable=False)

    areas = relationship('Area', back_populates='distribution')

    def origin_for_package(self, package_name: str) -> str:
        """Return the origin url for the given package

        The url has the form '<type>://<distribution name>/packages/<name>'.
        """
        return '%s://%s/packages/%s' % (self.type, self.name, package_name)

    def __repr__(self):
        return 'Distribution(%s (%s) on %s)' % (
            self.name,
            self.type,
            self.mirror_uri,
        )


class Area(SQLBase):
    """An area of a distribution; its name is unique per distribution."""
    __tablename__ = 'area'
    __table_args__ = (
        UniqueConstraint('distribution_id', 'name'),
    )

    id = Column(Integer, primary_key=True)
    distribution_id = Column(Integer, ForeignKey('distribution.id'),
                             nullable=False)
    name = Column(String, nullable=False)
    active = Column(Boolean, nullable=False, default=True)

    distribution = relationship('Distribution', back_populates='areas')

    def index_uris(self):
        """Get possible URIs for this component's package index

        Yields:
            (uri, compression) pairs, where compression is 'xz', 'bz2',
            'gz' or None for the uncompressed index, in that order.

        Raises:
            NotImplementedError: when iteration starts, if the distribution
                type is not 'deb'.

        """
        # Reject unsupported types up front, so that exhausting the
        # candidates of a supported distribution ends the iteration
        # normally instead of reporting the type as unsupported.
        if self.distribution.type != 'deb':
            raise NotImplementedError(
                'Do not know how to build index URI for Distribution type %s'
                % self.distribution.type
            )

        compression_exts = ('xz', 'bz2', 'gz', None)
        base_uri = '%s/dists/%s/source/Sources' % (
            self.distribution.mirror_uri,
            self.name,
        )
        for ext in compression_exts:
            if ext:
                yield (base_uri + '.' + ext, ext)
            else:
                yield (base_uri, None)

    def __repr__(self):
        return 'Area(%s of %s)' % (
            self.name,
            self.distribution.name,
        )


class Package(SQLBase):
    """A package version within an area; (area, name, version) is unique."""
    __tablename__ = 'package'
    __table_args__ = (
        UniqueConstraint('area_id', 'name', 'version'),
    )

    id = Column(Integer, primary_key=True)
    area_id = Column(Integer, ForeignKey('area.id'), nullable=False)
    name = Column(String, nullable=False)
    version = Column(String, nullable=False)
    # Path component placed between the mirror URI and the file name when
    # building fetch URIs.
    directory = Column(String, nullable=False)
    # Read by `loader_dict` as a mapping of file name -> checksums mapping.
    files = Column(JSON, nullable=False)

    origin_id = Column(Integer)
    task_id = Column(Integer)

    revision_id = Column(LargeBinary(20))

    area = relationship('Area')

    @property
    def distribution(self):
        """The distribution of the area this package belongs to."""
        return self.area.distribution

    def fetch_uri(self, filename):
        """Get the URI to fetch the `filename` file associated with the
        package

        Raises:
            NotImplementedError: if the distribution type is not 'deb'.

        """
        if self.distribution.type == 'deb':
            return '%s/%s/%s' % (
                self.distribution.mirror_uri,
                self.directory,
                filename,
            )
        else:
            raise NotImplementedError(
                'Do not know how to build fetch URI for Distribution type %s'
                % self.distribution.type
            )

    def loader_dict(self):
        """Return the dictionary describing this package.

        It always carries 'id', 'name' and 'version'. When `revision_id` is
        set, it is added hex-encoded; otherwise 'revision_id' is None and
        'files' maps each file name to a copy of its checksums completed
        with the file's fetch 'uri'.
        """
        ret = {
            'id': self.id,
            'name': self.name,
            'version': self.version,
        }
        if self.revision_id:
            ret['revision_id'] = binascii.hexlify(self.revision_id).decode()
        else:
            # Work on copies so that the stored `files` value is not
            # modified when the uri is added.
            files = {
                name: checksums.copy()
                for name, checksums in self.files.items()
            }
            for name in files:
                files[name]['uri'] = self.fetch_uri(name)

            ret.update({
                'revision_id': None,
                'files': files,
            })
        return ret

    def __repr__(self):
        return 'Package(%s_%s of %s %s)' % (
            self.name,
            self.version,
            self.distribution.name,
            self.area.name,
        )


class DistributionSnapshot(SQLBase):
    """A dated snapshot of a distribution, made of area snapshots."""
    __tablename__ = 'distribution_snapshot'

    id = Column(Integer, primary_key=True)
    date = Column(DateTime, nullable=False, index=True)
    distribution_id = Column(Integer,
                             ForeignKey('distribution.id'),
                             nullable=False)

    distribution = relationship('Distribution')
    areas = relationship('AreaSnapshot', back_populates='snapshot')

    def task_for_package(self, package_name: str,
                         package_versions: Mapping) -> Mapping[str, Any]:
        """Return the task dictionary for the given list of package versions

        The task is a 'oneshot' one of type 'load-<distribution type>-package'
        whose keyword arguments are the package origin url, the snapshot
        date (ISO format) and `package_versions`.
        """
        origin_url = self.distribution.origin_for_package(package_name)

        return {
            'policy': 'oneshot',
            'type': 'load-%s-package' % self.distribution.type,
            'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
            'arguments': {
                'args': [],
                'kwargs': {
                    'url': origin_url,
                    'date': self.date.isoformat(),
                    'packages': package_versions,
                },
            },
            'retries_left': 3,
        }

    def get_packages(self):
        """Return the packages of this snapshot, grouped by package name.

        Returns:
            a defaultdict mapping each package name to a dict whose keys
            are '<area name>/<version>' and whose values are the matching
            `Package.loader_dict()`.

        """
        packages = defaultdict(dict)
        for area_snapshot in self.areas:
            area_name = area_snapshot.area.name
            for package in area_snapshot.packages:
                ref_name = '%s/%s' % (area_name, package.version)
                packages[package.name][ref_name] = package.loader_dict()

        return packages


# Association table linking area snapshots to the packages they contain.
area_snapshot_package_assoc = Table(
    'area_snapshot_package', SQLBase.metadata,
    Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'),
           nullable=False),
    Column('package_id', Integer, ForeignKey('package.id'),
           nullable=False),
)


class AreaSnapshot(SQLBase):
    """The part of a distribution snapshot covering one area."""
    __tablename__ = 'area_snapshot'

    id = Column(Integer, primary_key=True)
    snapshot_id = Column(Integer,
                         ForeignKey('distribution_snapshot.id'),
                         nullable=False)
    area_id = Column(Integer,
                     ForeignKey('area.id'),
                     nullable=False)

    snapshot = relationship('DistributionSnapshot', back_populates='areas')
    area = relationship('Area')
    packages = relationship('Package', secondary=area_snapshot_package_assoc)


class TempPackage(SQLBase):
    """Package rows stored in a table created with the TEMPORARY prefix."""
    __tablename__ = 'temp_package'
    __table_args__ = {
        'prefixes': ['TEMPORARY'],
    }

    id = Column(Integer, primary_key=True)
    area_id = Column(Integer)
    name = Column(String)
    version = Column(String)
# Copyright (C) 2017-2019 the Software Heritage developers
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import logging

import click

from swh.lister.debian.models import Distribution, Area, SQLBase
from swh.lister.debian.lister import DebianLister


# NOTE: click uses a command's docstring as its help text, so the commands
# below are documented with comments and their existing docstrings are kept
# verbatim.

@click.group()
@click.option('--verbose/--no-verbose', default=False)
@click.pass_context
def cli(ctx, verbose):
    # Every sub-command finds its lister in the context object.
    ctx.obj['lister'] = DebianLister()

    if verbose:
        # In verbose mode, also surface the SQLAlchemy engine's logging.
        logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

    logging.basicConfig(
        format='%(asctime)s %(process)d %(levelname)s %(message)s',
        level=logging.DEBUG if verbose else logging.INFO,
    )


@cli.command()
@click.pass_context
def create_schema(ctx):
    """Create the schema from the models"""
    engine = ctx.obj['lister'].db_engine
    SQLBase.metadata.create_all(engine)


@cli.command()
@click.option('--name', help='The name of the distribution')
@click.option('--type', help='The type of distribution')
@click.option('--mirror-uri', help='The URL to the mirror of the distribution')
@click.option('--area', help='The areas for the distribution', multiple=True)
@click.pass_context
def create_distribution(ctx, name, type, mirror_uri, area):
    # Create the distribution (looked up by name and type) and the given
    # areas, adding only the objects that do not exist yet.
    session = ctx.obj['lister'].db_session
    pending = []

    distribution = (
        session.query(Distribution)
        .filter(Distribution.name == name)
        .filter(Distribution.type == type)
        .one_or_none()
    )
    if not distribution:
        distribution = Distribution(
            name=name, type=type, mirror_uri=mirror_uri)
        pending.append(distribution)

    for area_name in area:
        known_area = None

        # Areas are only looked up when the distribution has an id; without
        # one, every requested area is created.
        if distribution.id:
            known_area = (
                session.query(Area)
                .filter(Area.distribution == distribution)
                .filter(Area.name == area_name)
                .one_or_none()
            )

        if not known_area:
            pending.append(Area(name=area_name, distribution=distribution))

    session.add_all(pending)
    session.commit()


@cli.command()
@click.option('--name', help='The name of the distribution')
@click.pass_context
def list_distribution(ctx, name):
    """List the distribution"""
    lister = ctx.obj['lister']
    lister.run(name)


if __name__ == '__main__':
    cli(obj={})