diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,2 @@
 swh.core >= 0.0.75
-swh.storage[schemata] >= 0.0.122
 swh.scheduler >= 0.0.58
diff --git a/swh/lister/debian/lister.py b/swh/lister/debian/lister.py
--- a/swh/lister/debian/lister.py
+++ b/swh/lister/debian/lister.py
@@ -14,7 +14,7 @@
 from sqlalchemy.orm import joinedload, load_only
 from sqlalchemy.schema import CreateTable, DropTable
 
-from swh.storage.schemata.distribution import (
+from swh.lister.debian.models import (
     AreaSnapshot, Distribution, DistributionSnapshot, Package, TempPackage,
 )
 
diff --git a/swh/lister/debian/models.py b/swh/lister/debian/models.py
new file mode 100644
--- /dev/null
+++ b/swh/lister/debian/models.py
@@ -0,0 +1,287 @@
+# Copyright (C) 2017-2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import binascii
+from collections import defaultdict
+import datetime
+
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    Enum,
+    ForeignKey,
+    Integer,
+    LargeBinary,
+    String,
+    Table,
+    UniqueConstraint,
+)
+
+try:
+    from sqlalchemy import JSON
+except ImportError:
+    # SQLAlchemy < 1.1
+    from sqlalchemy.dialects.postgresql import JSONB as JSON
+
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship
+
+SQLBase = declarative_base()
+
+
+class Distribution(SQLBase):
+    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""
+    __tablename__ = 'distribution'
+
+    id = Column(Integer, primary_key=True)
+    name = Column(String, unique=True, nullable=False)
+    type = Column(Enum('deb', 'rpm', name='distribution_types'),
+                  nullable=False)
+    mirror_uri = Column(String, nullable=False)
+
+    areas = relationship('Area', back_populates='distribution')
+
+    def origin_for_package(self, package_name, package_versions):
+        """Return the origin dictionary for the given package
+
+        The 'url' entry is built from the distribution type, the
+        distribution name and `package_name`; `package_versions` is
+        accepted but not used here.
+        """
+        return {
+            'type': self.type,
+            'url': '%s://%s/packages/%s' % (
+                self.type, self.name, package_name
+            ),
+        }
+
+    def __repr__(self):
+        return 'Distribution(%s (%s) on %s)' % (
+            self.name,
+            self.type,
+            self.mirror_uri,
+        )
+
+
+class Area(SQLBase):
+    """An area of a distribution; names are unique per distribution"""
+    __tablename__ = 'area'
+    __table_args__ = (
+        UniqueConstraint('distribution_id', 'name'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    distribution_id = Column(Integer, ForeignKey('distribution.id'),
+                             nullable=False)
+    name = Column(String, nullable=False)
+    active = Column(Boolean, nullable=False, default=True)
+
+    distribution = relationship('Distribution', back_populates='areas')
+
+    def index_uris(self):
+        """Get possible URIs for this component's package index
+
+        Yields:
+            (uri, compression) tuples: the xz, bz2 and gz variants of the
+            index, then the uncompressed index with compression None.
+
+        Raises:
+            NotImplementedError: when iteration starts, if the
+                distribution type is not 'deb'.
+        """
+        if self.distribution.type == 'deb':
+            compression_exts = ('xz', 'bz2', 'gz', None)
+            base_uri = '%s/dists/%s/source/Sources' % (
+                self.distribution.mirror_uri,
+                self.name,
+            )
+            for ext in compression_exts:
+                if ext:
+                    yield (base_uri + '.' + ext, ext)
+                else:
+                    yield (base_uri, None)
+        else:
+            # Raise only for unsupported types, so that a 'deb' area can
+            # be iterated to exhaustion without an exception.
+            raise NotImplementedError(
+                'Do not know how to build index URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def __repr__(self):
+        return 'Area(%s of %s)' % (
+            self.name,
+            self.distribution.name,
+        )
+
+
+class Package(SQLBase):
+    """A package version in an area, unique per (area, name, version)"""
+    __tablename__ = 'package'
+    __table_args__ = (
+        UniqueConstraint('area_id', 'name', 'version'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer, ForeignKey('area.id'), nullable=False)
+    name = Column(String, nullable=False)
+    version = Column(String, nullable=False)
+    directory = Column(String, nullable=False)
+    files = Column(JSON, nullable=False)
+
+    origin_id = Column(Integer)
+    task_id = Column(Integer)
+
+    revision_id = Column(LargeBinary(20))
+
+    area = relationship('Area')
+
+    @property
+    def distribution(self):
+        """The distribution of the area this package belongs to"""
+        return self.area.distribution
+
+    def fetch_uri(self, filename):
+        """Get the URI to fetch the `filename` file associated with the
+        package"""
+        if self.distribution.type == 'deb':
+            return '%s/%s/%s' % (
+                self.distribution.mirror_uri,
+                self.directory,
+                filename,
+            )
+        else:
+            raise NotImplementedError(
+                'Do not know how to build fetch URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def loader_dict(self):
+        """Return the dictionary describing this package
+
+        It always holds the 'id', 'name', 'version' and 'revision_id'
+        keys. When `revision_id` is set, it is given hex-encoded;
+        otherwise it is None and a 'files' key maps each file name to a
+        copy of its `files` entry, extended with the 'uri' to fetch it.
+        """
+        ret = {
+            'id': self.id,
+            'name': self.name,
+            'version': self.version,
+        }
+        if self.revision_id:
+            ret['revision_id'] = binascii.hexlify(self.revision_id).decode()
+        else:
+            files = {
+                name: checksums.copy()
+                for name, checksums in self.files.items()
+            }
+            for name in files:
+                files[name]['uri'] = self.fetch_uri(name)
+
+            ret.update({
+                'revision_id': None,
+                'files': files,
+            })
+        return ret
+
+    def __repr__(self):
+        return 'Package(%s_%s of %s %s)' % (
+            self.name,
+            self.version,
+            self.distribution.name,
+            self.area.name,
+        )
+
+
+class DistributionSnapshot(SQLBase):
+    """A dated snapshot of a distribution, made of area snapshots"""
+    __tablename__ = 'distribution_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    date = Column(DateTime, nullable=False, index=True)
+    distribution_id = Column(Integer,
+                             ForeignKey('distribution.id'),
+                             nullable=False)
+
+    distribution = relationship('Distribution')
+    areas = relationship('AreaSnapshot', back_populates='snapshot')
+
+    def task_for_package(self, package_name, package_versions):
+        """Return the task dictionary for the given list of package versions"""
+        origin = self.distribution.origin_for_package(
+            package_name, package_versions,
+        )
+
+        return {
+            'policy': 'oneshot',
+            'type': 'load-%s-package' % self.distribution.type,
+            'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
+            'arguments': {
+                'args': [],
+                'kwargs': {
+                    'origin': origin,
+                    'date': self.date.isoformat(),
+                    'packages': package_versions,
+                },
+            }
+        }
+
+    def get_packages(self):
+        """Return the packages of this snapshot, grouped by package name
+
+        Each package name maps to a dictionary whose keys are
+        '<area name>/<version>' and whose values are the matching
+        `Package.loader_dict()`.
+        """
+        packages = defaultdict(dict)
+        for area_snapshot in self.areas:
+            area_name = area_snapshot.area.name
+            for package in area_snapshot.packages:
+                ref_name = '%s/%s' % (area_name, package.version)
+                packages[package.name][ref_name] = package.loader_dict()
+
+        return packages
+
+
+# Association table linking area snapshots to the packages they contain
+area_snapshot_package_assoc = Table(
+    'area_snapshot_package', SQLBase.metadata,
+    Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'),
+           nullable=False),
+    Column('package_id', Integer, ForeignKey('package.id'),
+           nullable=False),
+)
+
+
+class AreaSnapshot(SQLBase):
+    """The packages of one area within a distribution snapshot"""
+    __tablename__ = 'area_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    snapshot_id = Column(Integer,
+                         ForeignKey('distribution_snapshot.id'),
+                         nullable=False)
+    area_id = Column(Integer,
+                     ForeignKey('area.id'),
+                     nullable=False)
+
+    snapshot = relationship('DistributionSnapshot', back_populates='areas')
+    area = relationship('Area')
+    packages = relationship('Package', secondary=area_snapshot_package_assoc)
+
+
+class TempPackage(SQLBase):
+    """Rows of (area_id, name, version) kept in a TEMPORARY table"""
+    __tablename__ = 'temp_package'
+    __table_args__ = {
+        'prefixes': ['TEMPORARY'],
+    }
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer)
+    name = Column(String)
+    version = Column(String)