diff --git a/swh/storage/schemata/distribution.py b/swh/storage/schemata/distribution.py index 90b4d4f5..6ef86ba4 100644 --- a/swh/storage/schemata/distribution.py +++ b/swh/storage/schemata/distribution.py @@ -1,227 +1,244 @@ # Copyright (C) 2017 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import binascii +from collections import defaultdict import datetime from sqlalchemy import ( Boolean, Column, DateTime, Enum, ForeignKey, Integer, JSON, LargeBinary, String, Table, UniqueConstraint, ) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship SQLBase = declarative_base() class Distribution(SQLBase): """A distribution (e.g. Debian, Ubuntu, Fedora, ...)""" __tablename__ = 'distribution' id = Column(Integer, primary_key=True) name = Column(String, unique=True, nullable=False) type = Column(Enum('deb', 'rpm', name='distribution_types'), nullable=False) mirror_uri = Column(String, nullable=False) areas = relationship('Area', back_populates='distribution') def origin_for_package(self, package_name, package_versions): """Return the origin dictionary for the given package""" return { 'type': self.type, 'url': '%s://%s/packages/%s' % ( self.type, self.name, package_name ), } def __repr__(self): return 'Distribution(%s (%s) on %s)' % ( self.name, self.type, self.mirror_uri, ) class Area(SQLBase): __tablename__ = 'area' __table_args__ = ( UniqueConstraint('distribution_id', 'name'), ) id = Column(Integer, primary_key=True) distribution_id = Column(Integer, ForeignKey('distribution.id'), nullable=False) name = Column(String, nullable=False) active = Column(Boolean, nullable=False, default=True) distribution = relationship('Distribution', back_populates='areas') def index_uris(self): """Get possible URIs for this component's package index""" if self.distribution.type == 'deb': compression_exts = ('xz', 'bz2', 'gz', None) base_uri = '%s/dists/%s/source/Sources' % ( self.distribution.mirror_uri, self.name, ) for ext in compression_exts: if ext: yield (base_uri + '.' + ext, ext) else: yield (base_uri, None) raise NotImplementedError( 'Do not know how to build index URI for Distribution type %s' % self.distribution.type ) def __repr__(self): return 'Area(%s of %s)' % ( self.name, self.distribution.name, ) class Package(SQLBase): __tablename__ = 'package' __table_args__ = ( UniqueConstraint('area_id', 'name', 'version'), ) id = Column(Integer, primary_key=True) area_id = Column(Integer, ForeignKey('area.id'), nullable=False) name = Column(String, nullable=False) version = Column(String, nullable=False) directory = Column(String, nullable=False) files = Column(JSON, nullable=False) origin_id = Column(Integer) task_id = Column(Integer) revision_id = Column(LargeBinary(20)) area = relationship('Area') @property def distribution(self): return self.area.distribution - def fetch_uris(self): - """Get the URIs to fetch the files associated with the package""" + def fetch_uri(self, filename): + """Get the URI to fetch the `filename` file associated with the + package""" if self.distribution.type == 'deb': - for file in self.files: - yield '%s/%s/%s' % ( - self.distribution.mirror_uri, - self.directory, - file, - ) + return '%s/%s/%s' % ( + self.distribution.mirror_uri, + self.directory, + filename, + ) else: raise NotImplementedError( 'Do not know how to build fetch URI for Distribution type %s' % self.distribution.type ) def loader_dict(self): ret = { 'id': self.id, 'name': self.name, 'version': self.version, } if self.revision_id: ret['revision_id'] = binascii.hexlify(self.revision_id).decode() else: + files = { + name: checksums.copy() + for name, checksums in self.files.items() + } + for name in files: + files[name]['uri'] = self.fetch_uri(name) + ret.update({ 'revision_id': None, - 'files': self.files, - 'fetch_uris': list(self.fetch_uris()), + 'files': files, }) return ret def __repr__(self): return 'Package(%s_%s of %s %s)' % ( self.name, self.version, self.distribution.name, self.area.name, ) class DistributionSnapshot(SQLBase): __tablename__ = 'distribution_snapshot' id = Column(Integer, primary_key=True) date = Column(DateTime, nullable=False, index=True) distribution_id = Column(Integer, ForeignKey('distribution.id'), nullable=False) distribution = relationship('Distribution') areas = relationship('AreaSnapshot', back_populates='snapshot') def task_for_package(self, package_name, package_versions): """Return the task dictionary for the given list of package versions""" origin = self.distribution.origin_for_package( package_name, package_versions, ) return { 'policy': 'oneshot', 'type': 'load-%s-package' % self.distribution.type, 'next_run': datetime.datetime.now(tz=datetime.timezone.utc), 'arguments': { 'args': [], 'kwargs': { 'origin': origin, 'date': self.date.isoformat(), 'packages': package_versions, }, } } + def get_packages(self): + packages = defaultdict(dict) + for area_snapshot in self.areas: + area_name = area_snapshot.area.name + for package in area_snapshot.packages: + ref_name = '%s/%s' % (area_name, package.version) + packages[package.name][ref_name] = package.loader_dict() + + return packages + area_snapshot_package_assoc = Table( 'area_snapshot_package', SQLBase.metadata, Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'), nullable=False), Column('package_id', Integer, ForeignKey('package.id'), nullable=False), ) class AreaSnapshot(SQLBase): __tablename__ = 'area_snapshot' id = Column(Integer, primary_key=True) snapshot_id = Column(Integer, ForeignKey('distribution_snapshot.id'), nullable=False) area_id = Column(Integer, ForeignKey('area.id'), nullable=False) snapshot = relationship('DistributionSnapshot', back_populates='areas') area = relationship('Area') packages = relationship('Package', secondary=area_snapshot_package_assoc) class TempPackage(SQLBase): __tablename__ = 'temp_package' __table_args__ = { 'prefixes': ['TEMPORARY'], } id = Column(Integer, primary_key=True) area_id = Column(Integer) name = Column(String) version = Column(String)