diff --git a/PKG-INFO b/PKG-INFO index 7c7e23828..6bc70ef7e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 2.1 Name: swh.storage -Version: 0.0.91 +Version: 0.0.92 Summary: Software Heritage storage manager Home-page: https://forge.softwareheritage.org/diffusion/DSTO/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN Provides-Extra: listener Provides-Extra: schemata diff --git a/debian/control b/debian/control index d1e149e06..5db6cda27 100644 --- a/debian/control +++ b/debian/control @@ -1,58 +1,58 @@ Source: swh-storage Maintainer: Software Heritage developers Section: python Priority: optional Build-Depends: debhelper (>= 9), dh-python (>= 2), python3-aiohttp, python3-all, python3-click, python3-dateutil, python3-flask, python3-kafka, python3-nose, python3-psycopg2, python3-requests, python3-setuptools, - python3-sqlalchemy, + python3-sqlalchemy (>= 1.0), python3-swh.core (>= 0.0.28~), python3-swh.model (>= 0.0.18~), python3-swh.objstorage (>= 0.0.17~), python3-swh.scheduler (>= 0.0.14~), python3-vcversioner Standards-Version: 3.9.6 Homepage: https://forge.softwareheritage.org/diffusion/DSTO/ Package: python3-swh.storage Architecture: all Depends: python3-swh.core (>= 0.0.28~), python3-swh.model (>= 0.0.18~), python3-swh.objstorage (>= 0.0.17~), ${misc:Depends}, ${python3:Depends} Description: Software Heritage storage utilities Package: python3-swh.storage.listener Architecture: all Depends: python3-kafka (>= 1.3.1~), python3-swh.journal (>= 0.0.2~), python3-swh.storage (= ${binary:Version}), ${misc:Depends}, ${python3:Depends} Description: Software Heritage storage listener Package: python3-swh.storage.provenance Architecture: all Depends: python3-swh.scheduler (>= 0.0.14~), python3-swh.storage (= ${binary:Version}), ${misc:Depends}, ${python3:Depends} Description: Software Heritage storage Provenance Package: python3-swh.storage.schemata Architecture: all -Depends: python3-sqlalchemy, +Depends: python3-sqlalchemy (>= 1.0), python3-swh.storage (= ${binary:Version}), ${misc:Depends}, ${python3:Depends} Description: Ancillary schemata for Software Heritage diff --git a/swh.storage.egg-info/PKG-INFO b/swh.storage.egg-info/PKG-INFO index 7c7e23828..6bc70ef7e 100644 --- a/swh.storage.egg-info/PKG-INFO +++ b/swh.storage.egg-info/PKG-INFO @@ -1,12 +1,12 @@ Metadata-Version: 2.1 Name: swh.storage -Version: 0.0.91 +Version: 0.0.92 Summary: Software Heritage storage manager Home-page: https://forge.softwareheritage.org/diffusion/DSTO/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN Provides-Extra: listener Provides-Extra: schemata diff --git a/swh/storage/schemata/distribution.py b/swh/storage/schemata/distribution.py index 6ef86ba4b..76aa0145d 100644 --- a/swh/storage/schemata/distribution.py +++ b/swh/storage/schemata/distribution.py @@ -1,244 +1,250 @@ # Copyright (C) 2017 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import binascii from collections import defaultdict import datetime from sqlalchemy import ( Boolean, Column, DateTime, Enum, ForeignKey, Integer, - JSON, LargeBinary, String, Table, UniqueConstraint, ) + +try: + from sqlalchemy import JSON +except ImportError: + # SQLAlchemy < 1.1 + from sqlalchemy.dialects.postgresql import JSONB as JSON + from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship SQLBase = declarative_base() class Distribution(SQLBase): """A distribution (e.g. Debian, Ubuntu, Fedora, ...)""" __tablename__ = 'distribution' id = Column(Integer, primary_key=True) name = Column(String, unique=True, nullable=False) type = Column(Enum('deb', 'rpm', name='distribution_types'), nullable=False) mirror_uri = Column(String, nullable=False) areas = relationship('Area', back_populates='distribution') def origin_for_package(self, package_name, package_versions): """Return the origin dictionary for the given package""" return { 'type': self.type, 'url': '%s://%s/packages/%s' % ( self.type, self.name, package_name ), } def __repr__(self): return 'Distribution(%s (%s) on %s)' % ( self.name, self.type, self.mirror_uri, ) class Area(SQLBase): __tablename__ = 'area' __table_args__ = ( UniqueConstraint('distribution_id', 'name'), ) id = Column(Integer, primary_key=True) distribution_id = Column(Integer, ForeignKey('distribution.id'), nullable=False) name = Column(String, nullable=False) active = Column(Boolean, nullable=False, default=True) distribution = relationship('Distribution', back_populates='areas') def index_uris(self): """Get possible URIs for this component's package index""" if self.distribution.type == 'deb': compression_exts = ('xz', 'bz2', 'gz', None) base_uri = '%s/dists/%s/source/Sources' % ( self.distribution.mirror_uri, self.name, ) for ext in compression_exts: if ext: yield (base_uri + '.' + ext, ext) else: yield (base_uri, None) raise NotImplementedError( 'Do not know how to build index URI for Distribution type %s' % self.distribution.type ) def __repr__(self): return 'Area(%s of %s)' % ( self.name, self.distribution.name, ) class Package(SQLBase): __tablename__ = 'package' __table_args__ = ( UniqueConstraint('area_id', 'name', 'version'), ) id = Column(Integer, primary_key=True) area_id = Column(Integer, ForeignKey('area.id'), nullable=False) name = Column(String, nullable=False) version = Column(String, nullable=False) directory = Column(String, nullable=False) files = Column(JSON, nullable=False) origin_id = Column(Integer) task_id = Column(Integer) revision_id = Column(LargeBinary(20)) area = relationship('Area') @property def distribution(self): return self.area.distribution def fetch_uri(self, filename): """Get the URI to fetch the `filename` file associated with the package""" if self.distribution.type == 'deb': return '%s/%s/%s' % ( self.distribution.mirror_uri, self.directory, filename, ) else: raise NotImplementedError( 'Do not know how to build fetch URI for Distribution type %s' % self.distribution.type ) def loader_dict(self): ret = { 'id': self.id, 'name': self.name, 'version': self.version, } if self.revision_id: ret['revision_id'] = binascii.hexlify(self.revision_id).decode() else: files = { name: checksums.copy() for name, checksums in self.files.items() } for name in files: files[name]['uri'] = self.fetch_uri(name) ret.update({ 'revision_id': None, 'files': files, }) return ret def __repr__(self): return 'Package(%s_%s of %s %s)' % ( self.name, self.version, self.distribution.name, self.area.name, ) class DistributionSnapshot(SQLBase): __tablename__ = 'distribution_snapshot' id = Column(Integer, primary_key=True) date = Column(DateTime, nullable=False, index=True) distribution_id = Column(Integer, ForeignKey('distribution.id'), nullable=False) distribution = relationship('Distribution') areas = relationship('AreaSnapshot', back_populates='snapshot') def task_for_package(self, package_name, package_versions): """Return the task dictionary for the given list of package versions""" origin = self.distribution.origin_for_package( package_name, package_versions, ) return { 'policy': 'oneshot', 'type': 'load-%s-package' % self.distribution.type, 'next_run': datetime.datetime.now(tz=datetime.timezone.utc), 'arguments': { 'args': [], 'kwargs': { 'origin': origin, 'date': self.date.isoformat(), 'packages': package_versions, }, } } def get_packages(self): packages = defaultdict(dict) for area_snapshot in self.areas: area_name = area_snapshot.area.name for package in area_snapshot.packages: ref_name = '%s/%s' % (area_name, package.version) packages[package.name][ref_name] = package.loader_dict() return packages area_snapshot_package_assoc = Table( 'area_snapshot_package', SQLBase.metadata, Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'), nullable=False), Column('package_id', Integer, ForeignKey('package.id'), nullable=False), ) class AreaSnapshot(SQLBase): __tablename__ = 'area_snapshot' id = Column(Integer, primary_key=True) snapshot_id = Column(Integer, ForeignKey('distribution_snapshot.id'), nullable=False) area_id = Column(Integer, ForeignKey('area.id'), nullable=False) snapshot = relationship('DistributionSnapshot', back_populates='areas') area = relationship('Area') packages = relationship('Package', secondary=area_snapshot_package_assoc) class TempPackage(SQLBase): __tablename__ = 'temp_package' __table_args__ = { 'prefixes': ['TEMPORARY'], } id = Column(Integer, primary_key=True) area_id = Column(Integer) name = Column(String) version = Column(String) diff --git a/version.txt b/version.txt index c85290328..77478a716 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.91-0-g91e1e5e \ No newline at end of file +v0.0.92-0-gd35e739 \ No newline at end of file