diff --git a/debian/control b/debian/control
index 5c5c8a69..36d33b32 100644
--- a/debian/control
+++ b/debian/control
@@ -1,48 +1,57 @@
 Source: swh-storage
 Maintainer: Software Heritage developers
 Section: python
 Priority: optional
 Build-Depends: debhelper (>= 9),
                dh-python,
                python3-aiohttp,
                python3-all,
                python3-click,
                python3-dateutil,
                python3-flask,
                python3-nose,
                python3-psycopg2,
                python3-requests,
                python3-setuptools,
+               python3-sqlalchemy,
                python3-swh.core (>= 0.0.28~),
                python3-swh.model (>= 0.0.15~),
                python3-swh.objstorage (>= 0.0.17~),
                python3-swh.scheduler (>= 0.0.14~),
                python3-vcversioner
 Standards-Version: 3.9.6
 Homepage: https://forge.softwareheritage.org/diffusion/DSTO/
 
 Package: python3-swh.storage
 Architecture: all
 Depends: python3-swh.core (>= 0.0.28~),
          python3-swh.model (>= 0.0.15~),
          python3-swh.objstorage (>= 0.0.17~),
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage storage utilities
 
 Package: python3-swh.storage.listener
 Architecture: all
 Depends: python3-kafka (>= 1.3.1~),
          python3-swh.journal (>= 0.0.2~),
          python3-swh.storage (= ${binary:Version}),
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage storage listener
 
 Package: python3-swh.storage.provenance
 Architecture: all
 Depends: python3-swh.scheduler (>= 0.0.14~),
          python3-swh.storage (= ${binary:Version}),
          ${misc:Depends},
          ${python3:Depends}
 Description: Software Heritage storage Provenance
+
+Package: python3-swh.storage.schemata
+Architecture: all
+Depends: python3-sqlalchemy,
+         python3-swh.storage (= ${binary:Version}),
+         ${misc:Depends},
+         ${python3:Depends}
+Description: Ancillary schemata for Software Heritage
diff --git a/debian/rules b/debian/rules
index cf9189bd..e3eddb52 100755
--- a/debian/rules
+++ b/debian/rules
@@ -1,23 +1,26 @@
 #!/usr/bin/make -f
 
 export PYBUILD_NAME=swh.storage
 
 %:
 	dh $@ --with python3 --buildsystem=pybuild
 
 override_dh_install:
 	dh_install
 	for pyvers in $(shell py3versions -vr); do \
 		mkdir -p $(CURDIR)/debian/python3-swh.storage.listener/usr/lib/python$$pyvers/dist-packages/swh/storage/ ; \
 		mv $(CURDIR)/debian/python3-swh.storage/usr/lib/python$$pyvers/dist-packages/swh/storage/listener.py \
 		   $(CURDIR)/debian/python3-swh.storage.listener/usr/lib/python$$pyvers/dist-packages/swh/storage/ ; \
 		mkdir -p $(CURDIR)/debian/python3-swh.storage.provenance/usr/lib/python$$pyvers/dist-packages/swh/storage/provenance ; \
 		mv $(CURDIR)/debian/python3-swh.storage/usr/lib/python$$pyvers/dist-packages/swh/storage/provenance/* \
-		   $(CURDIR)/debian/python3-swh.storage.provenance/usr/lib/python$$pyvers/dist-packages/swh/storage/provenance/ ; \
+		   $(CURDIR)/debian/python3-swh.storage.provenance/usr/lib/python$$pyvers/dist-packages/swh/storage/provenance/ ; \
+		mkdir -p $(CURDIR)/debian/python3-swh.storage.schemata/usr/lib/python$$pyvers/dist-packages/swh/storage/ ; \
+		mv $(CURDIR)/debian/python3-swh.storage/usr/lib/python$$pyvers/dist-packages/swh/storage/schemata \
+		   $(CURDIR)/debian/python3-swh.storage.schemata/usr/lib/python$$pyvers/dist-packages/swh/storage/ ; \
 	done
 
 override_dh_auto_test:
 	PYBUILD_SYSTEM=custom \
 	PYBUILD_TEST_ARGS="cd {build_dir}; python{version} -m nose swh -sva '!db'" \
 	dh_auto_test
 
diff --git a/setup.py b/setup.py
index 4ffd85e1..090b2b1e 100755
--- a/setup.py
+++ b/setup.py
@@ -1,37 +1,41 @@
 #!/usr/bin/env python3
 
 from setuptools import setup
 
 
 def parse_requirements():
     requirements = []
     for reqf in ('requirements.txt', 'requirements-swh.txt'):
         with open(reqf) as f:
             for line in f.readlines():
                 line = line.strip()
                 if not line or line.startswith('#'):
                     continue
                 requirements.append(line)
     return requirements
 
 
 setup(
     name='swh.storage',
     description='Software Heritage storage manager',
     author='Software Heritage developers',
     author_email='swh-devel@inria.fr',
     url='https://forge.softwareheritage.org/diffusion/DSTO/',
     packages=[
         'swh.storage',
         'swh.storage.api',
         'swh.storage.provenance',
+        'swh.storage.schemata',
         'swh.storage.tests',
     ],
     scripts=[
         'bin/swh-storage-add-dir',
     ],
     install_requires=parse_requirements(),
+    extras_require={
+        'schemata': ['SQLAlchemy'],
+    },
     setup_requires=['vcversioner'],
     vcversioner={},
     include_package_data=True,
 )
diff --git a/swh/storage/schemata/distribution.py b/swh/storage/schemata/distribution.py
new file mode 100644
index 00000000..90b4d4f5
--- /dev/null
+++ b/swh/storage/schemata/distribution.py
@@ -0,0 +1,253 @@
+# Copyright (C) 2017 the Software Heritage developers
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import binascii
+import datetime
+
+from sqlalchemy import (
+    Boolean,
+    Column,
+    DateTime,
+    Enum,
+    ForeignKey,
+    Integer,
+    JSON,
+    LargeBinary,
+    String,
+    Table,
+    UniqueConstraint,
+)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship
+
+SQLBase = declarative_base()
+
+
+class Distribution(SQLBase):
+    """A distribution (e.g. Debian, Ubuntu, Fedora, ...)"""
+    __tablename__ = 'distribution'
+
+    id = Column(Integer, primary_key=True)
+    name = Column(String, unique=True, nullable=False)
+    type = Column(Enum('deb', 'rpm', name='distribution_types'),
+                  nullable=False)
+    mirror_uri = Column(String, nullable=False)
+
+    areas = relationship('Area', back_populates='distribution')
+
+    def origin_for_package(self, package_name, package_versions):
+        """Return the origin dictionary for the given package
+
+        Only package_name is used to build the origin URL; package_versions
+        is accepted but not used here.
+        """
+        return {
+            'type': self.type,
+            'url': '%s://%s/packages/%s' % (
+                self.type, self.name, package_name
+            ),
+        }
+
+    def __repr__(self):
+        return 'Distribution(%s (%s) on %s)' % (
+            self.name,
+            self.type,
+            self.mirror_uri,
+        )
+
+
+class Area(SQLBase):
+    """An area of a distribution; area names are unique per distribution"""
+    __tablename__ = 'area'
+    __table_args__ = (
+        UniqueConstraint('distribution_id', 'name'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    distribution_id = Column(Integer, ForeignKey('distribution.id'),
+                             nullable=False)
+    name = Column(String, nullable=False)
+    active = Column(Boolean, nullable=False, default=True)
+
+    distribution = relationship('Distribution', back_populates='areas')
+
+    def index_uris(self):
+        """Get possible URIs for this component's package index
+
+        Yields:
+            (uri, compression) pairs, where compression is 'xz', 'bz2',
+            'gz', or None for the uncompressed index.
+
+        Raises:
+            NotImplementedError: if the distribution type is not 'deb'.
+        """
+        if self.distribution.type == 'deb':
+            compression_exts = ('xz', 'bz2', 'gz', None)
+            base_uri = '%s/dists/%s/source/Sources' % (
+                self.distribution.mirror_uri,
+                self.name,
+            )
+            for ext in compression_exts:
+                if ext:
+                    yield (base_uri + '.' + ext, ext)
+                else:
+                    yield (base_uri, None)
+        else:
+            # Raise only for unsupported types, so that the URIs of a 'deb'
+            # area can be iterated to exhaustion without an error.
+            raise NotImplementedError(
+                'Do not know how to build index URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def __repr__(self):
+        return 'Area(%s of %s)' % (
+            self.name,
+            self.distribution.name,
+        )
+
+
+class Package(SQLBase):
+    """A package version in an area, unique per (area, name, version)"""
+    __tablename__ = 'package'
+    __table_args__ = (
+        UniqueConstraint('area_id', 'name', 'version'),
+    )
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer, ForeignKey('area.id'), nullable=False)
+    name = Column(String, nullable=False)
+    version = Column(String, nullable=False)
+    directory = Column(String, nullable=False)
+    files = Column(JSON, nullable=False)
+
+    origin_id = Column(Integer)
+    task_id = Column(Integer)
+
+    revision_id = Column(LargeBinary(20))
+
+    area = relationship('Area')
+
+    @property
+    def distribution(self):
+        """The distribution that this package's area belongs to"""
+        return self.area.distribution
+
+    def fetch_uris(self):
+        """Get the URIs to fetch the files associated with the package"""
+        if self.distribution.type == 'deb':
+            for file in self.files:
+                yield '%s/%s/%s' % (
+                    self.distribution.mirror_uri,
+                    self.directory,
+                    file,
+                )
+        else:
+            raise NotImplementedError(
+                'Do not know how to build fetch URI for Distribution type %s' %
+                self.distribution.type
+            )
+
+    def loader_dict(self):
+        """Return a dictionary describing the package
+
+        When revision_id is set, it is included hex-encoded; otherwise
+        revision_id is None and the files and fetch URIs are included.
+        """
+        ret = {
+            'id': self.id,
+            'name': self.name,
+            'version': self.version,
+        }
+        if self.revision_id:
+            ret['revision_id'] = binascii.hexlify(self.revision_id).decode()
+        else:
+            ret.update({
+                'revision_id': None,
+                'files': self.files,
+                'fetch_uris': list(self.fetch_uris()),
+            })
+        return ret
+
+    def __repr__(self):
+        return 'Package(%s_%s of %s %s)' % (
+            self.name,
+            self.version,
+            self.distribution.name,
+            self.area.name,
+        )
+
+
+class DistributionSnapshot(SQLBase):
+    """A dated snapshot record tied to one distribution"""
+    __tablename__ = 'distribution_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    date = Column(DateTime, nullable=False, index=True)
+    distribution_id = Column(Integer,
+                             ForeignKey('distribution.id'),
+                             nullable=False)
+
+    distribution = relationship('Distribution')
+    areas = relationship('AreaSnapshot', back_populates='snapshot')
+
+    def task_for_package(self, package_name, package_versions):
+        """Return the task dictionary for the given list of package versions"""
+        origin = self.distribution.origin_for_package(
+            package_name, package_versions,
+        )
+
+        return {
+            'policy': 'oneshot',
+            'type': 'load-%s-package' % self.distribution.type,
+            'next_run': datetime.datetime.now(tz=datetime.timezone.utc),
+            'arguments': {
+                'args': [],
+                'kwargs': {
+                    'origin': origin,
+                    'date': self.date.isoformat(),
+                    'packages': package_versions,
+                },
+            }
+        }
+
+
+# Association table used as the `secondary` of AreaSnapshot.packages
+area_snapshot_package_assoc = Table(
+    'area_snapshot_package', SQLBase.metadata,
+    Column('area_snapshot_id', Integer, ForeignKey('area_snapshot.id'),
+           nullable=False),
+    Column('package_id', Integer, ForeignKey('package.id'),
+           nullable=False),
+)
+
+
+class AreaSnapshot(SQLBase):
+    """Links a distribution snapshot to an area and a set of packages"""
+    __tablename__ = 'area_snapshot'
+
+    id = Column(Integer, primary_key=True)
+    snapshot_id = Column(Integer,
+                         ForeignKey('distribution_snapshot.id'),
+                         nullable=False)
+    area_id = Column(Integer,
+                     ForeignKey('area.id'),
+                     nullable=False)
+
+    snapshot = relationship('DistributionSnapshot', back_populates='areas')
+    area = relationship('Area')
+    packages = relationship('Package', secondary=area_snapshot_package_assoc)
+
+
+class TempPackage(SQLBase):
+    """Table created with the TEMPORARY prefix: area_id, name, version"""
+    __tablename__ = 'temp_package'
+    __table_args__ = {
+        'prefixes': ['TEMPORARY'],
+    }
+
+    id = Column(Integer, primary_key=True)
+    area_id = Column(Integer)
+    name = Column(String)
+    version = Column(String)