# Reconstructed from the patch hunk "@@ -22,9 +22,100 @@" of
# swh/loader/debian/loader.py (state of the hunk after the patch is applied).
from . import converters
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Splits on a comma (with optional surrounding whitespace) that directly
# follows a closing '>'.
UPLOADERS_SPLIT = re.compile(r'(?<=\>)\s*,\s*')

# NOTE(review): the database URL is hard-coded and the engine is created at
# import time; presumably this should come from configuration -- confirm.
db_engine = create_engine('postgresql+psycopg2:///test_lister')
Base = declarative_base(db_engine)


class package(Base):
    """ORM mapping of the ``package_repo`` table.

    No columns are declared here: ``autoload`` asks SQLAlchemy to reflect
    them from the database bound to ``Base``.
    """
    __tablename__ = 'package_repo'
    __table_args__ = {'autoload': True}


def get_pkg_information():
    """Get the list of source packages from the lister table.

    Args: None

    Returns: list of packages dict with the following keys:
        html_url: url to .dsc file
        name: source package name
        version: source package version

    """
    db_session = sessionmaker(bind=db_engine)()
    try:
        packages = []
        for entry in db_session.query(package):
            # NOTE(review): the URL is built from ``package_version`` while
            # the dict exposes ``version``; both columns come from the
            # reflected table -- confirm this distinction is intended.
            base_url = entry.html_url + entry.directory
            packages.append({
                'name': entry.name,
                'version': entry.version,
                'html_url': base_url + '/' + entry.package_version + '.dsc',
            })
        return packages
    finally:
        # Always release the connection held by the session, even when the
        # query or the row processing fails.
        db_session.close()


class PackageDownloadFailed(Exception):
    """Raise this exception when a package Download failed"""


def download_src_pkg(package, basedir, log=None):
    """Download a Debian source package to the basedir/package_version

    The download is delegated to ``dget``, run inside the per-package
    directory; its output is captured in the file ``<basedir>.log``.

    Args:
        package: a dict with the following keys:
            html_url: url to .dsc file
            name: source package name
            version: source package version
          The key ``dsc`` is set on this dict to the expected path of the
          downloaded .dsc file (before the download is attempted).
        basedir : the directory where the package gets downloaded (bytes)
        log: a logging.Logger object

    Returns: None

    Raises:
        PackageDownloadFailed: when ``dget`` exits with a non-zero status.

    """
    basedir_str = basedir.decode('utf-8')
    pkg_id = '%s_%s' % (package['name'], package['version'])

    dirname = os.path.join(basedir_str, pkg_id)
    os.makedirs(dirname, exist_ok=True)

    dsc_path = os.path.join(dirname, '%s.dsc' % pkg_id)

    download_url = package['html_url']
    if log:
        log.debug('Download Debian source package %s in %s' %
                  (download_url, basedir_str), extra={
                      'swh_type': 'deb_extract',
                      'swh_url': download_url,
                      'swh_basedir': basedir_str,
                  })

    logfile = b''.join([basedir, b'.log'])
    # dget: -u allows unauthenticated packages, -d downloads without
    # extracting the source.
    cmd = ['dget', '-u', '-d', download_url]
    package['dsc'] = dsc_path
    try:
        with open(logfile, 'w') as stdout:
            # Run dget in the target directory through ``cwd`` instead of
            # os.chdir(), so the working directory of the whole process is
            # left untouched and relative paths keep their meaning.
            subprocess.check_call(cmd, stdout=stdout,
                                  stderr=subprocess.STDOUT, cwd=dirname)
    except subprocess.CalledProcessError as exc:
        if log:
            # Tolerate undecodable bytes so that a decoding error cannot
            # mask the download failure being reported.
            with open(logfile, 'r', errors='replace') as captured:
                data = captured.read()
            log.warning('Downloading Debian package %s failed: %s' %
                        (download_url, data),
                        extra={
                            'swh_type': 'deb_download_failed',
                            'swh_url': download_url,
                            'swh_log': data,
                        })
        raise PackageDownloadFailed(download_url) from exc


class PackageExtractionFailed(Exception):
    """Raise this exception when a package extraction failed"""