Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index a95b925..11b4078 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -1,201 +1,200 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import logging
import shutil
def info(data):
    """Extract the project-level subset of a metadata dict.

    Args:
        data (dict): Metadata holding an 'info' entry (a dict).

    Returns:
        dict: The 'home_page', 'description', 'summary', 'license',
        'package_url' and 'project_url' values copied from
        ``data['info']``, plus an 'upstream' key. 'upstream' is the
        'Homepage' entry of ``data['info']['project_urls']`` when that
        entry is non-empty, None otherwise.

    Raises:
        KeyError: If 'info' or one of the copied keys is missing.

    """
    metadata = data['info']
    copied_keys = (
        'home_page',
        'description',
        'summary',
        'license',
        'package_url',
        'project_url',
    )
    subset = {key: metadata[key] for key in copied_keys}
    subset['upstream'] = None

    urls = metadata.get('project_urls')
    if not urls:
        return subset

    homepage = urls.get('Homepage')
    if homepage:
        subset['upstream'] = homepage
    return subset
def author(data):
    """Given a dict of data, returns an author subset.

    Args:
        data (dict): Metadata which may hold the 'author' and
            'author_email' entries. Either entry may be missing, None
            or empty.

    Returns:
        dict: With keys 'fullname', 'name' and 'email'. 'fullname' is
        always bytes (``b''`` when neither a name nor an email is
        available). 'name' and 'email' are utf-8 encoded bytes, or None
        when the corresponding value is None or absent.

    """
    name = data.get('author')
    email = data.get('author_email')

    if email:
        # A missing name is formatted like an empty one so the literal
        # string 'None' never leaks into the fullname.
        fullname = '%s <%s>' % (name if name is not None else '', email)
    else:
        fullname = name

    if not fullname:
        return {'fullname': b'', 'name': None, 'email': None}

    # Only one of name/email may be set: encode each one independently
    # instead of assuming both are strings.
    return {
        'fullname': fullname.encode('utf-8'),
        'name': name.encode('utf-8') if name is not None else None,
        'email': email.encode('utf-8') if email is not None else None,
    }
class PyPiProject:
    """PyPi project representation

    This permits to extract information for the:

    - project, either the latest information (from the last revision)
    - either the information for a given release
    - Symmetrically for the release author information

    Fetching (and uncompressing) the release artifacts is delegated to
    the ``client`` passed at construction time.

    """
    def __init__(self, client, project, project_metadata_url, data=None):
        """Initialize the project and seed the per-release cache.

        Args:
            client: Object exposing ``info(url)``, ``release(project,
                release_name)`` and ``fetch_release_artifact(project,
                release)``, as called by this class.
            project (str): Project name
            project_metadata_url (str): Url of the project's metadata
            data (dict): Already fetched project metadata. When empty or
                None, it is retrieved through ``client.info``.

        """
        self.client = client
        self.project = project
        self.project_metadata_url = project_metadata_url
        if data:
            self.data = data
        else:
            self.data = client.info(project_metadata_url)

        self.last_version = self.data['info']['version']
        # The project metadata is the metadata of the latest version, no
        # need to fetch that release again.
        self.cache = {
            self.last_version: self.data
        }

    def _data(self, release_name=None):
        """Fetch data per release and cache it. Returns the cache retrieved
        data if already fetched.

        Args:
            release_name (str): Release name or version. When empty or
                None, the project's (latest) metadata is returned.

        Returns:
            dict: Metadata for the requested release

        """
        if not release_name:
            return self.data

        data = self.cache.get(release_name)
        if not data:
            data = self.client.release(self.project, release_name)
            self.cache[release_name] = data
        return data

    def info(self, release_name=None):
        """Compute release information for release provided or the latest one.

        Args:
            release_name (str): Release name or version (latest one when
                empty or None)

        Returns:
            dict: Project subset as computed by the module-level ``info``

        """
        return info(self._data(release_name))

    def _filter_releases(self, version, release):
        """Filter release to keep only sdist (source distribution)

        There can be multiple 'package_type' (sdist, bdist_egg,
        bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst, ...), we are
        only interested in source distribution (sdist), others bdist*
        are binary

        Args:
            version (str): Release name or version
            release (dict/[dict]): Full release object (or a list of)

        Returns:
            [dict]: The release artifacts whose 'packagetype' is 'sdist'
            (empty list when there is none)

        """
        if not release:
            return []
        if not isinstance(release, list):
            release = [release]
        # Filter only on 'sdist' package type
        return [rel for rel in release if rel['packagetype'] == 'sdist']

    def _cleanup_release_artifacts(self, archive_path, directory_path):
        """Clean intermediary files which no longer need to be present.

        Args:
            archive_path (str): Path to the fetched archive (may be None)
            directory_path (str): Path to the uncompressed archive (may
                be None)

        """
        if directory_path and os.path.exists(directory_path):
            logging.debug(
                'Clean up uncompressed archive path %s', directory_path)
            shutil.rmtree(directory_path)

        if archive_path and os.path.exists(archive_path):
            logging.debug('Clean up archive %s', archive_path)
            os.unlink(archive_path)

    def _fetch_and_uncompress_releases(self, version, releases):
        """Fetch and uncompress sdist releases

        Args:
            version (str): Release name or version
            releases ([dict]): List of source distribution release artifacts

        Yields:
            The result of ``client.fetch_release_artifact`` for each
            release. ``releases()`` unpacks it as a tuple (release,
            archive path, uncompressed path, pkginfo).

        """
        for release in releases:
            # flatten the metadata to ease reading
            _flattenned_release = {
                'name': version,
                'message': release.get('comment_text', ''),
                'sha256': release['digests']['sha256'],
                'size': release['size'],
                'filename': release['filename'],
                'url': release['url'],
                'date': release['upload_time'],
            }
            # fetch and write locally archives
            yield self.client.fetch_release_artifact(
                self.project, _flattenned_release)

    def releases(self):
        """Fetch metadata and data per release.

        This:

        - fetches the release artifacts (through the client)
        - yields the information per source release artifact
        - cleans up the intermediary fetched artifact files, even when
          the consumer stops iterating early

        Yields:
            tuple (release_info, author, release, uncompressed_path) where:

            - release_info (dict): release's associated version info
              (PKG-INFO when available, PyPI api metadata otherwise)
            - author (dict): Author information for the release
            - release (dict): release metadata
            - uncompressed_path (str): Path to uncompressed artifact

        """
        # The compute information per release
        releases_dict = self.data['releases']
        for version, releases in releases_dict.items():
            releases = self._filter_releases(version, releases)
            if not releases:
                logging.warning(
                    '%s %s: No source artifact found, skipping',
                    self.project, version)
                continue

            _releases = self._fetch_and_uncompress_releases(version, releases)
            for _release, _archive, _dir_path, _pkginfo in _releases:
                try:
                    _release_info = _pkginfo
                    if not _release_info:  # fallback to pypi api metadata
                        logging.warning(
                            'No PKG-INFO detected for %s, %s, '
                            'falling back to PyPI API metadata',
                            self.project, _release['name'])
                        _release_info = self.info(release_name=version)
                        # The subset returned by self.info() holds no
                        # author entry; read it from the raw metadata.
                        _author = author(self._data(version)['info'])
                    else:
                        _author = author(_release_info)
                    yield _release_info, _author, _release, _dir_path
                finally:
                    # Runs as well when the generator is closed before
                    # being resumed, so no artifact is left behind.
                    self._cleanup_release_artifacts(_archive, _dir_path)

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 1:25 PM (6 d, 4 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3452576

Event Timeline