Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9343303
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
7 KB
Subscribers
None
View Options
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index a95b925..11b4078 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -1,201 +1,200 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import logging
import shutil
def info(data):
    """Extract the project-level subset of a PyPI ``info`` payload.

    Args:
        data (dict): Raw PyPI project metadata containing an ``info`` key.

    Returns:
        dict: The selected project fields plus an ``upstream`` entry,
        set to the advertised ``Homepage`` project url when present,
        None otherwise.
    """
    meta = data['info']
    wanted = ('home_page', 'description', 'summary', 'license',
              'package_url', 'project_url')
    subset = {field: meta[field] for field in wanted}
    # 'project_urls' may be absent or None; 'Homepage' may be missing
    # or empty — all of those leave 'upstream' as None.
    urls = meta.get('project_urls') or {}
    subset['upstream'] = urls.get('Homepage') or None
    return subset
def author(data):
    """Build an author model from PyPI release metadata.

    Args:
        data (dict): Release ``info`` dict holding ``author`` and
            ``author_email`` entries; either may be None or empty.

    Returns:
        dict: {'fullname': bytes, 'name': bytes or None,
        'email': bytes or None}

    """
    name = data['author']
    email = data['author_email']
    # Fix: either field may be None/empty in real PyPI data.  The
    # previous implementation crashed with AttributeError on
    # `None.encode('utf-8')` when only one of the two was set, and
    # stringified a missing name into 'None <email>'.
    if name and email:
        fullname = '%s <%s>' % (name, email)
    else:
        fullname = name or email
    if not fullname:
        return {'fullname': b'', 'name': None, 'email': None}
    return {
        'fullname': fullname.encode('utf-8'),
        'name': name.encode('utf-8') if name else None,
        'email': email.encode('utf-8') if email else None,
    }
class PyPiProject:
    """PyPI project representation.

    This permits to extract information for the:

    - project, either the latest information (from the last revision)
    - either the information for a given release

    - Symmetrically for the release author information

    This also fetches and uncompresses the associated release
    artifacts.

    """
    def __init__(self, client, project, project_metadata_url, data=None):
        """
        Args:
            client: API client used to fetch metadata and release
                artifacts (must expose ``info``, ``release`` and
                ``fetch_release_artifact``)
            project (str): Project name on PyPI
            project_metadata_url (str): Url to the project's json metadata
            data (dict): Optional pre-fetched project metadata; when
                provided, no remote call is made at construction time

        """
        self.client = client
        self.project = project
        self.project_metadata_url = project_metadata_url
        if data:
            self.data = data
        else:
            self.data = client.info(project_metadata_url)
        self.last_version = self.data['info']['version']
        # Per-release metadata cache, keyed by version string
        self.cache = {
            self.last_version: self.data,
        }

    def _data(self, release_name=None):
        """Fetch data per release and cache it.  Returns the cached
        data if already fetched, the latest project data when no
        release name is given.
        """
        if release_name:
            data = self.cache.get(release_name)
            if not data:
                data = self.client.release(self.project, release_name)
                self.cache[release_name] = data
        else:
            data = self.data
        return data

    def info(self, release_name=None):
        """Compute release information for the release provided or the
        latest one.
        """
        return info(self._data(release_name))

    def _filter_releases(self, version, release):
        """Filter releases to keep only sdist (source distribution)
        artifacts.

        There can be multiple 'packagetype' values (sdist, bdist_egg,
        bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst, ...); we are
        only interested in source distributions (sdist), the bdist*
        ones being binary.

        Args:
            version (str): Release name or version
            release (dict or [dict]): Full release object(s)

        Returns:
            [dict]: The sdist release artifacts (possibly empty)

        """
        if not release:
            return []
        if not isinstance(release, list):
            release = [release]
        # Filter only on 'sdist' package type
        return [rel for rel in release if rel['packagetype'] == 'sdist']

    def _cleanup_release_artifacts(self, archive_path, directory_path):
        """Clean up intermediary files which no longer need to be
        present.

        Args:
            archive_path (str): Path to the downloaded archive
            directory_path (str): Path to the uncompressed archive

        """
        if directory_path and os.path.exists(directory_path):
            # Lazy %-style args: the message is only formatted when the
            # DEBUG level is actually enabled
            logging.debug('Clean up uncompressed archive path %s',
                          directory_path)
            shutil.rmtree(directory_path)
        if archive_path and os.path.exists(archive_path):
            logging.debug('Clean up archive %s', archive_path)
            os.unlink(archive_path)

    def _fetch_and_uncompress_releases(self, version, releases):
        """Fetch and uncompress sdist release artifacts.

        Args:
            version (str): Release name or version
            releases ([dict]): List of source distribution release
                artifacts

        Yields:
            tuple (release, archive_path, uncompressed_path, pkginfo)
            as produced by ``client.fetch_release_artifact`` (the
            previous docstring advertised a 3-tuple, but the caller
            unpacks 4 values).

        """
        for release in releases:
            # flatten the metadata to ease reading
            _flattenned_release = {
                'name': version,
                'message': release.get('comment_text', ''),
                'sha256': release['digests']['sha256'],
                'size': release['size'],
                'filename': release['filename'],
                'url': release['url'],
                'date': release['upload_time'],
            }
            # fetch and write locally archives
            yield self.client.fetch_release_artifact(
                self.project, _flattenned_release)

    def releases(self):
        """Fetch metadata and data per release.

        This:

        - downloads and uncompresses the release artifacts
        - yields the (release_info, author, release, uncompressed_path)
          tuples
        - cleans up the intermediary fetched artifact files

        Yields:
            tuple (release_info, author, release, uncompressed_path)
            where:

            - release_info (dict): release's associated version info
            - author (dict): Author information for the release
            - release (dict): release metadata
            - uncompressed_path (str): Path to uncompressed artifact

        """
        # Compute information per release
        releases_dict = self.data['releases']
        for version, releases in releases_dict.items():
            releases = self._filter_releases(version, releases)
            if not releases:
                # Fix: logging.warn is a deprecated alias of
                # logging.warning
                logging.warning('%s %s: No source artifact found, skipping',
                                self.project, version)
                continue
            _releases = self._fetch_and_uncompress_releases(version, releases)
            for _release, _archive, _dir_path, _pkginfo in _releases:
                _release_info = _pkginfo
                if not _release_info:  # fallback to pypi api metadata
                    logging.warning('No PKG-INFO detected for %s, %s, skipping',
                                    self.project, _release['name'])
                    _release_info = self.info(release_name=version)
                    # Fix: info() returns the flattened project subset,
                    # which has no 'info' key — the old code raised
                    # KeyError here.  The author fields live in the raw
                    # per-release data (now cached by the info() call).
                    _author = author(self._data(version)['info'])
                else:
                    _author = author(_release_info)
                yield _release_info, _author, _release, _dir_path
                self._cleanup_release_artifacts(_archive, _dir_path)
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Fri, Jul 4, 1:25 PM (6 d, 8 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3452576
Attached To
rDLDPY PyPI loader
Event Timeline
Log In to Comment