Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index cc1b8f3..5e664f3 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -1,187 +1,188 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
def info(data):
    """Extract the project-level subset of a metadata payload.

    Args:
        data (dict): payload whose 'info' entry holds the keys
            'home_page', 'description', 'summary', 'license',
            'package_url' and 'project_url', and optionally
            'project_urls'.

    Returns:
        dict: the six keys above copied verbatim, plus 'upstream', which
        is the 'Homepage' entry of 'project_urls' when that entry is
        non-empty and None otherwise.

    """
    metadata = data['info']
    copied_keys = (
        'home_page',
        'description',
        'summary',
        'license',
        'package_url',
        'project_url',
    )
    subset = {key: metadata[key] for key in copied_keys}

    # A missing or empty 'project_urls' mapping means no upstream is known.
    urls = metadata.get('project_urls') or {}
    subset['upstream'] = urls.get('Homepage') or None
    return subset
def author(data):
    """Extract the author subset of a metadata payload.

    Args:
        data (dict): payload whose 'info' entry holds the keys 'author'
            and 'author_email'; either value may be None or empty.

    Returns:
        dict: with keys 'fullname', 'name' and 'email'. When neither a
        name nor an email is available, 'fullname' is b'' and the two
        other values are None. Otherwise 'fullname' is the utf-8 encoded
        'name <email>' (or just the name when there is no email), and
        'name' / 'email' are the utf-8 encoded values, or None for a
        value that is None in the payload.

    """
    details = data['info']
    name = details['author']
    email = details['author_email']

    if email:
        # `name or ''` keeps the literal 'None' out of the fullname when
        # only the email is known; an empty name formats the same way.
        fullname = '%s <%s>' % (name or '', email)
    else:
        fullname = name

    if not fullname:
        return {'fullname': b'', 'name': None, 'email': None}

    def _encode(value):
        # Either field may be None independently of the other; calling
        # .encode on None would raise AttributeError.
        return None if value is None else value.encode('utf-8')

    return {
        'fullname': fullname.encode('utf-8'),
        'name': _encode(name),
        'email': _encode(email),
    }
class PyPiProject:
    """PyPI project representation.

    This permits to extract information for:

    - the project, either the latest information (from the last
      revision) or the information for a given release;
    - symmetrically, the release author information.

    It also iterates over the project's releases, delegating the
    retrieval of each selected release artifact to the client.

    Args:
        client: object providing `info(url)`, `release(project, name)`
            and `fetch_release_artifact(project, release)`.
        project (str): project name, passed back to the client.
        project_metadata_url (str): url given to `client.info` when
            `data` is not provided.
        data (dict): already fetched project metadata; when empty or
            None it is fetched through `client.info`.

    """
    def __init__(self, client, project, project_metadata_url, data=None):
        self.client = client
        self.project = project
        self.project_metadata_url = project_metadata_url
        if data:
            self.data = data
        else:
            self.data = client.info(project_metadata_url)

        self.last_version = self.data['info']['version']
        # Per-release metadata cache, seeded with the latest release so
        # that asking for it by name costs no extra query.
        self.cache = {
            self.last_version: self.data
        }

    def _data(self, release_name=None):
        """Fetch data per release and cache it.

        Args:
            release_name (str): release to look up; when empty or None,
                the project's own data is returned.

        Returns:
            The cached data for `release_name` when already fetched,
            otherwise the result of `client.release`, which is cached
            before being returned.

        """
        if not release_name:
            return self.data

        data = self.cache.get(release_name)
        if not data:
            data = self.client.release(self.project, release_name)
            self.cache[release_name] = data
        return data

    def info(self, release_name=None):
        """Compute release information for the release provided or the
        latest one.

        """
        return info(self._data(release_name))

    def author(self, release_name=None):
        """Compute author for the provided release if provided (use the
        latest release otherwise).

        """
        return author(self._data(release_name))

    def _parse_release_artifact(self, version, release):
        """Heuristically determine the release artifact to use as a
        release file.

        Only artifacts whose 'packagetype' is 'sdist' (source) are
        considered; others are not (yet?).

        Args:
            version (str): release version, used in log messages only.
            release (list): artifact dicts, each holding at least the
                keys 'packagetype' and 'filename'.

        Returns:
            dict: the first 'sdist' artifact, or None when `release` is
            empty, is not a list, or holds no 'sdist' artifact.

        """
        if not release:
            return None

        # FIXME: there can be multiple 'package_type' here:
        # sdist, bdist_egg, bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst
        artifacts = release if isinstance(release, list) else []
        if len(artifacts) > 1:
            logging.warning(
                '%s %s: Multiple release artifacts (%s)',
                self.project, version,
                [(rel['packagetype'], rel['filename']) for rel in artifacts])

        # Filter on the 'sdist' package type (source code)
        sdist = [rel for rel in artifacts if rel['packagetype'] == 'sdist']
        if not sdist:
            logging.warning(
                '%s %s: No source artifact found, skipping',
                self.project, version)
            return None

        if len(sdist) > 1:
            logging.warning(
                '%s %s: Multiple sdist files detected (%s)!',
                self.project, version,
                ','.join([rel['filename'] for rel in sdist]))

        # FIXME: take the first one?
        # Pick from the filtered list: the first entry of the unfiltered
        # one may be a wheel or another non-source artifact.
        return sdist[0]

    def releases(self):
        """Fetch metadata and data per release.

        Releases without a usable source artifact are skipped. For the
        others, the selected artifact is handed over to
        `client.fetch_release_artifact`.

        Yields:
            tuple (version, dict): the dict holds the release
            information under 'info' and the value returned by
            `client.fetch_release_artifact` under 'release'.

        """
        for version, artifacts in self.data['releases'].items():
            # Select the artifact first so that releases which end up
            # skipped do not trigger a metadata query.
            release = self._parse_release_artifact(version, artifacts)
            if not release:
                continue

            if version == self.last_version:  # avoid an extra query
                release_info = self.info()
            else:
                release_info = self.info(release_name=version)

            # flatten the metadata to ease reading
            flattened_release = {
                'name': version,
                'message': release.get('comment_text', ''),
                'sha256': release['digests']['sha256'],
                'size': release['size'],
                'filename': release['filename'],
                'url': release['url'],
                'date': release['upload_time'],
            }

            # fetch and write locally archives
            fetched_release = self.client.fetch_release_artifact(
                self.project, flattened_release)

            yield version, {
                'info': release_info,
                'release': fetched_release,
            }

File Metadata

Mime Type
text/x-diff
Expires
Sat, Jun 21, 5:05 PM (2 w, 11 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3444870

Event Timeline