Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9123122
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
View Options
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
index cc1b8f3..5e664f3 100644
--- a/swh/loader/pypi/model.py
+++ b/swh/loader/pypi/model.py
@@ -1,187 +1,188 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import logging
def info(data):
    """Extract the project-level subset of a metadata payload.

    Args:
        data (dict): payload whose 'info' entry holds at least the keys
            'home_page', 'description', 'summary', 'license',
            'package_url' and 'project_url'.

    Returns:
        dict: the six values listed above, plus an 'upstream' entry set to
        the 'Homepage' value of the optional 'project_urls' mapping, or
        None when that mapping or that value is missing or empty.

    Raises:
        KeyError: if 'info' or one of the six required keys is absent.

    """
    project = data['info']
    copied_keys = (
        'home_page',
        'description',
        'summary',
        'license',
        'package_url',
        'project_url',
    )
    subset = {key: project[key] for key in copied_keys}

    # 'project_urls' is optional and may be present but empty/None.
    urls = project.get('project_urls')
    homepage = urls.get('Homepage') if urls else None
    subset['upstream'] = homepage if homepage else None
    return subset
def author(data):
    """Extract the author subset of a metadata payload.

    Args:
        data (dict): payload whose 'info' entry holds the keys 'author'
            and 'author_email'; either value may be None or empty.

    Returns:
        dict: with keys 'fullname', 'name' and 'email'.

        - 'fullname' is the utf-8 encoded 'name <email>' when an email is
          set, the encoded name otherwise, and b'' when neither is set.
        - 'name' and 'email' are the utf-8 encoded values, or None when
          the corresponding value is None (or when no full name could be
          built at all).

    Raises:
        KeyError: if 'info', 'author' or 'author_email' is absent.

    """
    details = data['info']
    name = details['author']
    email = details['author_email']

    if email:
        # A missing name is treated like an empty one so the literal text
        # "None" never ends up in the full name.
        fullname = '%s <%s>' % (name or '', email)
    else:
        fullname = name

    if not fullname:
        return {'fullname': b'', 'name': None, 'email': None}

    # Only one of name/email is guaranteed to be set at this point, so
    # each one is encoded only when it is not None.
    return {
        'fullname': fullname.encode('utf-8'),
        'name': name.encode('utf-8') if name is not None else None,
        'email': email.encode('utf-8') if email is not None else None,
    }
class PyPiProject:
    """PyPi project representation

    This permits to extract information for the:

    - project, either the latest information (from the last revision)
    - either the information for a given release
    - Symmetrically for the release author information

    Release artifacts are retrieved through the client's
    `fetch_release_artifact` method.

    Args:
        client: object exposing `info(url)`, `release(project, name)` and
            `fetch_release_artifact(project, release)`.
        project (str): project name
        project_metadata_url (str): url of the project's metadata
        data (dict): already fetched project metadata; when empty or None,
            it is retrieved with `client.info(project_metadata_url)`.

    """
    def __init__(self, client, project, project_metadata_url, data=None):
        self.client = client
        self.project = project
        self.project_metadata_url = project_metadata_url
        if data:
            self.data = data
        else:
            self.data = client.info(project_metadata_url)

        self.last_version = self.data['info']['version']
        # The project-level metadata doubles as the metadata of the last
        # version, so it seeds the per-release cache.
        self.cache = {
            self.last_version: self.data
        }

    def _data(self, release_name=None):
        """Fetch data per release and cache it. Returns the cached data if
           already fetched.

        Args:
            release_name (str): release to look up; when empty or None,
                the project-level data is returned.

        Returns:
            dict: metadata for the release (or for the project).

        """
        if not release_name:
            return self.data

        data = self.cache.get(release_name)
        if not data:
            data = self.client.release(self.project, release_name)
            self.cache[release_name] = data
        return data

    def info(self, release_name=None):
        """Compute release information for release provided or the latest one.

        """
        return info(self._data(release_name))

    def author(self, release_name=None):
        """Compute author for the provided release if provided (use the latest
           release otherwise).

        """
        return author(self._data(release_name))

    def _parse_release_artifact(self, version, release):
        """Heuristically determine the release artifact to use as a release
           file.

        Only artifacts whose 'packagetype' is 'sdist' (source) are
        considered; other package types (bdist_egg, bdist_wheel, bdist_rpm,
        bdist_msi, bdist_wininst, ...) are ignored for now.

        Args:
            version (str): release version, only used in log messages
            release: either a list of artifact dicts (each holding at
                least 'packagetype' and 'filename') or a single artifact.

        Returns:
            The first 'sdist' artifact of the list, `release` itself when
            it is not a list, or None when `release` is empty or holds no
            source artifact.

        """
        if not release:
            return None

        if not isinstance(release, list):
            return release

        if len(release) > 1:
            logging.warning(
                '%s %s: Multiple release artifacts (%s)',
                self.project, version,
                [(rel['packagetype'], rel['filename']) for rel in release])

        sdist = [rel for rel in release if rel['packagetype'] == 'sdist']
        if not sdist:
            logging.warning(
                '%s %s: No source artifact found, skipping',
                self.project, version)
            return None

        if len(sdist) > 1:
            logging.warning(
                '%s %s: Multiple sdist files detected (%s)!',
                self.project, version,
                ','.join([rel['filename'] for rel in sdist]))

        # FIXME: take the first one?
        # Pick among the source artifacts only: the first entry of the
        # unfiltered list may be a binary package.
        return sdist[0]

    def releases(self):
        """Fetch metadata and data per release.

        Releases without a usable source artifact are skipped. For the
        others, the artifact is retrieved through the client's
        `fetch_release_artifact` method.

        Yields:
            tuple (version, dict) where the dict holds the release
            information under 'info' and the result of
            `fetch_release_artifact` under 'release'.

        """
        for version, artifacts in self.data['releases'].items():
            # Select the artifact first: a release with nothing to load
            # does not need its metadata fetched at all.
            release = self._parse_release_artifact(version, artifacts)
            if not release:
                continue

            if version == self.last_version:  # avoid an extra query
                release_info = self.info()
            else:
                release_info = self.info(release_name=version)

            # flatten the metadata to ease reading
            flattened_release = {
                'name': version,
                'message': release.get('comment_text', ''),
                'sha256': release['digests']['sha256'],
                'size': release['size'],
                'filename': release['filename'],
                'url': release['url'],
                'date': release['upload_time'],
            }

            # fetch and write locally archives
            fetched_release = self.client.fetch_release_artifact(
                self.project, flattened_release)

            yield version, {
                'info': release_info,
                'release': fetched_release,
            }
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sat, Jun 21, 5:05 PM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3444870
Attached To
rDLDPY PyPI loader
Event Timeline
Log In to Comment