F9125564: diff of swh/loader/pypi/loader.py (text/x-diff, 7 KB), attached to rDLDPY PyPI loader
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
index 2ee8e06..96ec9c6 100644
--- a/swh/loader/pypi/loader.py
+++ b/swh/loader/pypi/loader.py
@@ -1,183 +1,179 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information

import arrow
import logging
import os
import shutil

from swh.loader.core.utils import clean_dangling_folders
from swh.loader.core.loader import SWHLoader
from swh.model.from_disk import Directory
from swh.model.identifiers import (
    release_identifier, revision_identifier, snapshot_identifier,
    identifier_to_bytes, normalize_timestamp
)

from .client import PyPiClient
from .model import PyPiProject


TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'


class PyPiLoader(SWHLoader):
    CONFIG_BASE_FILENAME = 'loader/pypi'
    ADDITIONAL_CONFIG = {
        'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
        'cache': ('bool', False),
        'cache_dir': ('str', ''),
        'debug': ('bool', False),  # NOT FOR PRODUCTION
    }

    def __init__(self):
        super().__init__(logging_class='swh.loader.pypi.PyPiLoader')
        self.origin_id = None
        self.temp_directory = self.config['temp_directory']
        self.pypi_client = PyPiClient(
            temp_directory=self.temp_directory,
            cache=self.config['cache'],
            cache_dir=self.config['cache_dir'])
        self.debug = self.config['debug']

    def pre_cleanup(self):
        """(override) To prevent disk explosion if some other workers exploded
        in mid-air (OOM killed), we try and clean up dangling files.
        """
        clean_dangling_folders(self.temp_directory,
                               pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
                               log=self.log)

    def cleanup(self):
        """(override) Clean up temporary disk use
        """
        if self.debug:
            self.log.warn('** DEBUG MODE ** Will not clean up temp dir %s' % (
                self.temp_directory
            ))
            return
        if os.path.exists(self.temp_directory):
            self.log.debug('Clean up %s' % self.temp_directory)
            shutil.rmtree(self.temp_directory)
def prepare_origin_visit(self, project_name, origin_url,
origin_metadata_url=None):
"""(override) Prepare the origin visit information
Args:
project_name (str): Project's simple name
origin_url (str): Project's main url
origin_metadata_url (str): Project's metadata url
"""
self.origin = {
'url': origin_url,
'type': 'pypi',
}
self.visit_date = None # loader core will populate it
def prepare(self, project_name, origin_url,
origin_metadata_url=None):
"""(override) Keep reference to the origin url (project) and the
project metadata url
Args:
project_name (str): Project's simple name
origin_url (str): Project's main url
origin_metadata_url (str): Project's metadata url
"""
self.project_name = project_name
self.origin_url = origin_url
self.origin_metadata_url = origin_metadata_url
self.project = PyPiProject(self.pypi_client, self.project_name,
self.origin_metadata_url)

    def fetch_data(self):
-        """(override) This will fetch and prepare the needed releases.
+        """(override) Fetch and collect swh objects.
        """
-        self.pypi_releases = self.project.releases()
-
-    def store_data(self):
-        """(override) This collects the necessary objects information and send
-        them to storage.
-
-        """
-        _snapshot = {
+        self._snapshot = {
            'branches': {}
        }
+        self._contents = []
+        self._directories = []
+        self._revisions = []
+        self._releases = []
-        _contents = []
-        _directories = []
-        _revisions = []
-        _releases = []
-
-        for version, _release in self.pypi_releases:
+        for version, _release in self.project.releases():
            info = self.project.info(version)
            author = self.project.author(version)
            logging.debug('author: %s' % author)
            release = _release['release']
            _dir_path = release.pop('directory')
            _dir_path = _dir_path.encode('utf-8')
            directory = Directory.from_disk(path=_dir_path, data=True)
            _objects = directory.collect()
-            _contents.extend(_objects['content'].values())
-            _directories.extend(_objects['directory'].values())
+            self._contents.extend(_objects['content'].values())
+            self._directories.extend(_objects['directory'].values())
            date = normalize_timestamp(
                int(arrow.get(release['date']).timestamp))
            name = release['name'].encode('utf-8')
            message = release['message'].encode('utf-8')
            _revision = {
                'synthetic': True,
                'metadata': {
                    'original_artifact': [release],
                    'project': info,
                },
                'author': author,
                'date': date,
                'committer': author,
                'committer_date': date,
                'name': name,
                'message': message,
                'directory': directory.hash,
                'parents': [],
                'type': 'tar',
            }
            _revision['id'] = identifier_to_bytes(
                revision_identifier(_revision))
-            _revisions.append(_revision)
+            self._revisions.append(_revision)
            _release = {
                'name': name,
                'author': author,
                'date': date,
                'message': message,
                'target_type': 'revision',
                'target': _revision['id'],
                'synthetic': False,
            }
            _release['id'] = identifier_to_bytes(
                release_identifier(_release))
-            _releases.append(_release)
+            self._releases.append(_release)
-            _snapshot['branches'][name] = {
+            self._snapshot['branches'][name] = {
                'target': _release['id'],
                'target_type': 'release',
            }
-        _snapshot['id'] = identifier_to_bytes(
-            snapshot_identifier(_snapshot))
+        self._snapshot['id'] = identifier_to_bytes(
+            snapshot_identifier(self._snapshot))
+
+    def store_data(self):
+        """(override) This sends collected objects to storage.
-        self.maybe_load_contents(_contents)
-        self.maybe_load_directories(_directories)
-        self.maybe_load_revisions(_revisions)
-        self.maybe_load_releases(_releases)
-        self.maybe_load_snapshot(_snapshot)
+        """
+        self.maybe_load_contents(self._contents)
+        self.maybe_load_directories(self._directories)
+        self.maybe_load_revisions(self._revisions)
+        self.maybe_load_releases(self._releases)
+        self.maybe_load_snapshot(self._snapshot)
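
After this change the loading cycle is split cleanly: fetch_data() builds the contents, directories, revisions, releases and the snapshot in memory, and store_data() only forwards what was collected to storage. For reference, a minimal hypothetical sketch of the order in which these overridden hooks are exercised; the project name and URLs below are illustrative placeholders, and the scheduler/loader-core plumbing that normally drives the calls is elided.

from swh.loader.pypi.loader import PyPiLoader

# Illustrative only: in production the loader-core machinery is expected to
# invoke these hooks; the arguments here are placeholder examples.
loader = PyPiLoader()

project_name = 'requests'                             # hypothetical project
origin_url = 'https://pypi.org/project/requests/'     # hypothetical origin url
metadata_url = 'https://pypi.org/pypi/requests/json'  # hypothetical metadata url

loader.prepare_origin_visit(project_name, origin_url, metadata_url)
loader.prepare(project_name, origin_url, metadata_url)
loader.fetch_data()   # collect contents, directories, revisions, releases, snapshot
loader.store_data()   # send the collected objects to storage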