Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7066374
D408.id1272.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
54 KB
Subscribers
None
D408.id1272.diff
View Options
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
swh-loader-pypi
====================
-SWH PyPi loader's source code repository
+SWH PyPI loader's source code repository
diff --git a/debian/changelog b/debian/changelog
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-swh-loader-pypy (0.0.1-1) unstable; urgency=low
+swh-loader-pypi (0.0.1-1) unstable; urgency=low
* Initial bootstrap
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,10 @@
Build-Depends: debhelper (>= 9),
dh-python (>= 2),
python3-all,
+ python3-arrow,
python3-nose,
+ python3-pkginfo,
+ python3-requests,
python3-setuptools,
python3-swh.core,
python3-swh.storage,
@@ -21,4 +24,4 @@
python3-swh.loader.core,
python3-swh.storage,
${misc:Depends}, ${python3:Depends}
-Description: Software Heritage PyPi Loader
+Description: Software Heritage PyPI Loader
diff --git a/debian/rules b/debian/rules
--- a/debian/rules
+++ b/debian/rules
@@ -9,3 +9,4 @@
override_dh_install:
dh_install
rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py
+ rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/loader/__init__.py
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
setuptools
vcversioner
+requests
+arrow
+pkginfo
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
setup(
name='swh.loader.pypi',
- description='Software Heritage PyPi Loader',
+ description='Software Heritage PyPI Loader',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/source/swh-loader-pypi.git',
@@ -35,6 +35,6 @@
install_requires=parse_requirements() + parse_requirements('swh'),
test_requires=parse_requirements('test'),
setup_requires=['vcversioner'],
- vcversioner={},
+ vcversioner={'version_module_paths': ['swh/loader/pypi/_version.py']},
include_package_data=True,
)
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
--- a/swh/loader/__init__.py
+++ b/swh/loader/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/loader/pypi/.gitignore b/swh/loader/pypi/.gitignore
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/.gitignore
@@ -0,0 +1 @@
+_version.py
diff --git a/swh/loader/pypi/client.py b/swh/loader/pypi/client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/client.py
@@ -0,0 +1,469 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import hashlib
+import logging
+import os
+import requests
+import shutil
+
+from .converters import info, author
+
+from pkginfo import UnpackedSDist
+
+from swh.core import tarball
+from swh.model import hashutil
+
+try:
+ from swh.loader.pypi._version import __version__
+except ImportError:
+ __version__ = 'devel'
+
+
+def convert_to_hex(d):
+    """Return a copy of a flat dictionary with its bytes values hex-encoded.
+
+    Args:
+        d (dict): flat dictionary whose values may be raw hash bytes.
+
+    Returns:
+        A new dictionary with the same keys, where every bytes value is
+        replaced by its hexadecimal string and any other value is kept
+        untouched. A falsy input (None, empty dict) is returned as is.
+
+    """
+    if not d:
+        return d
+
+    return {
+        key: (hashutil.hash_to_hex(value)
+              if isinstance(value, bytes) else value)
+        for key, value in d.items()
+    }
+
+
+def _to_dict(pkginfo):
+    """Flatten a pkginfo parsed structure into a plain dict.
+
+    Iterating over the structure gives the attribute names; each one is
+    looked up on the structure to get its value.
+
+    Args:
+        pkginfo (UnpackedSDist): The sdist parsed structure
+
+    Returns:
+        dict mapping each attribute name to its value
+
+    """
+    return {key: getattr(pkginfo, key) for key in pkginfo}
+
+
+def _project_pkginfo(dir_path):
+    """Given an uncompressed path holding the pkginfo file, returns a
+       pkginfo parsed structure as a dict.
+
+       A release artifact holds one folder at its root. For example:
+       $ tar tvf zprint-0.0.6.tar.gz
+       drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
+       ...
+
+    Args:
+        dir_path (str): Path to the uncompressed directory
+                        representing a release artifact from pypi.
+
+    Returns:
+        the pkginfo parsed structure as a dict, or None when no root
+        entry of `dir_path` holds a PKG-INFO file (this includes an
+        empty `dir_path`).
+
+    """
+    # Look at every root entry instead of blindly taking the first one:
+    # the directory may be empty, or hold stray entries next to the
+    # project folder. sorted() keeps the lookup deterministic since
+    # os.listdir gives no ordering guarantee.
+    for entry in sorted(os.listdir(dir_path)):
+        pkginfo_path = os.path.join(dir_path, entry, 'PKG-INFO')
+        if os.path.exists(pkginfo_path):
+            return _to_dict(UnpackedSDist(pkginfo_path))
+    return None
+
+
+class PyPIClient:
+    """PyPI client in charge of discussing with the pypi server.
+
+    Args:
+        base_url (str): PyPI instance's base url
+        temp_directory (str): Path to the temporary disk location used
+                              for uncompressing the release artifacts
+
+        cache (bool): Use an internal cache to keep the archives on
+                      disk. Default is not to use it.
+        cache_dir (str): cache's disk location (relevant only with
+                        `cache` to True)
+
+    Those last 2 parameters are not for production use.
+
+    """
+    def __init__(self, base_url='https://pypi.org/pypi',
+                 temp_directory=None, cache=False, cache_dir=None):
+        self.version = __version__
+        self.base_url = base_url
+        self.temp_directory = temp_directory
+
+        self.do_cache = cache
+        if self.do_cache:
+            self.cache_dir = cache_dir
+            # raw archives are kept apart from the json responses
+            self.cache_raw_dir = os.path.join(cache_dir, 'archives')
+            os.makedirs(self.cache_raw_dir, exist_ok=True)
+        self.session = requests.session()
+        # keyword arguments passed along with every http query
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage PyPI Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def _save_response(self, response, project=None):
+        """Dump the json body of a server response in the cache directory.
+
+        The dump is gzip-compressed and written in `self.cache_dir`,
+        under a name made of the optional project name and the current
+        UTC date.
+
+        Args:
+            response (Response): full server response
+            project (str): optional project name, used as prefix of
+                           the dump's filename
+
+        Returns:
+            nothing
+
+        """
+        import gzip
+        from json import dumps
+        datepath = arrow.utcnow().isoformat()
+        name = '%s.gz' % datepath if project is None else '%s-%s.gz' % (
+            project, datepath)
+        fname = os.path.join(self.cache_dir, name)
+        with gzip.open(fname, 'w') as f:
+            f.write(bytes(
+                dumps(response.json()),
+                'utf-8'
+            ))
+
+    def _save_raw(self, filepath):
+        """In cache mode, backup the filepath to self.cache_raw_dir
+
+        Args:
+            filepath (str): Path of the file to save
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        shutil.copyfile(filepath, _archive)
+
+    def _get_raw(self, filepath):
+        """In cache mode, we try to retrieve the cached file.
+
+        Args:
+            filepath (str): Path where the archive is expected; only
+                            its basename is looked up in the cache
+
+        Returns:
+            filepath once the cached archive got copied there, None if
+            the cache holds no such archive.
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        if not os.path.exists(_archive):
+            return None
+        shutil.copyfile(_archive, filepath)
+        return filepath
+
+    def _get(self, url, project=None):
+        """Get query to the url.
+
+        Args:
+            url (str): Url
+            project (str): optional project name, only used to name
+                           the cached response in cache mode
+
+        Raises:
+            ValueError in case of failing to query
+
+        Returns:
+            Response as dict if ok
+
+        """
+        response = self.session.get(url, **self.params)
+        if response.status_code != 200:
+            raise ValueError("Fail to query '%s'. Reason: %s" % (
+                url, response.status_code))
+
+        if self.do_cache:
+            self._save_response(response, project=project)
+
+        return response.json()
+
+    def info(self, project_url, project=None):
+        """Given a metadata project url, retrieve the raw json response
+
+        Args:
+            project_url (str): Project's pypi to retrieve information
+            project (str): optional project name (see `_get`)
+
+        Returns:
+            Main project information as dict.
+
+        """
+        return self._get(project_url, project=project)
+
+    def release(self, project, release):
+        """Given a project and a release name, retrieve the raw information
+           for said project's release.
+
+        Args:
+            project (str): Project's name
+            release (str): Release name, interpolated in the queried url
+
+        Returns:
+            Release information as dict
+
+        """
+        release_url = '%s/%s/%s/json' % (self.base_url, project, release)
+        return self._get(release_url, project=project)
+
+    def prepare_release_artifacts(self, project, version, release_artifacts):
+        """For a given project's release version, fetch and prepare the
+           associated release artifacts.
+
+        Args:
+            project (str): PyPI Project
+            version (str): Release version
+            release_artifacts ([dict]): List of source distribution
+                                        release artifacts
+
+        Yields:
+            tuple (release, artifact, filepath, uncompressed_path, pkginfo)
+            where:
+
+            - release (dict): release information
+            - artifact (dict): release artifact's associated info
+            - filepath (str): Local artifact's path
+            - uncompressed_archive_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        for artifact in release_artifacts:
+            release = {
+                'name': version,
+                # `or ''`: the key may be present with a null value
+                'message': artifact.get('comment_text') or '',
+            }
+            artifact = {
+                'sha256': artifact['digests']['sha256'],
+                'size': artifact['size'],
+                'filename': artifact['filename'],
+                'url': artifact['url'],
+                'date': artifact['upload_time'],
+            }
+            yield self.prepare_release_artifact(project, release, artifact)
+
+    def prepare_release_artifact(self, project, release, artifact):
+        """For a given release project, fetch and prepare the associated
+           artifact.
+
+        This:
+        - fetches the artifact (or retrieves it from the cache)
+        - checks the size, hashes match
+        - uncompress the artifact locally
+        - computes the swh hashes
+        - returns the associated information for the artifact
+
+        Args:
+            project (str): Project's name
+            release (dict): Release information
+            artifact (dict): Release artifact information; it gets
+                             updated with the archive type and hashes
+
+        Raises:
+            ValueError when the download fails or when the downloaded
+            archive does not match the expected size or sha256.
+
+        Returns:
+            tuple (release, artifact, filepath, uncompressed_path, pkginfo)
+            where:
+
+            - release (dict): Release information (name, message)
+            - artifact (dict): release artifact's information
+            - filepath (str): Local artifact's path
+            - uncompressed_archive_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        version = release['name']
+        logging.debug('Release version: %s', version)
+        path = os.path.join(self.temp_directory, project, version)
+        os.makedirs(path, exist_ok=True)
+        filepath = os.path.join(path, artifact['filename'])
+        logging.debug('Artifact local path: %s', filepath)
+
+        _filepath = None
+        if self.do_cache:
+            _filepath = self._get_raw(filepath)
+
+        if not _filepath:  # no cache hit, we fetch from pypi
+            url = artifact['url']
+            r = self.session.get(url, **self.params)
+            status = r.status_code
+            if status != 200:
+                if status == 404:
+                    raise ValueError("Project '%s' not found" % url)
+                msg = "Fail to query '%s'\nCode: %s\nDetails: %s" % (
+                    url, r.status_code, r.content)
+                raise ValueError(msg)
+
+            # The body is fully in memory at this point, so it is
+            # checked and written in one go rather than iterated in
+            # (by default 1-byte) chunks.
+            content = r.content
+            _len = len(content)
+            if _len != artifact['size']:
+                raise ValueError('Error when checking size: %s != %s' % (
+                    artifact['size'], _len))
+
+            # checking digest before writing, so that no corrupted
+            # archive is left on disk
+            actual_digest = hashlib.sha256(content).hexdigest()
+            if actual_digest != artifact['sha256']:
+                raise ValueError(
+                    '%s %s: Checksum mismatched: %s != %s' % (
+                        project, version, artifact['sha256'], actual_digest))
+
+            with open(filepath, 'wb') as f:
+                f.write(content)
+
+            if self.do_cache:
+                self._save_raw(filepath)
+
+        uncompress_path = os.path.join(path, 'uncompress')
+        os.makedirs(uncompress_path, exist_ok=True)
+
+        nature = tarball.uncompress(filepath, uncompress_path)
+
+        hashes = hashutil.hash_path(filepath)
+        hashes.pop('length')  # 'size' entry is already referenced
+        artifact_hashes = convert_to_hex(hashes)
+        artifact['archive_type'] = nature
+        artifact.update(artifact_hashes)
+        pkginfo = _project_pkginfo(uncompress_path)
+        return release, artifact, filepath, uncompress_path, pkginfo
+
+
+class PyPIProject:
+    """PyPI project representation
+
+    This allows to extract information for a given project:
+    - either its latest information (from the latest release)
+    - either for a given release version
+    - uncompress associated fetched release artifacts
+
+    This also fetches and uncompresses the associated release
+    artifacts.
+
+    Args:
+        client (PyPIClient): client used to query pypi
+        project (str): Project's name
+        project_metadata_url (str): Project's metadata url
+        data (dict): Project's metadata if already known. When not
+                     provided, it is fetched through the client.
+
+    """
+    def __init__(self, client, project, project_metadata_url, data=None):
+        self.client = client
+        self.project = project
+        self.project_metadata_url = project_metadata_url
+        if data:
+            self.data = data
+        else:
+            self.data = client.info(project_metadata_url, project)
+
+        self.last_version = self.data['info']['version']
+        # per release metadata, seeded with the latest release's
+        self.cache = {
+            self.last_version: self.data
+        }
+
+    def _data(self, release_name=None):
+        """Fetch data per release and cache it.  Returns the cache retrieved
+           data if already fetched.
+
+        Args:
+            release_name (str): Release name; when not provided, the
+                                project's main data is returned
+
+        """
+        if release_name:
+            data = self.cache.get(release_name)
+            if not data:
+                data = self.client.release(self.project, release_name)
+                self.cache[release_name] = data
+        else:
+            data = self.data
+        return data
+
+    def info(self, release_name=None):
+        """Compute release information for provided release (or latest one).
+
+        """
+        return info(self._data(release_name))
+
+    def _filter_release_artifacts(self, version, releases, known_artifacts):
+        """Filter not already known sdist (source distribution) release.
+
+        There can be multiple 'package_type' (sdist, bdist_egg,
+        bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst, ...), we are
+        only interested in source distribution (sdist), others bdist*
+        are binary
+
+        Args:
+            version (str): Release name or version
+            releases (dict/[dict]): Full release object (or a list of)
+            known_artifacts: Container of known releases (tuple filename,
+                             sha256). It is tested with `in` once per
+                             artifact, so it must not be a one-shot
+                             iterator.
+
+        Yields:
+            an unknown release artifact
+
+        """
+        if not releases:
+            return
+        if not isinstance(releases, list):
+            releases = [releases]
+        for artifact in releases:
+            name = artifact['filename']
+            sha256 = artifact['digests']['sha256']
+            if (name, sha256) in known_artifacts:
+                logging.debug('artifact (%s, %s) already seen for release %s, skipping' % (  # noqa
+                    name, sha256, version))
+                continue
+            if artifact['packagetype'] != 'sdist':
+                continue
+            yield artifact
+
+    def _cleanup_release_artifacts(self, archive_path, directory_path):
+        """Clean intermediary files which no longer needs to be present.
+
+        Args:
+            archive_path (str): Path to the fetched archive
+            directory_path (str): Path to the uncompressed archive
+
+        """
+        if directory_path and os.path.exists(directory_path):
+            logging.debug('Clean up uncompressed archive path %s' % (
+                directory_path, ))
+            shutil.rmtree(directory_path)
+
+        if archive_path and os.path.exists(archive_path):
+            logging.debug('Clean up archive %s' % archive_path)
+            os.unlink(archive_path)
+
+    def releases(self, known_artifacts):
+        """Fetch metadata/data per release (if new release artifact detected)
+
+        For new release artifact, this:
+        - downloads and uncompresses the release artifacts.
+        - yields the (release info, author info, release, dir_path)
+        - Clean up the intermediary fetched artifact files
+
+        Artifacts without PKG-INFO are skipped (with a warning) and
+        their intermediary files cleaned up as well.
+
+        Args:
+            known_artifacts: iterable of known artifacts, each one as
+                             (artifact name, artifact sha256 hash)
+
+        Yields:
+            tuple (project_info, author, release, artifact,
+            uncompressed_path) where:
+
+            - project_info (dict): release's associated version info
+            - author (dict): Author information for the release
+            - release (dict): release metadata
+            - artifact (dict): Release artifact information
+            - uncompressed_path (str): Path to uncompressed artifact
+
+        """
+        # Materialize once: the caller may hand a one-shot iterator, on
+        # which repeated `in` tests silently miss the entries already
+        # consumed by a previous test.
+        known_artifacts = {tuple(known) for known in known_artifacts}
+        releases_dict = self.data['releases']
+        for version, releases in releases_dict.items():
+            releases = self._filter_release_artifacts(
+                version, releases, known_artifacts)
+            releases = self.client.prepare_release_artifacts(
+                self.project, version, releases)
+            for release, artifact, archive, dir_path, pkginfo in releases:
+                # try/finally: the fetched files are removed whether the
+                # artifact is skipped, fully consumed, or the consumer
+                # stops iterating.
+                try:
+                    if pkginfo is None:
+                        msg = '%s %s: No PKG-INFO detected, skipping' % (  # noqa
+                            self.project, version)
+                        logging.warning(msg)
+                        continue
+                    yield pkginfo, author(pkginfo), release, artifact, dir_path
+                finally:
+                    self._cleanup_release_artifacts(archive, dir_path)
diff --git a/swh/loader/pypi/converters.py b/swh/loader/pypi/converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/converters.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def info(data):
+    """Extract the subset of a PyPI project information we are
+       interested in.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information. Its 'info' entry is the one
+                     read.
+
+    Returns:
+        A dict subset of project information. Its 'upstream' entry is
+        the 'Homepage' project url when there is one, None otherwise.
+
+    """
+    _info = data['info']
+    copied_fields = (
+        'home_page', 'description', 'summary',
+        'license', 'package_url', 'project_url',
+    )
+    subset = {field: _info[field] for field in copied_fields}
+
+    project_urls = _info.get('project_urls') or {}
+    subset['upstream'] = project_urls.get('Homepage') or None
+
+    return subset
+
+
+def author(data):
+    """Given a dict of project/release artifact information (coming from
+       PyPI), returns an author subset.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information. Its 'author' and
+                     'author_email' entries are the ones read; a missing
+                     entry is handled as an empty one.
+
+    Returns:
+        swh-model dict representing a person: 'fullname' as bytes
+        ('name <email>', '<email>' or 'name' depending on what is
+        available), 'name' and 'email' as bytes when set. Without any
+        name nor email, the fullname is b'' and the rest is None.
+
+    """
+    name = data.get('author')
+    email = data.get('author_email')
+
+    if name and email:
+        fullname = '%s <%s>' % (name, email)
+    elif email:
+        # no name: do not leak a literal 'None' in the fullname
+        fullname = '<%s>' % email
+    else:
+        fullname = name
+
+    if not fullname:
+        return {'fullname': b'', 'name': None, 'email': None}
+
+    return {
+        'fullname': fullname.encode('utf-8'),
+        'name': name.encode('utf-8') if name else name,
+        'email': email.encode('utf-8') if email else email,
+    }
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/loader.py
@@ -0,0 +1,244 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import os
+import shutil
+
+from tempfile import mkdtemp
+
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import SWHLoader
+from swh.model.from_disk import Directory
+from swh.model.identifiers import (
+ revision_identifier, snapshot_identifier,
+ identifier_to_bytes, normalize_timestamp
+)
+
+from .client import PyPIClient, PyPIProject
+
+
+# Prefix of the per-loader temporary directory created by PyPILoader; also
+# the pattern PyPILoader.pre_cleanup uses to spot dangling directories.
+TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'
+# Marker prepended to the warnings PyPILoader logs when running in debug mode.
+DEBUG_MODE = '** DEBUG MODE **'
+
+
+class PyPILoader(SWHLoader):
+    """Loader for PyPI projects.
+
+    Each not yet known release artifact (source distribution) of the
+    project is fetched, uncompressed and turned into a synthetic
+    revision, referenced in the snapshot by a branch named after the
+    artifact's filename.
+
+    Args:
+        client (PyPIClient): client to use to discuss with pypi. When
+                             not provided, one is built out of the
+                             loader's configuration.
+
+    """
+    CONFIG_BASE_FILENAME = 'loader/pypi'
+    ADDITIONAL_CONFIG = {
+        'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
+        'cache': ('bool', False),
+        'cache_dir': ('str', ''),
+        'debug': ('bool', False),  # NOT FOR PRODUCTION
+    }
+
+    def __init__(self, client=None):
+        super().__init__(logging_class='swh.loader.pypi.PyPILoader')
+        self.origin_id = None
+        if not client:
+            temp_directory = self.config['temp_directory']
+            os.makedirs(temp_directory, exist_ok=True)
+            # one dedicated working directory per loader instance
+            self.temp_directory = mkdtemp(
+                suffix='-%s' % os.getpid(),
+                prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+                dir=temp_directory)
+            self.pypi_client = PyPIClient(
+                temp_directory=self.temp_directory,
+                cache=self.config['cache'],
+                cache_dir=self.config['cache_dir'])
+        else:
+            self.temp_directory = client.temp_directory
+            self.pypi_client = client
+        self.debug = self.config['debug']
+        self.done = False
+
+    def pre_cleanup(self):
+        """To prevent disk explosion if some other workers exploded
+        in mid-air (OOM killed), we try and clean up dangling files.
+
+        Nothing is cleaned up in debug mode.
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not pre-clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        clean_dangling_folders(self.config['temp_directory'],
+                               pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
+                               log=self.log)
+
+    def cleanup(self):
+        """Clean up temporary disk use
+
+        Nothing is cleaned up in debug mode.
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        if os.path.exists(self.temp_directory):
+            self.log.debug('Clean up %s' % self.temp_directory)
+            shutil.rmtree(self.temp_directory)
+
+    def prepare_origin_visit(self, project_name, origin_url,
+                             origin_metadata_url=None):
+        """Prepare the origin visit information
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.origin = {
+            'url': origin_url,
+            'type': 'pypi',
+        }
+        self.visit_date = None  # loader core will populate it
+
+    def _known_artifacts(self, last_snapshot):
+        """Retrieve the known releases/artifact for the origin_id.
+
+        Args:
+            last_snapshot (dict): Last snapshot for the visit
+
+        Yields:
+            tuple artifact's filename, artifact's sha256
+
+        """
+        revs = [rev['target'] for rev in last_snapshot['branches'].values()]
+        known_revisions = self.storage.revision_get(revs)
+        for revision in known_revisions:
+            artifact = revision['metadata']['original_artifact']
+            yield artifact['filename'], artifact['sha256']
+
+    def _last_snapshot(self):
+        """Retrieve the last snapshot
+
+        """
+        return self.storage.snapshot_get_latest(self.origin_id)
+
+    def prepare(self, project_name, origin_url,
+                origin_metadata_url=None):
+        """Keep reference to the origin url (project) and the
+           project metadata url
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.project_name = project_name
+        self.origin_url = origin_url
+        self.origin_metadata_url = origin_metadata_url
+        self.project = PyPIProject(self.pypi_client, self.project_name,
+                                   self.origin_metadata_url)
+        self._prepare_state()
+
+    def _prepare_state(self):
+        """Initialize internal state (snapshot, contents, directories, etc...)
+
+        This is called from `prepare` method.
+
+        """
+        last_snapshot = self._last_snapshot()
+        if last_snapshot:
+            self._snapshot = last_snapshot.copy()
+            # list(): `_known_artifacts` is a generator and the known
+            # artifacts are looked up once per candidate artifact; a
+            # one-shot generator would be exhausted by the first lookups.
+            known_artifacts = list(self._known_artifacts(self._snapshot))
+        else:
+            self._snapshot = {
+                'branches': {}
+            }
+            known_artifacts = []
+        # and the artifacts
+        # that will be the source of data to retrieve
+        self.release_artifacts = self.project.releases(known_artifacts)
+        # temporary state
+        self._contents = []
+        self._directories = []
+        self._revisions = []
+
+    def fetch_data(self):
+        """Called once per release artifact version (can be many for one
+           release).
+
+        This will for each call:
+        - retrieve a release artifact (associated to a release version)
+        - Uncompress it and compute the necessary information
+        - Computes the swh objects
+
+        Returns:
+            True as long as data to fetch exist
+
+        """
+        if self.done:
+            return False
+
+        try:
+            data = next(self.release_artifacts)
+        except StopIteration:
+            self.done = True
+            return False
+
+        project_info, author, release, artifact, dir_path = data
+        dir_path = dir_path.encode('utf-8')
+        directory = Directory.from_disk(path=dir_path, data=True)
+        _objects = directory.collect()
+
+        self._contents = _objects['content'].values()
+        self._directories = _objects['directory'].values()
+        date = normalize_timestamp(
+            int(arrow.get(artifact['date']).timestamp))
+
+        # revision message: the release name, followed by the release
+        # message when there is one
+        name = release['name'].encode('utf-8')
+        message = release['message'].encode('utf-8')
+        if message:
+            message = b'%s: %s' % (name, message)
+        else:
+            message = name
+
+        _revision = {
+            'synthetic': True,
+            'metadata': {
+                'original_artifact': artifact,
+                'project': project_info,
+            },
+            'author': author,
+            'date': date,
+            'committer': author,
+            'committer_date': date,
+            'message': message,
+            'directory': directory.hash,
+            'parents': [],
+            'type': 'tar',
+        }
+        _revision['id'] = identifier_to_bytes(
+            revision_identifier(_revision))
+        self._revisions.append(_revision)
+
+        # one branch per release artifact, named after its filename
+        branch_name = artifact['filename'].encode('utf-8')
+        self._snapshot['branches'][branch_name] = {
+            'target': _revision['id'],
+            'target_type': 'revision',
+        }
+
+        return not self.done
+
+    def generate_and_load_snapshot(self):
+        """Compute the snapshot's identifier then send the snapshot to
+           `maybe_load_snapshot`.
+
+        """
+        self._snapshot['id'] = identifier_to_bytes(
+            snapshot_identifier(self._snapshot))
+        self.maybe_load_snapshot(self._snapshot)
+
+    def store_data(self):
+        """(override) This sends collected objects to storage.
+
+        The snapshot is only sent once all release artifacts got
+        fetched.
+
+        """
+        self.maybe_load_contents(self._contents)
+        self.maybe_load_directories(self._directories)
+        self.maybe_load_revisions(self._revisions)
+
+        if self.done:
+            self.generate_and_load_snapshot()
+            self.flush()
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/model.py
@@ -0,0 +1,10 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import logging
+import shutil
+
+from .converters import info, author
diff --git a/swh/loader/pypi/tasks.py b/swh/loader/pypi/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.task import Task
+
+from .loader import PyPILoader
+
+
+class LoadPyPITsk(Task):
+    """Scheduler task loading one PyPI project through PyPILoader.
+
+    """
+    task_queue = 'swh_loader_pypi'
+
+    def run_task(self, project_name, project_url, project_metadata_url=None):
+        """Load the given project and return the loader's result.
+
+        Args:
+            project_name (str): Project's simple name
+            project_url (str): Project's main url
+            project_metadata_url (str): Project's metadata url
+
+        """
+        pypi_loader = PyPILoader()
+        # the loader logs through the task's logger
+        pypi_loader.log = self.log
+        result = pypi_loader.load(
+            project_name, project_url,
+            origin_metadata_url=project_metadata_url)
+        return result
diff --git a/swh/loader/__init__.py b/swh/loader/pypi/tests/__init__.py
copy from swh/loader/__init__.py
copy to swh/loader/pypi/tests/__init__.py
diff --git a/swh/loader/pypi/tests/common.py b/swh/loader/pypi/tests/common.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/common.py
@@ -0,0 +1,151 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import os
+import tempfile
+
+from nose.plugins.attrib import attr
+from unittest import TestCase
+
+from swh.loader.pypi.client import PyPIClient, PyPIProject
+
+
+# Directory holding the test fixtures: the project metadata json files, and
+# the client's cache directory. The path is relative, so it resolves against
+# the current working directory (presumably the repository root -- confirm).
+RESOURCES_PATH = './swh/loader/pypi/tests/resources'
+
+
+class PyPIClientWithCache(PyPIClient):
+    """PyPI client whose cache is always activated, so that tests bypass
+       the calls to pypi.
+
+    Args:
+        temp_directory (str): Path to the temporary disk location
+        cache_dir (str): cache's disk location
+
+    """
+    def __init__(self, temp_directory, cache_dir):
+        super().__init__(
+            cache=True,
+            cache_dir=cache_dir,
+            temp_directory=temp_directory,
+        )
+
+
+class LoaderNoStorage:
+    """Mixin class inhibiting the persistence (storage calls): the data
+       sent is kept in memory instead.
+
+    """
+    CONFIG_BASE_FILENAME = ''  # do not provide a real path
+    ADDITIONAL_CONFIG = {
+        'storage': ('dict', {
+            'cls': 'remote',
+            'args': {
+                'url': 'http://nowhere:5002/',  # do not provide a real storage
+            }
+        }),
+
+        # do not send any data to the storage
+        'send_contents': ('bool', False),
+        'send_directories': ('bool', False),
+        'send_revisions': ('bool', False),
+        'send_releases': ('bool', False),
+        'send_snapshot': ('bool', False),
+        'debug': ('bool', False),
+    }
+
+    def __init__(self, client=None):
+        super().__init__(client=client)
+        # in-memory collections, one per type of object
+        self.all_contents = []
+        self.all_directories = []
+        self.all_revisions = []
+        self.all_releases = []
+        self.all_snapshots = []
+
+        # same collections, indexed by type
+        self.objects = {
+            'content': self.all_contents,
+            'directory': self.all_directories,
+            'revision': self.all_revisions,
+            'release': self.all_releases,
+            'snapshot': self.all_snapshots
+        }
+
+    def _add(self, type, l):
+        """Append the new objects to the collection of the given type,
+           skipping those already in it (insertion order is kept).
+
+        Args:
+            type (str): Type of objects concerned by the action
+            l ([object]): List of 'type' object
+
+        """
+        collected = self.objects[type]
+        for candidate in l:
+            if candidate not in collected:
+                collected.append(candidate)
+
+    def maybe_load_contents(self, all_contents):
+        """Keep the contents in memory."""
+        self._add('content', all_contents)
+
+    def maybe_load_directories(self, all_directories):
+        """Keep the directories in memory."""
+        self._add('directory', all_directories)
+
+    def maybe_load_revisions(self, all_revisions):
+        """Keep the revisions in memory."""
+        self._add('revision', all_revisions)
+
+    def maybe_load_releases(self, releases):
+        """Releases are not expected here: always raise."""
+        raise ValueError('If called, the test must break.')
+
+    def maybe_load_snapshot(self, snapshot):
+        """Keep the snapshot in memory (duplicates included)."""
+        self.objects['snapshot'].append(snapshot)
+
+    # The remaining methods are overridden to do nothing.
+
+    def _store_origin_visit(self):
+        return None
+
+    def open_fetch_history(self):
+        return None
+
+    def close_fetch_history_success(self, fetch_history_id):
+        return None
+
+    def close_fetch_history_failure(self, fetch_history_id):
+        return None
+
+    def update_origin_visit(self, origin_id, visit, status):
+        return None
+
+    def close_failure(self):
+        return None
+
+    def close_success(self):
+        return None
+
+    def pre_cleanup(self):
+        return None
+
+
+@attr('fs')
+class WithProjectTest(TestCase):
+    """Base test case providing a PyPIProject built out of the
+       '0805nexter' metadata fixture, with a cache-backed client.
+
+    """
+    def setUp(self):
+        name = '0805nexter'
+
+        # project metadata comes from the fixture, not from pypi
+        with open('%s/%s.json' % (RESOURCES_PATH, name)) as f:
+            metadata = json.load(f)
+
+        workdir = tempfile.mkdtemp(
+            dir='/tmp/', prefix='swh.loader.pypi.tests-')
+        # Will use the pypi with cache
+        cached_client = PyPIClientWithCache(
+            temp_directory=workdir, cache_dir=RESOURCES_PATH)
+
+        self.project = PyPIProject(
+            client=cached_client,
+            project=name,
+            project_metadata_url='https://pypi.org/pypi/%s/json' % name,
+            data=metadata)
+        self.data = metadata
+        self.temp_dir = workdir
+        self.project_name = name
+
+    def tearDown(self):
+        # drop the working directory created in setUp
+        if os.path.exists(self.temp_dir):
+            shutil.rmtree(self.temp_dir)
diff --git a/swh/loader/pypi/tests/resources/0805nexter.json b/swh/loader/pypi/tests/resources/0805nexter.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/resources/0805nexter.json
@@ -0,0 +1 @@
+{"info":{"author":"hgtkpython","author_email":"2868989685@qq.com","bugtrack_url":null,"classifiers":[],"description":"UNKNOWN","description_content_type":null,"docs_url":null,"download_url":"UNKNOWN","downloads":{"last_day":-1,"last_month":-1,"last_week":-1},"home_page":"http://www.hp.com","keywords":null,"license":"UNKNOWN","maintainer":null,"maintainer_email":null,"name":"0805nexter","package_url":"https://pypi.org/project/0805nexter/","platform":"UNKNOWN","project_url":"https://pypi.org/project/0805nexter/","project_urls":{"Download":"UNKNOWN","Homepage":"http://www.hp.com"},"release_url":"https://pypi.org/project/0805nexter/1.2.0/","requires_dist":null,"requires_python":null,"summary":"a simple printer of nested lest","version":"1.2.0"},"last_serial":1931736,"releases":{"1.1.0":[{"comment_text":"","digests":{"md5":"07fc93fc12821c1405c3483db88154af","sha256":"52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035"},"downloads":-1,"filename":"0805nexter-1.1.0.zip","has_sig":false,"md5_digest":"07fc93fc12821c1405c3483db88154af","packagetype":"sdist","python_version":"source","requires_python":null,"size":862,"upload_time":"2016-01-31T05:28:42","url":"https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip"}],"1.2.0":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"downloads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]},"urls":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"down
loads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]}
\ No newline at end of file
diff --git a/swh/loader/pypi/tests/test_client.py b/swh/loader/pypi/tests/test_client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_client.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+
+from nose.tools import istest
+
+from swh.loader.pypi import converters
+from swh.loader.pypi.client import _project_pkginfo
+
+from .common import WithProjectTest
+
+
+class PyPIProjectTest(WithProjectTest):
+ @istest
+ def releases(self):
+ actual_releases = self.project.releases([])
+
+ expected_release_artifacts = {
+ '1.1.0': {
+ 'archive_type': 'zip',
+ 'blake2s256': 'df9413bde66e6133b10cadefad6fcf9cbbc369b47831089112c846d79f14985a', # noqa
+ 'date': '2016-01-31T05:28:42',
+ 'filename': '0805nexter-1.1.0.zip',
+ 'sha1': '127d8697db916ba1c67084052196a83319a25000',
+ 'sha1_git': '4b8f1350e6d9fa00256e974ae24c09543d85b196',
+ 'sha256': '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035', # noqa
+ 'size': 862,
+ 'url': 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
+ },
+ '1.2.0': {
+ 'archive_type': 'zip',
+ 'blake2s256': '67010586b5b9a4aaa3b1c386f9dc8b4c99e6e40f37732a717a5f9b9b1185e588', # noqa
+ 'date': '2016-01-31T05:51:25',
+ 'filename': '0805nexter-1.2.0.zip',
+ 'sha1': 'd55238554b94da7c5bf4a349ece0fe3b2b19f79c',
+ 'sha1_git': '8638d33a96cb25d8319af21417f00045ec6ee810',
+ 'sha256': '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709', # noqa
+ 'size': 898,
+ 'url': 'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa
+ }
+ }
+
+ expected_releases = {
+ '1.1.0': {
+ 'name': '1.1.0',
+ 'message': '',
+ },
+ '1.2.0': {
+ 'name': '1.2.0',
+ 'message': '',
+ },
+ }
+
+ dir_paths = []
+ for pkginfo, author, release, artifact, dir_path in actual_releases:
+ version = pkginfo['version']
+ expected_pkginfo = _project_pkginfo(dir_path)
+ self.assertEquals(pkginfo, expected_pkginfo)
+ expected_author = converters.author(expected_pkginfo)
+ self.assertEqual(author, expected_author)
+ expected_artifact = expected_release_artifacts[version]
+ self.assertEqual(artifact, expected_artifact)
+ expected_release = expected_releases[version]
+ self.assertEqual(release, expected_release)
+
+ self.assertTrue(version in dir_path)
+ self.assertTrue(self.project_name in dir_path)
+ # path still exists
+ self.assertTrue(os.path.exists(dir_path))
+ dir_paths.append(dir_path)
+
+ # Ensure uncompressed paths have been destroyed
+ for dir_path in dir_paths:
+ # path no longer exists
+ self.assertFalse(os.path.exists(dir_path))
diff --git a/swh/loader/pypi/tests/test_converters.py b/swh/loader/pypi/tests/test_converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_converters.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest import TestCase
+from nose.tools import istest
+
+from swh.loader.pypi.converters import author, info
+
+from .common import WithProjectTest
+
+
+class Test(WithProjectTest):
+ @istest
+ def info(self):
+ actual_info = self.project.info()
+
+ expected_info = {
+ 'home_page': self.data['info']['home_page'],
+ 'description': self.data['info']['description'],
+ 'summary': self.data['info']['summary'],
+ 'license': self.data['info']['license'],
+ 'package_url': self.data['info']['package_url'],
+ 'project_url': self.data['info']['project_url'],
+ 'upstream': self.data['info']['project_urls']['Homepage'],
+ }
+
+ self.assertEqual(expected_info, actual_info)
+
+ @istest
+ def author(self):
+ info = self.data['info']
+ actual_author = author(info)
+
+ name = info['author'].encode('utf-8')
+ email = info['author_email'].encode('utf-8')
+ expected_author = {
+ 'fullname': b'%s <%s>' % (name, email),
+ 'name': name,
+ 'email': email,
+ }
+
+ self.assertEqual(expected_author, actual_author)
+
+
+class ParseAuthorTest(TestCase):
+ @istest
+ def author_basic(self):
+ data = {
+ 'author': "i-am-groot",
+ 'author_email': 'iam@groot.org',
+ }
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'i-am-groot <iam@groot.org>',
+ 'name': b'i-am-groot',
+ 'email': b'iam@groot.org',
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed(self):
+ data = {
+ 'author': "['pierre', 'paul', 'jacques']",
+ 'author_email': None,
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b"['pierre', 'paul', 'jacques']",
+ 'name': b"['pierre', 'paul', 'jacques']",
+ 'email': None,
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed_2(self):
+ data = {
+ 'author': '[marie, jeanne]',
+ 'author_email': '[marie@some, jeanne@thing]',
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
+ 'name': b'[marie, jeanne]',
+ 'email': b'[marie@some, jeanne@thing]',
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed_3(self):
+ data = {
+ 'author': '[marie, jeanne, pierre]',
+ 'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
+ 'name': b'[marie, jeanne, pierre]',
+ 'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
+ }
+
+ self.assertEquals(actual_author, expected_author)
diff --git a/swh/loader/pypi/tests/test_loader.py b/swh/loader/pypi/tests/test_loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_loader.py
@@ -0,0 +1,259 @@
+# Copyright (C) 2016-2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import tempfile
+
+from nose.plugins.attrib import attr
+from nose.tools import istest
+from unittest import TestCase
+
+from swh.model import hashutil
+
+from swh.loader.pypi.client import PyPIProject
+from swh.loader.pypi.loader import PyPILoader
+from .common import PyPIClientWithCache, RESOURCES_PATH, LoaderNoStorage
+
+
+class TestPyPILoader(LoaderNoStorage, PyPILoader):
+ """Real PyPILoader for test purposes (storage and pypi interactions
+ inhibited)
+
+ """
+ def __init__(self, project_name):
+ project_metadata_file = '%s/%s.json' % (RESOURCES_PATH, project_name)
+ project_metadata_url = 'https://pypi.org/pypi/%s/json' % project_name
+ with open(project_metadata_file) as f:
+ data = json.load(f)
+
+ temp_dir = tempfile.mkdtemp(
+ dir='/tmp/', prefix='swh.loader.pypi.tests-')
+ # Use the PyPI client backed by the local cache directory
+ client = PyPIClientWithCache(
+ temp_directory=temp_dir, cache_dir=RESOURCES_PATH)
+ super().__init__(client=client)
+ self.project = PyPIProject(
+ client=client,
+ project=project_name,
+ project_metadata_url=project_metadata_url,
+ data=data)
+
+ def prepare(self, project_name, origin_url,
+ origin_metadata_url=None):
+ self.project_name = project_name
+ self.origin_url = origin_url
+ self.origin_metadata_url = origin_metadata_url
+ self.visit = 1 # first visit
+ self._prepare_state()
+
+
+@attr('fs')
+class BaseLoaderITest(TestCase):
+ """Base test case preparing the pypi project to 'load' in a test context.
+
+ In this setup, the loader uses the cache to load data so no
+ network interaction (no storage, no pypi).
+
+ """
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ self.tmp_root_path = tempfile.mkdtemp()
+ self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+ self._project = project_name
+ self._origin_url = '%s/pypi/%s/' % (dummy_pypi_instance, project_name)
+ self._project_metadata_url = '%s/pypi/%s/json' % (
+ dummy_pypi_instance, project_name)
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp_root_path)
+
+ def assertContentsOk(self, expected_contents):
+ contents = self.loader.all_contents
+ self.assertEquals(len(contents), len(expected_contents))
+
+ for content in contents:
+ content_id = hashutil.hash_to_hex(content['sha1'])
+ self.assertIn(content_id, expected_contents)
+
+ def assertDirectoriesOk(self, expected_directories):
+ directories = self.loader.all_directories
+ self.assertEquals(len(directories), len(expected_directories))
+
+ for _dir in directories:
+ _dir_id = hashutil.hash_to_hex(_dir['id'])
+ self.assertIn(_dir_id, expected_directories)
+
+ def assertSnapshotOk(self, expected_snapshot, expected_revisions):
+ snapshots = self.loader.all_snapshots
+ self.assertEqual(len(snapshots), 1)
+
+ snap = snapshots[0]
+ snap_id = hashutil.hash_to_hex(snap['id'])
+ self.assertEqual(snap_id, expected_snapshot)
+
+ branches = snap['branches']
+ self.assertEqual(len(expected_revisions), len(branches))
+
+ for branch, target in branches.items():
+ rev_id = hashutil.hash_to_hex(target['target'])
+ self.assertIn(rev_id, expected_revisions)
+ self.assertEqual('revision', target['target_type'])
+
+ def assertRevisionsOk(self, expected_revisions): # noqa: N802
+ """Check the loader's revisions match the expected revisions.
+
+ Expects self.loader to be instantiated and ready to be
+ inspected (meaning the loading took place).
+
+ Args:
+ expected_revisions (dict): Dict with key revision id,
+ value the targeted directory id.
+
+ """
+ # Each loaded revision must point at its expected directory
+ for rev in self.loader.all_revisions:
+ rev_id = hashutil.hash_to_hex(rev['id'])
+ directory_id = hashutil.hash_to_hex(rev['directory'])
+
+ self.assertEquals(expected_revisions[rev_id], directory_id)
+
+
+# Define loaders with no storage
+# They'll just accumulate the data in place
+# Only for testing purposes.
+
+
+class PyPILoaderNoSnapshot(TestPyPILoader):
+ """Same as TestPyPILoader with no prior snapshot seen
+
+ """
+ def _last_snapshot(self):
+ return None
+
+
+class LoaderITest(BaseLoaderITest):
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ super().setUp(project_name, dummy_pypi_instance)
+ self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+
+ @istest
+ def load(self):
+ """Load a pypi origin
+
+ """
+ # when
+ self.loader.load(
+ self._project, self._origin_url, self._project_metadata_url)
+
+ # then
+ self.assertEquals(len(self.loader.all_contents), 6,
+ '3 contents per release artifact files (2)')
+ self.assertEquals(len(self.loader.all_directories), 4)
+ self.assertEquals(len(self.loader.all_revisions), 2,
+ '2 releases so 2 revisions should be created')
+ self.assertEquals(len(self.loader.all_releases), 0,
+ 'No release is created in the pypi loader')
+ self.assertEquals(len(self.loader.all_snapshots), 1,
+ 'Only 1 snapshot targetting all revisions')
+
+ expected_contents = [
+ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
+ '938c33483285fd8ad57f15497f538320df82aeb8',
+ 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
+ '405859113963cb7a797642b45f171d6360425d16',
+ 'e5686aa568fdb1d19d7f1329267082fe40482d31',
+ '83ecf6ec1114fd260ca7a833a2d165e71258c338',
+ ]
+
+ self.assertContentsOk(expected_contents)
+
+ expected_directories = [
+ '05219ba38bc542d4345d5638af1ed56c7d43ca7d',
+ 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
+ 'b178b66bd22383d5f16f4f5c923d39ca798861b4',
+ 'c3a58f8b57433a4b56caaa5033ae2e0931405338',
+ ]
+ self.assertDirectoriesOk(expected_directories)
+
+ # {revision hash: directory hash}
+ expected_revisions = {
+ '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d', # noqa
+ 'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4', # noqa
+ }
+ self.assertRevisionsOk(expected_revisions)
+
+ self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+ expected_revisions)
+
+
+class PyPILoaderWithSnapshot(TestPyPILoader):
+ """Same as TestPyPILoader but with a prior snapshot already seen
+
+ """
+ def _last_snapshot(self):
+ return {
+ 'id': b'\xf4V\xb0>\x8b\xf1\x92\rd\xb0\r\xf24\xb1\xef\xed\xc2[l\x93', # noqa
+ 'branches': {
+ b'0805nexter-1.1.0.zip': {
+ 'target': b'L\x99\x89\x1f\x93\xb8\x14P'
+ b'8Ww#Z7\xb5\xe9f\xdd\x15q',
+ 'target_type': 'revision'
+ },
+ b'0805nexter-1.2.0.zip': {
+ 'target': b'\xe4E\xdaM\xa2+1\xbf'
+ b'\xeb\xb6\xff\xc48=\xbf\x83'
+ b'\x9a\x07M!',
+ 'target_type': 'revision'
+ },
+ },
+ }
+
+ def _known_artifacts(self, last_snapshot):
+ yield from [
+ (
+ '0805nexter-1.1.0.zip',
+ '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035' # noqa
+ ),
+ (
+ '0805nexter-1.2.0.zip',
+ '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709' # noqa
+ )
+ ]
+
+
+class LoaderWithOriginAlreadySeenITest(BaseLoaderITest):
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ super().setUp(project_name, dummy_pypi_instance)
+ self.loader = PyPILoaderWithSnapshot(project_name=project_name)
+
+ @istest
+ def load(self):
+ """Loading an already injected pypi origin results in only 1 snapshot
+
+ """
+ # when
+ self.loader.load(
+ self._project, self._origin_url, self._project_metadata_url)
+
+ # then
+ self.assertEquals(len(self.loader.all_contents), 0)
+ self.assertEquals(len(self.loader.all_directories), 0)
+ self.assertEquals(len(self.loader.all_revisions), 0)
+ self.assertEquals(len(self.loader.all_releases), 0)
+ self.assertEquals(len(self.loader.all_snapshots), 1)
+
+ self.assertContentsOk([])
+ self.assertDirectoriesOk([])
+ self.assertRevisionsOk(expected_revisions={})
+
+ expected_revisions = {
+ '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d', # noqa
+ 'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4', # noqa
+ }
+ self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+ expected_revisions)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Nov 5 2024, 7:00 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227789
Attached To
D408: Bootstrap pypi loader
Event Timeline
Log In to Comment