Page MenuHomeSoftware Heritage

D408.id1272.diff
No OneTemporary

D408.id1272.diff

diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
swh-loader-pypi
====================
-SWH PyPi loader's source code repository
+SWH PyPI loader's source code repository
diff --git a/debian/changelog b/debian/changelog
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,4 +1,4 @@
-swh-loader-pypy (0.0.1-1) unstable; urgency=low
+swh-loader-pypi (0.0.1-1) unstable; urgency=low
* Initial bootstrap
diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -5,7 +5,10 @@
Build-Depends: debhelper (>= 9),
dh-python (>= 2),
python3-all,
+ python3-arrow,
python3-nose,
+ python3-pkginfo,
+ python3-requests,
python3-setuptools,
python3-swh.core,
python3-swh.storage,
@@ -21,4 +24,4 @@
python3-swh.loader.core,
python3-swh.storage,
${misc:Depends}, ${python3:Depends}
-Description: Software Heritage PyPi Loader
+Description: Software Heritage PyPI Loader
diff --git a/debian/rules b/debian/rules
--- a/debian/rules
+++ b/debian/rules
@@ -9,3 +9,4 @@
override_dh_install:
dh_install
rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/__init__.py
+ rm -v $(CURDIR)/debian/python3-*/usr/lib/python*/dist-packages/swh/loader/__init__.py
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,5 @@
setuptools
vcversioner
+requests
+arrow
+pkginfo
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
setup(
name='swh.loader.pypi',
- description='Software Heritage PyPi Loader',
+ description='Software Heritage PyPI Loader',
author='Software Heritage developers',
author_email='swh-devel@inria.fr',
url='https://forge.softwareheritage.org/source/swh-loader-pypi.git',
@@ -35,6 +35,6 @@
install_requires=parse_requirements() + parse_requirements('swh'),
test_requires=parse_requirements('test'),
setup_requires=['vcversioner'],
- vcversioner={},
+ vcversioner={'version_module_paths': ['swh/loader/pypi/_version.py']},
include_package_data=True,
)
diff --git a/swh/loader/__init__.py b/swh/loader/__init__.py
--- a/swh/loader/__init__.py
+++ b/swh/loader/__init__.py
@@ -0,0 +1 @@
+# Extend this package's search path with pkgutil.extend_path so that
+# same-named package directories found elsewhere on sys.path are also
+# searched when importing submodules of this package.
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/loader/pypi/.gitignore b/swh/loader/pypi/.gitignore
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/.gitignore
@@ -0,0 +1 @@
+_version.py
diff --git a/swh/loader/pypi/client.py b/swh/loader/pypi/client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/client.py
@@ -0,0 +1,469 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import hashlib
+import logging
+import os
+import requests
+import shutil
+
+from .converters import info, author
+
+from pkginfo import UnpackedSDist
+
+from swh.core import tarball
+from swh.model import hashutil
+
+try:
+ from swh.loader.pypi._version import __version__
+except ImportError:
+ __version__ = 'devel'
+
+
+def convert_to_hex(d):
+    """Return a copy of a flat mapping with its bytes values hex-encoded.
+
+    Args:
+        d (dict): flat dictionary whose values may be raw hash bytes.
+
+    Returns:
+        A new dict with the same keys; every bytes value is converted
+        through hashutil.hash_to_hex, every other value is kept as is.
+        A falsy input (None, empty dict) is returned unchanged.
+
+    """
+    if not d:
+        return d
+
+    return {
+        key: (hashutil.hash_to_hex(value)
+              if isinstance(value, bytes) else value)
+        for key, value in d.items()
+    }
+
+
+def _to_dict(pkginfo):
+    """Flatten a pkginfo parsed structure into a plain dict.
+
+    Args:
+        pkginfo (UnpackedSDist): The sdist parsed structure; iterating
+            over it gives the attribute names to export.
+
+    Returns:
+        dict mapping each iterated attribute name to its value
+
+    """
+    return {field: getattr(pkginfo, field) for field in pkginfo}
+
+
+def _project_pkginfo(dir_path):
+    """Given an uncompressed path holding the pkginfo file, returns a
+       pkginfo parsed structure as a dict.
+
+       The release artifact contains at their root one folder. For example:
+       $ tar tvf zprint-0.0.6.tar.gz
+       drwxr-xr-x root/root         0 2018-08-22 11:01 zprint-0.0.6/
+       ...
+
+    Args:
+        dir_path (str): Path to the uncompressed directory
+                        representing a release artifact from pypi.
+
+    Returns:
+        the pkginfo parsed structure as a dict if any or None if
+        none was present (including when the directory is empty).
+
+    """
+    # Entries are visited in sorted order so the outcome does not depend
+    # on the filesystem's listing order; an empty directory simply yields
+    # no candidate instead of raising IndexError.
+    for entry in sorted(os.listdir(dir_path)):
+        pkginfo_path = os.path.join(dir_path, entry, 'PKG-INFO')
+        if os.path.exists(pkginfo_path):
+            return _to_dict(UnpackedSDist(pkginfo_path))
+    return None
+
+
+class PyPIClient:
+    """PyPI client in charge of discussing with the pypi server.
+
+    Args:
+        base_url (str): PyPI instance's base url
+        temp_directory (str): Path to the temporary disk location used
+                              for uncompressing the release artifacts
+
+        cache (bool): Use an internal cache to keep the archives on
+                      disk. Default is not to use it.
+        cache_dir (str): cache's disk location (relevant only with
+                        `cache` to True)
+
+    Those last 2 parameters are not for production use.
+
+    """
+    def __init__(self, base_url='https://pypi.org/pypi',
+                 temp_directory=None, cache=False, cache_dir=None):
+        self.version = __version__
+        self.base_url = base_url
+        self.temp_directory = temp_directory
+
+        self.do_cache = cache
+        if self.do_cache:
+            # cache layout: json responses at the root of cache_dir, raw
+            # archives below cache_dir/archives
+            self.cache_dir = cache_dir
+            self.cache_raw_dir = os.path.join(cache_dir, 'archives')
+            os.makedirs(self.cache_raw_dir, exist_ok=True)
+        self.session = requests.session()
+        self.params = {
+            'headers': {
+                'User-Agent': 'Software Heritage PyPI Loader (%s)' % (
+                    __version__
+                )
+            }
+        }
+
+    def _save_response(self, response, project=None):
+        """Log the response from a server request to the cache dir.
+
+        The json payload is written gzip-compressed in self.cache_dir,
+        in a file named after the current UTC date (prefixed with the
+        project name when provided).
+
+        Args:
+            response (Response): full server response
+            project (str): optional project name used as filename prefix
+
+        Returns:
+            nothing
+
+        """
+        import gzip
+        from json import dumps
+        datepath = arrow.utcnow().isoformat()
+        name = '%s.gz' % datepath if project is None else '%s-%s.gz' % (
+            project, datepath)
+        fname = os.path.join(self.cache_dir, name)
+        with gzip.open(fname, 'wb') as f:
+            f.write(dumps(response.json()).encode('utf-8'))
+
+    def _save_raw(self, filepath):
+        """In cache mode, backup the filepath to self.cache_raw_dir
+
+        Args:
+            filepath (str): Path of the file to save
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        shutil.copyfile(filepath, _archive)
+
+    def _get_raw(self, filepath):
+        """In cache mode, we try to retrieve the cached file.
+
+        Args:
+            filepath (str): Destination path; its basename is the key
+                            looked up in self.cache_raw_dir
+
+        Returns:
+            filepath once the cached archive has been copied there, None
+            when the cache holds no such archive.
+
+        """
+        _filename = os.path.basename(filepath)
+        _archive = os.path.join(self.cache_raw_dir, _filename)
+        if not os.path.exists(_archive):
+            return None
+        shutil.copyfile(_archive, filepath)
+        return filepath
+
+    def _get(self, url, project=None):
+        """Get query to the url.
+
+        Args:
+            url (str): Url
+            project (str): optional project name, only used to name the
+                           cached response in cache mode
+
+        Raises:
+            ValueError in case of failing to query
+
+        Returns:
+            Response as dict if ok
+
+        """
+        response = self.session.get(url, **self.params)
+        if response.status_code != 200:
+            raise ValueError("Fail to query '%s'. Reason: %s" % (
+                url, response.status_code))
+
+        if self.do_cache:
+            self._save_response(response, project=project)
+
+        return response.json()
+
+    def info(self, project_url, project=None):
+        """Given a metadata project url, retrieve the raw json response
+
+        Args:
+            project_url (str): Project's pypi to retrieve information
+            project (str): optional project name (cache naming only)
+
+        Returns:
+            Main project information as dict.
+
+        """
+        return self._get(project_url, project=project)
+
+    def release(self, project, release):
+        """Given a project and a release name, retrieve the raw information
+           for said project's release.
+
+        Args:
+            project (str): Project's name
+            release (str): Release name (version), interpolated in the url
+
+        Returns:
+            Release information as dict
+
+        """
+        release_url = '%s/%s/%s/json' % (self.base_url, project, release)
+        return self._get(release_url, project=project)
+
+    def prepare_release_artifacts(self, project, version, release_artifacts):
+        """For a given project's release version, fetch and prepare the
+           associated release artifacts.
+
+        Args:
+            project (str): PyPI Project
+            version (str): Release version
+            release_artifacts ([dict]): List of source distribution
+                                        release artifacts
+
+        Yields:
+            tuple (release, artifact, filepath, uncompressed_path, pkginfo)
+            where:
+
+            - release (dict): release information (name, message)
+            - artifact (dict): release artifact's associated info
+            - filepath (str): Local artifact's path
+            - uncompressed_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        for artifact in release_artifacts:
+            release = {
+                'name': version,
+                'message': artifact.get('comment_text', ''),
+            }
+            artifact = {
+                'sha256': artifact['digests']['sha256'],
+                'size': artifact['size'],
+                'filename': artifact['filename'],
+                'url': artifact['url'],
+                'date': artifact['upload_time'],
+            }
+            yield self.prepare_release_artifact(project, release, artifact)
+
+    def _download_artifact(self, project, version, artifact, filepath):
+        """Fetch the artifact, check its size and sha256, write it to disk.
+
+        The payload is verified before anything is written so that a
+        truncated or corrupted download never lands at filepath.
+
+        Args:
+            project (str): Project's name (error reporting only)
+            version (str): Release version (error reporting only)
+            artifact (dict): Release artifact information (url, size,
+                             sha256)
+            filepath (str): Where to write the verified archive
+
+        Raises:
+            ValueError if the query fails or if the size or the checksum
+            does not match the expected one
+
+        """
+        url = artifact['url']
+        r = self.session.get(url, **self.params)
+        status = r.status_code
+        if status != 200:
+            if status == 404:
+                raise ValueError("Project '%s' not found" % url)
+            msg = "Fail to query '%s'\nCode: %s\nDetails: %s" % (
+                url, r.status_code, r.content)
+            raise ValueError(msg)
+
+        # The whole body is needed for the size check anyway, so hash and
+        # write it in one go instead of iterating over it byte by byte.
+        content = r.content
+        _len = len(content)
+        if _len != artifact['size']:
+            raise ValueError('Error when checking size: %s != %s' % (
+                artifact['size'], _len))
+
+        actual_digest = hashlib.sha256(content).hexdigest()
+        if actual_digest != artifact['sha256']:
+            raise ValueError(
+                '%s %s: Checksum mismatched: %s != %s' % (
+                    project, version, artifact['sha256'], actual_digest))
+
+        with open(filepath, 'wb') as f:
+            f.write(content)
+
+    def prepare_release_artifact(self, project, release, artifact):
+        """For a given release project, fetch and prepare the associated
+           artifact.
+
+        This:
+        - fetches the artifact (or reuses the cached one in cache mode)
+        - checks the size, hashes match
+        - uncompress the artifact locally
+        - computes the swh hashes
+        - returns the associated information for the artifact
+
+        Args:
+            project (str): Project's name
+            release (dict): Release information
+            artifact (dict): Release artifact information; it is updated
+                             in place with the archive type and hashes
+
+        Raises:
+            ValueError if the artifact cannot be fetched or verified
+
+        Returns:
+            tuple (release, artifact, filepath, uncompressed_path, pkginfo)
+            where:
+
+            - release (dict): Release information (name, message)
+            - artifact (dict): release artifact's information
+            - filepath (str): Local artifact's path
+            - uncompressed_path (str): uncompressed archive path
+            - pkginfo (dict): package information or None if none found
+
+        """
+        version = release['name']
+        logging.debug('Release version: %s', version)
+        path = os.path.join(self.temp_directory, project, version)
+        os.makedirs(path, exist_ok=True)
+        filepath = os.path.join(path, artifact['filename'])
+        logging.debug('Artifact local path: %s', filepath)
+
+        _filepath = None
+        if self.do_cache:
+            _filepath = self._get_raw(filepath)
+
+        if not _filepath:  # no cache hit, we fetch from pypi
+            self._download_artifact(project, version, artifact, filepath)
+            if self.do_cache:
+                self._save_raw(filepath)
+
+        uncompress_path = os.path.join(path, 'uncompress')
+        os.makedirs(uncompress_path, exist_ok=True)
+
+        nature = tarball.uncompress(filepath, uncompress_path)
+
+        hashes = hashutil.hash_path(filepath)
+        hashes.pop('length')  # 'size' entry is already referenced
+        artifact_hashes = convert_to_hex(hashes)
+        artifact['archive_type'] = nature
+        artifact.update(artifact_hashes)
+        pkginfo = _project_pkginfo(uncompress_path)
+        return release, artifact, filepath, uncompress_path, pkginfo
+
+
+class PyPIProject:
+    """PyPI project representation
+
+    This allows to extract information for a given project:
+    - either its latest information (from the latest release)
+    - either for a given release version
+    - uncompress associated fetched release artifacts
+
+    This also fetches and uncompresses the associated release
+    artifacts.
+
+    Args:
+        client (PyPIClient): client used to query pypi
+        project (str): Project's name
+        project_metadata_url (str): Project's metadata url
+        data (dict): Already fetched project metadata; when falsy, it is
+                     fetched through the client
+
+    """
+    def __init__(self, client, project, project_metadata_url, data=None):
+        self.client = client
+        self.project = project
+        self.project_metadata_url = project_metadata_url
+        if data:
+            self.data = data
+        else:
+            self.data = client.info(project_metadata_url, project)
+
+        self.last_version = self.data['info']['version']
+        # per-release metadata cache, seeded with the latest release
+        self.cache = {
+            self.last_version: self.data
+        }
+
+    def _data(self, release_name=None):
+        """Fetch data per release and cache it.  Returns the cache retrieved
+           data if already fetched.
+
+        Args:
+            release_name (str): Release to look up; the project's main
+                                data is returned when not provided
+
+        """
+        if release_name:
+            data = self.cache.get(release_name)
+            if not data:
+                data = self.client.release(self.project, release_name)
+                self.cache[release_name] = data
+        else:
+            data = self.data
+        return data
+
+    def info(self, release_name=None):
+        """Compute release information for provided release (or latest one).
+
+        """
+        return info(self._data(release_name))
+
+    def _filter_release_artifacts(self, version, releases, known_artifacts):
+        """Filter not already known sdist (source distribution) release.
+
+        There can be multiple 'package_type' (sdist, bdist_egg,
+        bdist_wheel, bdist_rpm, bdist_msi, bdist_wininst, ...), we are
+        only interested in source distribution (sdist), others bdist*
+        are binary
+
+        Args:
+            version (str): Release name or version
+            releases (dict/[dict]): Full release object (or a list of)
+            known_artifacts ([tuple]): Container of known releases (tuple
+                                       filename, sha256). It must support
+                                       repeated membership tests, so pass
+                                       a set or a list, not a generator.
+
+        Yields:
+            an unknown release artifact
+
+        """
+        if not releases:
+            return
+        if not isinstance(releases, list):
+            releases = [releases]
+        for artifact in releases:
+            name = artifact['filename']
+            sha256 = artifact['digests']['sha256']
+            if (name, sha256) in known_artifacts:
+                logging.debug(
+                    'artifact (%s, %s) already seen for release %s, skipping',
+                    name, sha256, version)
+                continue
+            if artifact['packagetype'] != 'sdist':
+                continue
+            yield artifact
+
+    def _cleanup_release_artifacts(self, archive_path, directory_path):
+        """Clean intermediary files which no longer needs to be present.
+
+        Args:
+            archive_path (str): Path to the fetched archive
+            directory_path (str): Path to the uncompressed archive
+
+        """
+        if directory_path and os.path.exists(directory_path):
+            logging.debug('Clean up uncompressed archive path %s',
+                          directory_path)
+            shutil.rmtree(directory_path)
+
+        if archive_path and os.path.exists(archive_path):
+            logging.debug('Clean up archive %s', archive_path)
+            os.unlink(archive_path)
+
+    def releases(self, known_artifacts):
+        """Fetch metadata/data per release (if new release artifact detected)
+
+        For new release artifact, this:
+        - downloads and uncompresses the release artifacts.
+        - yields the (pkginfo, author info, release, artifact, dir_path)
+        - Clean up the intermediary fetched artifact files
+
+        Args:
+            known_artifacts ([tuple]): Iterable of already known (artifact
+                                       name, artifact sha256 hash)
+
+        Yields:
+            tuple (pkginfo, author, release, artifact, uncompressed_path)
+            where:
+
+            - pkginfo (dict): release's associated PKG-INFO metadata
+            - author (dict): Author information for the release
+            - release (dict): release metadata
+            - artifact (dict): Release artifact information
+            - uncompressed_path (str): Path to uncompressed artifact
+
+        """
+        # The iterable may be a one-shot generator: materialize it once,
+        # otherwise the first membership test would consume it and every
+        # later artifact would wrongly be considered unknown.
+        known_artifacts = set(known_artifacts)
+        releases_dict = self.data['releases']
+        for version, releases in releases_dict.items():
+            releases = self._filter_release_artifacts(
+                version, releases, known_artifacts)
+            releases = self.client.prepare_release_artifacts(
+                self.project, version, releases)
+            for release, artifact, archive, dir_path, pkginfo in releases:
+                # The finally clause removes the fetched files whether the
+                # artifact is skipped, consumed, or the consumer stops
+                # iterating before the end.
+                try:
+                    if pkginfo is None:  # nothing to build metadata from
+                        msg = '%s %s: No PKG-INFO detected, skipping' % (  # noqa
+                            self.project, version)
+                        logging.warning(msg)
+                        continue
+                    yield pkginfo, author(pkginfo), release, artifact, dir_path
+                finally:
+                    self._cleanup_release_artifacts(archive, dir_path)
diff --git a/swh/loader/pypi/converters.py b/swh/loader/pypi/converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/converters.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+def info(data):
+    """Extract the project subset out of a PyPI project information dict.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information; only its 'info' entry is read.
+
+    Returns:
+        A dict subset of project information. Its 'upstream' entry is
+        the 'Homepage' of the project urls when there is one, None
+        otherwise.
+
+    """
+    _info = data['info']
+    copied_fields = (
+        'home_page', 'description', 'summary',
+        'license', 'package_url', 'project_url',
+    )
+    subset = {field: _info[field] for field in copied_fields}
+    subset['upstream'] = None
+
+    project_urls = _info.get('project_urls')
+    homepage = project_urls.get('Homepage') if project_urls else None
+    if homepage:
+        subset['upstream'] = homepage
+
+    return subset
+
+
+def author(data):
+    """Given a dict of project/release artifact information (coming from
+       PyPI), returns an author subset.
+
+    Args:
+        data (dict): Representing either artifact information or
+                     release information; the 'author' and 'author_email'
+                     entries are read (a missing entry is treated as
+                     empty).
+
+    Returns:
+        swh-model dict representing a person: 'fullname' is always
+        bytes, 'name' and 'email' are utf-8 encoded when set.
+
+    """
+    name = data.get('author')
+    email = data.get('author_email')
+
+    if name and email:
+        fullname = '%s <%s>' % (name, email)
+    elif email:
+        # No name: do not let the formatting leak a literal 'None'
+        fullname = '<%s>' % email
+    else:
+        fullname = name
+
+    if not fullname:
+        return {'fullname': b'', 'name': None, 'email': None}
+
+    return {
+        'fullname': fullname.encode('utf-8'),
+        'name': name.encode('utf-8') if name else name,
+        'email': email.encode('utf-8') if email else email,
+    }
diff --git a/swh/loader/pypi/loader.py b/swh/loader/pypi/loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/loader.py
@@ -0,0 +1,244 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import arrow
+import os
+import shutil
+
+from tempfile import mkdtemp
+
+from swh.loader.core.utils import clean_dangling_folders
+from swh.loader.core.loader import SWHLoader
+from swh.model.from_disk import Directory
+from swh.model.identifiers import (
+ revision_identifier, snapshot_identifier,
+ identifier_to_bytes, normalize_timestamp
+)
+
+from .client import PyPIClient, PyPIProject
+
+
+# Prefix of the working directories the loader creates with mkdtemp; also
+# the pattern handed to clean_dangling_folders by PyPILoader.pre_cleanup.
+TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.pypi.'
+# Marker prepended to the warnings logged when the 'debug' option makes
+# the loader skip its clean up steps.
+DEBUG_MODE = '** DEBUG MODE **'
+
+
+class PyPILoader(SWHLoader):
+    """Load the source release artifacts of a PyPI project.
+
+    Each not yet known sdist release artifact becomes one synthetic
+    revision (type 'tar') targeted by a snapshot branch named after the
+    artifact's filename.
+
+    Args:
+        client (PyPIClient): optional client to use; by default one is
+            built out of the configuration, working in a fresh temporary
+            directory.
+
+    """
+    CONFIG_BASE_FILENAME = 'loader/pypi'
+    ADDITIONAL_CONFIG = {
+        'temp_directory': ('str', '/tmp/swh.loader.pypi/'),
+        'cache': ('bool', False),
+        'cache_dir': ('str', ''),
+        'debug': ('bool', False),  # NOT FOR PRODUCTION
+    }
+
+    def __init__(self, client=None):
+        super().__init__(logging_class='swh.loader.pypi.PyPILoader')
+        self.origin_id = None
+        if not client:
+            temp_directory = self.config['temp_directory']
+            os.makedirs(temp_directory, exist_ok=True)
+            self.temp_directory = mkdtemp(
+                suffix='-%s' % os.getpid(),
+                prefix=TEMPORARY_DIR_PREFIX_PATTERN,
+                dir=temp_directory)
+            self.pypi_client = PyPIClient(
+                temp_directory=self.temp_directory,
+                cache=self.config['cache'],
+                cache_dir=self.config['cache_dir'])
+        else:
+            self.temp_directory = client.temp_directory
+            self.pypi_client = client
+        self.debug = self.config['debug']
+        self.done = False
+
+    def pre_cleanup(self):
+        """To prevent disk explosion if some other workers exploded
+        in mid-air (OOM killed), we try and clean up dangling files.
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not pre-clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        clean_dangling_folders(self.config['temp_directory'],
+                               pattern_check=TEMPORARY_DIR_PREFIX_PATTERN,
+                               log=self.log)
+
+    def cleanup(self):
+        """Clean up temporary disk use
+
+        """
+        if self.debug:
+            self.log.warning('%s Will not clean up temp dir %s' % (
+                DEBUG_MODE, self.temp_directory
+            ))
+            return
+        if os.path.exists(self.temp_directory):
+            self.log.debug('Clean up %s' % self.temp_directory)
+            shutil.rmtree(self.temp_directory)
+
+    def prepare_origin_visit(self, project_name, origin_url,
+                             origin_metadata_url=None):
+        """Prepare the origin visit information
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.origin = {
+            'url': origin_url,
+            'type': 'pypi',
+        }
+        self.visit_date = None  # loader core will populate it
+
+    def _known_artifacts(self, last_snapshot):
+        """Retrieve the known releases/artifact for the origin_id.
+
+        Args
+            last_snapshot (dict): Last snapshot for the visit
+
+        Yields:
+            tuple artifact's filename, artifact's sha256
+
+        """
+        # Only branches targeting a revision carry artifact metadata; a
+        # branch without 'target_type' is kept, as it was before.
+        revs = [
+            branch['target']
+            for branch in last_snapshot['branches'].values()
+            if branch and branch.get('target_type', 'revision') == 'revision'
+        ]
+        known_revisions = self.storage.revision_get(revs)
+        for revision in known_revisions:
+            # defensive: skip entries the storage could not resolve or
+            # which hold no original artifact information
+            if not revision:
+                continue
+            metadata = revision.get('metadata') or {}
+            artifact = metadata.get('original_artifact')
+            if not artifact:
+                continue
+            yield artifact['filename'], artifact['sha256']
+
+    def _last_snapshot(self):
+        """Retrieve the last snapshot
+
+        """
+        return self.storage.snapshot_get_latest(self.origin_id)
+
+    def prepare(self, project_name, origin_url,
+                origin_metadata_url=None):
+        """Keep reference to the origin url (project) and the
+           project metadata url
+
+        Args:
+            project_name (str): Project's simple name
+            origin_url (str): Project's main url
+            origin_metadata_url (str): Project's metadata url
+
+        """
+        self.project_name = project_name
+        self.origin_url = origin_url
+        self.origin_metadata_url = origin_metadata_url
+        self.project = PyPIProject(self.pypi_client, self.project_name,
+                                   self.origin_metadata_url)
+        self._prepare_state()
+
+    def _prepare_state(self):
+        """Initialize internal state (snapshot, contents, directories, etc...)
+
+        This is called from `prepare` method.
+
+        """
+        last_snapshot = self._last_snapshot()
+        if last_snapshot:
+            self._snapshot = last_snapshot.copy()
+            # Materialized on purpose: the known artifacts are looked up
+            # once per release artifact, which a one-shot generator does
+            # not survive.
+            known_artifacts = list(self._known_artifacts(self._snapshot))
+        else:
+            self._snapshot = {
+                'branches': {}
+            }
+            known_artifacts = []
+        # and the artifacts
+        # that will be the source of data to retrieve
+        self.release_artifacts = self.project.releases(known_artifacts)
+        # temporary state
+        self._contents = []
+        self._directories = []
+        self._revisions = []
+
+    def fetch_data(self):
+        """Called once per release artifact version (can be many for one
+           release).
+
+        This will for each call:
+        - retrieve a release artifact (associated to a release version)
+        - Uncompress it and compute the necessary information
+        - Computes the swh objects
+
+        Returns:
+            True as long as data to fetch exist
+
+        """
+        if self.done:
+            return False
+
+        try:
+            data = next(self.release_artifacts)
+        except StopIteration:
+            self.done = True
+            return False
+
+        project_info, author, release, artifact, dir_path = data
+        dir_path = dir_path.encode('utf-8')
+        directory = Directory.from_disk(path=dir_path, data=True)
+        _objects = directory.collect()
+
+        self._contents = _objects['content'].values()
+        self._directories = _objects['directory'].values()
+
+        # `timestamp` is a property or a method depending on the arrow
+        # release in use; support both.
+        timestamp = arrow.get(artifact['date']).timestamp
+        if callable(timestamp):
+            timestamp = timestamp()
+        date = normalize_timestamp(int(timestamp))
+
+        name = release['name'].encode('utf-8')
+        message = release['message'].encode('utf-8')
+        if message:
+            message = b'%s: %s' % (name, message)
+        else:
+            message = name
+
+        _revision = {
+            'synthetic': True,
+            'metadata': {
+                'original_artifact': artifact,
+                'project': project_info,
+            },
+            'author': author,
+            'date': date,
+            'committer': author,
+            'committer_date': date,
+            'message': message,
+            'directory': directory.hash,
+            'parents': [],
+            'type': 'tar',
+        }
+        _revision['id'] = identifier_to_bytes(
+            revision_identifier(_revision))
+        self._revisions.append(_revision)
+
+        # one branch per release artifact, named after its filename
+        branch_name = artifact['filename'].encode('utf-8')
+        self._snapshot['branches'][branch_name] = {
+            'target': _revision['id'],
+            'target_type': 'revision',
+        }
+
+        return not self.done
+
+    def generate_and_load_snapshot(self):
+        """Compute the snapshot identifier and send the snapshot.
+
+        """
+        self._snapshot['id'] = identifier_to_bytes(
+            snapshot_identifier(self._snapshot))
+        self.maybe_load_snapshot(self._snapshot)
+
+    def store_data(self):
+        """(override) This sends collected objects to storage.
+
+        """
+        self.maybe_load_contents(self._contents)
+        self.maybe_load_directories(self._directories)
+        self.maybe_load_revisions(self._revisions)
+
+        # the snapshot is only complete once every artifact got fetched
+        if self.done:
+            self.generate_and_load_snapshot()
+            self.flush()
diff --git a/swh/loader/pypi/model.py b/swh/loader/pypi/model.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/model.py
@@ -0,0 +1,10 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import logging
+import shutil
+
+from .converters import info, author
diff --git a/swh/loader/pypi/tasks.py b/swh/loader/pypi/tasks.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tasks.py
@@ -0,0 +1,19 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scheduler.task import Task
+
+from .loader import PyPILoader
+
+
+class LoadPyPITsk(Task):
+    """Scheduler task running a PyPILoader on one PyPI project."""
+    task_queue = 'swh_loader_pypi'
+
+    def run_task(self, project_name, project_url, project_metadata_url=None):
+        """Load the project with a fresh loader logging through the task.
+
+        Args:
+            project_name (str): Project's simple name
+            project_url (str): Project's main url
+            project_metadata_url (str): Project's metadata url
+
+        Returns:
+            whatever the loader's `load` returns
+
+        """
+        pypi_loader = PyPILoader()
+        pypi_loader.log = self.log
+        outcome = pypi_loader.load(
+            project_name, project_url,
+            origin_metadata_url=project_metadata_url)
+        return outcome
diff --git a/swh/loader/__init__.py b/swh/loader/pypi/tests/__init__.py
copy from swh/loader/__init__.py
copy to swh/loader/pypi/tests/__init__.py
diff --git a/swh/loader/pypi/tests/common.py b/swh/loader/pypi/tests/common.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/common.py
@@ -0,0 +1,151 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import os
+import tempfile
+
+from nose.plugins.attrib import attr
+from unittest import TestCase
+
+from swh.loader.pypi.client import PyPIClient, PyPIProject
+
+
+# Location of the test fixtures: the project metadata json files, also
+# used as cache directory by the cache-backed client. The path is
+# relative, so tests presumably have to run from the repository root.
+RESOURCES_PATH = './swh/loader/pypi/tests/resources'
+
+
+class PyPIClientWithCache(PyPIClient):
+    """PyPIClient variant with the cache always switched on, so that the
+       archives come from the cache directory instead of pypi.
+
+    """
+    def __init__(self, temp_directory, cache_dir):
+        options = {
+            'temp_directory': temp_directory,
+            'cache': True,
+            'cache_dir': cache_dir,
+        }
+        super().__init__(**options)
+
+
+class LoaderNoStorage:
+    """Mixin replacing the loader's storage calls with in-memory
+       bookkeeping, so tests can inspect what would have been sent.
+
+    """
+    CONFIG_BASE_FILENAME = ''  # do not provide a real path
+    ADDITIONAL_CONFIG = {
+        'storage': ('dict', {
+            'cls': 'remote',
+            'args': {
+                'url': 'http://nowhere:5002/',  # do not provide a real storage
+            }
+        }),
+
+        # do not send any data to the storage
+        'send_contents': ('bool', False),
+        'send_directories': ('bool', False),
+        'send_revisions': ('bool', False),
+        'send_releases': ('bool', False),
+        'send_snapshot': ('bool', False),
+        'debug': ('bool', False),
+    }
+
+    def __init__(self, client=None):
+        super().__init__(client=client)
+        # typed data, one list per kind of object
+        self.objects = {
+            kind: []
+            for kind in ('content', 'directory', 'revision',
+                         'release', 'snapshot')
+        }
+        # direct aliases on the very same lists
+        self.all_contents = self.objects['content']
+        self.all_directories = self.objects['directory']
+        self.all_revisions = self.objects['revision']
+        self.all_releases = self.objects['release']
+        self.all_snapshots = self.objects['snapshot']
+
+    def _add(self, type, l):
+        """Record objects of a given type, skipping the ones already
+           recorded and preserving the insertion order.
+
+        Args:
+            type (str): Type of objects concerned by the action
+            l ([object]): List of 'type' object
+
+        """
+        recorded = self.objects[type]
+        for item in l:
+            if item not in recorded:
+                recorded.append(item)
+
+    def maybe_load_contents(self, all_contents):
+        self._add('content', all_contents)
+
+    def maybe_load_directories(self, all_directories):
+        self._add('directory', all_directories)
+
+    def maybe_load_revisions(self, all_revisions):
+        self._add('revision', all_revisions)
+
+    def maybe_load_releases(self, releases):
+        raise ValueError('If called, the test must break.')
+
+    def maybe_load_snapshot(self, snapshot):
+        self.objects['snapshot'].append(snapshot)
+
+    # The hooks below are deliberately inert
+    def _store_origin_visit(self):
+        pass
+
+    def open_fetch_history(self):
+        pass
+
+    def close_fetch_history_success(self, fetch_history_id):
+        pass
+
+    def close_fetch_history_failure(self, fetch_history_id):
+        pass
+
+    def update_origin_visit(self, origin_id, visit, status):
+        pass
+
+    # Override to do nothing at the end
+    def close_failure(self):
+        pass
+
+    def close_success(self):
+        pass
+
+    def pre_cleanup(self):
+        pass
+
+
+@attr('fs')
+class WithProjectTest(TestCase):
+    """Base test case providing a PyPIProject built out of the local
+       '0805nexter' fixture and a cache-backed client.
+
+    """
+    def setUp(self):
+        self.project_name = '0805nexter'
+        metadata_path = '%s/%s.json' % (RESOURCES_PATH, self.project_name)
+        with open(metadata_path) as f:
+            self.data = json.load(f)
+
+        self.temp_dir = tempfile.mkdtemp(
+            dir='/tmp/', prefix='swh.loader.pypi.tests-')
+        # Will use the pypi with cache
+        cached_client = PyPIClientWithCache(
+            temp_directory=self.temp_dir, cache_dir=RESOURCES_PATH)
+        self.project = PyPIProject(
+            client=cached_client,
+            project=self.project_name,
+            project_metadata_url=(
+                'https://pypi.org/pypi/%s/json' % self.project_name),
+            data=self.data)
+
+    def tearDown(self):
+        if not os.path.exists(self.temp_dir):
+            return
+        shutil.rmtree(self.temp_dir)
diff --git a/swh/loader/pypi/tests/resources/0805nexter.json b/swh/loader/pypi/tests/resources/0805nexter.json
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/resources/0805nexter.json
@@ -0,0 +1 @@
+{"info":{"author":"hgtkpython","author_email":"2868989685@qq.com","bugtrack_url":null,"classifiers":[],"description":"UNKNOWN","description_content_type":null,"docs_url":null,"download_url":"UNKNOWN","downloads":{"last_day":-1,"last_month":-1,"last_week":-1},"home_page":"http://www.hp.com","keywords":null,"license":"UNKNOWN","maintainer":null,"maintainer_email":null,"name":"0805nexter","package_url":"https://pypi.org/project/0805nexter/","platform":"UNKNOWN","project_url":"https://pypi.org/project/0805nexter/","project_urls":{"Download":"UNKNOWN","Homepage":"http://www.hp.com"},"release_url":"https://pypi.org/project/0805nexter/1.2.0/","requires_dist":null,"requires_python":null,"summary":"a simple printer of nested lest","version":"1.2.0"},"last_serial":1931736,"releases":{"1.1.0":[{"comment_text":"","digests":{"md5":"07fc93fc12821c1405c3483db88154af","sha256":"52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035"},"downloads":-1,"filename":"0805nexter-1.1.0.zip","has_sig":false,"md5_digest":"07fc93fc12821c1405c3483db88154af","packagetype":"sdist","python_version":"source","requires_python":null,"size":862,"upload_time":"2016-01-31T05:28:42","url":"https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip"}],"1.2.0":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"downloads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]},"urls":[{"comment_text":"","digests":{"md5":"89123c78bd5d3f61cb8f46029492b18a","sha256":"49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709"},"down
loads":-1,"filename":"0805nexter-1.2.0.zip","has_sig":false,"md5_digest":"89123c78bd5d3f61cb8f46029492b18a","packagetype":"sdist","python_version":"source","requires_python":null,"size":898,"upload_time":"2016-01-31T05:51:25","url":"https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip"}]}
\ No newline at end of file
diff --git a/swh/loader/pypi/tests/test_client.py b/swh/loader/pypi/tests/test_client.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_client.py
@@ -0,0 +1,78 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+
+from nose.tools import istest
+
+from swh.loader.pypi import converters
+from swh.loader.pypi.client import _project_pkginfo
+
+from .common import WithProjectTest
+
+
+class PyPIProjectTest(WithProjectTest):
+ @istest
+ def releases(self):
+ actual_releases = self.project.releases([])
+
+ expected_release_artifacts = {
+ '1.1.0': {
+ 'archive_type': 'zip',
+ 'blake2s256': 'df9413bde66e6133b10cadefad6fcf9cbbc369b47831089112c846d79f14985a', # noqa
+ 'date': '2016-01-31T05:28:42',
+ 'filename': '0805nexter-1.1.0.zip',
+ 'sha1': '127d8697db916ba1c67084052196a83319a25000',
+ 'sha1_git': '4b8f1350e6d9fa00256e974ae24c09543d85b196',
+ 'sha256': '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035', # noqa
+ 'size': 862,
+ 'url': 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
+ },
+ '1.2.0': {
+ 'archive_type': 'zip',
+ 'blake2s256': '67010586b5b9a4aaa3b1c386f9dc8b4c99e6e40f37732a717a5f9b9b1185e588', # noqa
+ 'date': '2016-01-31T05:51:25',
+ 'filename': '0805nexter-1.2.0.zip',
+ 'sha1': 'd55238554b94da7c5bf4a349ece0fe3b2b19f79c',
+ 'sha1_git': '8638d33a96cb25d8319af21417f00045ec6ee810',
+ 'sha256': '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709', # noqa
+ 'size': 898,
+ 'url': 'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa
+ }
+ }
+
+ expected_releases = {
+ '1.1.0': {
+ 'name': '1.1.0',
+ 'message': '',
+ },
+ '1.2.0': {
+ 'name': '1.2.0',
+ 'message': '',
+ },
+ }
+
+ dir_paths = []
+ for pkginfo, author, release, artifact, dir_path in actual_releases:
+ version = pkginfo['version']
+ expected_pkginfo = _project_pkginfo(dir_path)
+ self.assertEquals(pkginfo, expected_pkginfo)
+ expected_author = converters.author(expected_pkginfo)
+ self.assertEqual(author, expected_author)
+ expected_artifact = expected_release_artifacts[version]
+ self.assertEqual(artifact, expected_artifact)
+ expected_release = expected_releases[version]
+ self.assertEqual(release, expected_release)
+
+ self.assertTrue(version in dir_path)
+ self.assertTrue(self.project_name in dir_path)
+ # path still exists
+ self.assertTrue(os.path.exists(dir_path))
+ dir_paths.append(dir_path)
+
+ # Ensure uncompressed paths have been destroyed
+ for dir_path in dir_paths:
+ # path no longer exists
+ self.assertFalse(os.path.exists(dir_path))
diff --git a/swh/loader/pypi/tests/test_converters.py b/swh/loader/pypi/tests/test_converters.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_converters.py
@@ -0,0 +1,113 @@
+# Copyright (C) 2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from unittest import TestCase
+from nose.tools import istest
+
+from swh.loader.pypi.converters import author, info
+
+from .common import WithProjectTest
+
+
+class Test(WithProjectTest):
+ @istest
+ def info(self):
+ actual_info = self.project.info()
+
+ expected_info = {
+ 'home_page': self.data['info']['home_page'],
+ 'description': self.data['info']['description'],
+ 'summary': self.data['info']['summary'],
+ 'license': self.data['info']['license'],
+ 'package_url': self.data['info']['package_url'],
+ 'project_url': self.data['info']['project_url'],
+ 'upstream': self.data['info']['project_urls']['Homepage'],
+ }
+
+ self.assertEqual(expected_info, actual_info)
+
+ @istest
+ def author(self):
+ info = self.data['info']
+ actual_author = author(info)
+
+ name = info['author'].encode('utf-8')
+ email = info['author_email'].encode('utf-8')
+ expected_author = {
+ 'fullname': b'%s <%s>' % (name, email),
+ 'name': name,
+ 'email': email,
+ }
+
+ self.assertEqual(expected_author, actual_author)
+
+
+class ParseAuthorTest(TestCase):
+ @istest
+ def author_basic(self):
+ data = {
+ 'author': "i-am-groot",
+ 'author_email': 'iam@groot.org',
+ }
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'i-am-groot <iam@groot.org>',
+ 'name': b'i-am-groot',
+ 'email': b'iam@groot.org',
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed(self):
+ data = {
+ 'author': "['pierre', 'paul', 'jacques']",
+ 'author_email': None,
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b"['pierre', 'paul', 'jacques']",
+ 'name': b"['pierre', 'paul', 'jacques']",
+ 'email': None,
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed_2(self):
+ data = {
+ 'author': '[marie, jeanne]',
+ 'author_email': '[marie@some, jeanne@thing]',
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
+ 'name': b'[marie, jeanne]',
+ 'email': b'[marie@some, jeanne@thing]',
+ }
+
+ self.assertEquals(actual_author, expected_author)
+
+ @istest
+ def author_malformed_3(self):
+ data = {
+ 'author': '[marie, jeanne, pierre]',
+ 'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
+ }
+
+ actual_author = author(data)
+
+ expected_author = {
+ 'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
+ 'name': b'[marie, jeanne, pierre]',
+ 'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
+ }
+
+ self.assertEquals(actual_author, expected_author)
diff --git a/swh/loader/pypi/tests/test_loader.py b/swh/loader/pypi/tests/test_loader.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/pypi/tests/test_loader.py
@@ -0,0 +1,259 @@
+# Copyright (C) 2016-2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import json
+import shutil
+import tempfile
+
+from nose.plugins.attrib import attr
+from nose.tools import istest
+from unittest import TestCase
+
+from swh.model import hashutil
+
+from swh.loader.pypi.client import PyPIProject
+from swh.loader.pypi.loader import PyPILoader
+from .common import PyPIClientWithCache, RESOURCES_PATH, LoaderNoStorage
+
+
+class TestPyPILoader(LoaderNoStorage, PyPILoader):
+ """Real PyPILoader for test purposes (storage and pypi interactions
+ inhibited)
+
+ """
+ def __init__(self, project_name):
+ project_metadata_file = '%s/%s.json' % (RESOURCES_PATH, project_name)
+ project_metadata_url = 'https://pypi.org/pypi/%s/json' % project_name
+ with open(project_metadata_file) as f:
+ data = json.load(f)
+
+ temp_dir = tempfile.mkdtemp(
+ dir='/tmp/', prefix='swh.loader.pypi.tests-')
+ # Use the PyPI client that reads from the local cache directory
+ client = PyPIClientWithCache(
+ temp_directory=temp_dir, cache_dir=RESOURCES_PATH)
+ super().__init__(client=client)
+ self.project = PyPIProject(
+ client=client,
+ project=project_name,
+ project_metadata_url=project_metadata_url,
+ data=data)
+
+ def prepare(self, project_name, origin_url,
+ origin_metadata_url=None):
+ self.project_name = project_name
+ self.origin_url = origin_url
+ self.origin_metadata_url = origin_metadata_url
+ self.visit = 1 # first visit
+ self._prepare_state()
+
+
+@attr('fs')
+class BaseLoaderITest(TestCase):
+ """Loader Test Mixin to prepare the pypi to 'load' in a test context.
+
+ In this setup, the loader uses the cache to load data so no
+ network interaction (no storage, no pypi).
+
+ """
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ self.tmp_root_path = tempfile.mkdtemp()
+ self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+ self._project = project_name
+ self._origin_url = '%s/pypi/%s/' % (dummy_pypi_instance, project_name)
+ self._project_metadata_url = '%s/pypi/%s/json' % (
+ dummy_pypi_instance, project_name)
+
+ def tearDown(self):
+ shutil.rmtree(self.tmp_root_path)
+
+ def assertContentsOk(self, expected_contents):
+ contents = self.loader.all_contents
+ self.assertEquals(len(contents), len(expected_contents))
+
+ for content in contents:
+ content_id = hashutil.hash_to_hex(content['sha1'])
+ self.assertIn(content_id, expected_contents)
+
+ def assertDirectoriesOk(self, expected_directories):
+ directories = self.loader.all_directories
+ self.assertEquals(len(directories), len(expected_directories))
+
+ for _dir in directories:
+ _dir_id = hashutil.hash_to_hex(_dir['id'])
+ self.assertIn(_dir_id, expected_directories)
+
+ def assertSnapshotOk(self, expected_snapshot, expected_revisions):
+ snapshots = self.loader.all_snapshots
+ self.assertEqual(len(snapshots), 1)
+
+ snap = snapshots[0]
+ snap_id = hashutil.hash_to_hex(snap['id'])
+ self.assertEqual(snap_id, expected_snapshot)
+
+ branches = snap['branches']
+ self.assertEqual(len(expected_revisions), len(branches))
+
+ for branch, target in branches.items():
+ rev_id = hashutil.hash_to_hex(target['target'])
+ self.assertIn(rev_id, expected_revisions)
+ self.assertEqual('revision', target['target_type'])
+
+ def assertRevisionsOk(self, expected_revisions): # noqa: N802
+ """Check the loader's revisions match the expected revisions.
+
+ Expects self.loader to be instantiated and ready to be
+ inspected (meaning the loading took place).
+
+ Args:
+ expected_revisions (dict): Dict with key revision id,
+ value the targeted directory id.
+
+ """
+ # The last revision being the one used later to start back from
+ for rev in self.loader.all_revisions:
+ rev_id = hashutil.hash_to_hex(rev['id'])
+ directory_id = hashutil.hash_to_hex(rev['directory'])
+
+ self.assertEquals(expected_revisions[rev_id], directory_id)
+
+
+# Define loaders with no storage
+# They'll just accumulate the data in place
+# Only for testing purposes.
+
+
+class PyPILoaderNoSnapshot(TestPyPILoader):
+ """Same as TestPyPILoader with no prior snapshot seen
+
+ """
+ def _last_snapshot(self):
+ return None
+
+
+class LoaderITest(BaseLoaderITest):
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ super().setUp(project_name, dummy_pypi_instance)
+ self.loader = PyPILoaderNoSnapshot(project_name=project_name)
+
+ @istest
+ def load(self):
+ """Load a pypi origin
+
+ """
+ # when
+ self.loader.load(
+ self._project, self._origin_url, self._project_metadata_url)
+
+ # then
+ self.assertEquals(len(self.loader.all_contents), 6,
+ '3 contents per release artifact files (2)')
+ self.assertEquals(len(self.loader.all_directories), 4)
+ self.assertEquals(len(self.loader.all_revisions), 2,
+ '2 releases so 2 revisions should be created')
+ self.assertEquals(len(self.loader.all_releases), 0,
+ 'No release is created in the pypi loader')
+ self.assertEquals(len(self.loader.all_snapshots), 1,
+ 'Only 1 snapshot targetting all revisions')
+
+ expected_contents = [
+ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
+ '938c33483285fd8ad57f15497f538320df82aeb8',
+ 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
+ '405859113963cb7a797642b45f171d6360425d16',
+ 'e5686aa568fdb1d19d7f1329267082fe40482d31',
+ '83ecf6ec1114fd260ca7a833a2d165e71258c338',
+ ]
+
+ self.assertContentsOk(expected_contents)
+
+ expected_directories = [
+ '05219ba38bc542d4345d5638af1ed56c7d43ca7d',
+ 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
+ 'b178b66bd22383d5f16f4f5c923d39ca798861b4',
+ 'c3a58f8b57433a4b56caaa5033ae2e0931405338',
+ ]
+ self.assertDirectoriesOk(expected_directories)
+
+ # {revision hash: directory hash}
+ expected_revisions = {
+ '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d', # noqa
+ 'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4', # noqa
+ }
+ self.assertRevisionsOk(expected_revisions)
+
+ self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+ expected_revisions)
+
+
+ class PyPILoaderWithSnapshot(TestPyPILoader):
+ """Same as TestPyPILoader but with a prior snapshot already seen
+
+ """
+ def _last_snapshot(self):
+ return {
+ 'id': b'\xf4V\xb0>\x8b\xf1\x92\rd\xb0\r\xf24\xb1\xef\xed\xc2[l\x93', # noqa
+ 'branches': {
+ b'0805nexter-1.1.0.zip': {
+ 'target': b'L\x99\x89\x1f\x93\xb8\x14P'
+ b'8Ww#Z7\xb5\xe9f\xdd\x15q',
+ 'target_type': 'revision'
+ },
+ b'0805nexter-1.2.0.zip': {
+ 'target': b'\xe4E\xdaM\xa2+1\xbf'
+ b'\xeb\xb6\xff\xc48=\xbf\x83'
+ b'\x9a\x07M!',
+ 'target_type': 'revision'
+ },
+ },
+ }
+
+ def _known_artifacts(self, last_snapshot):
+ yield from [
+ (
+ '0805nexter-1.1.0.zip',
+ '52cd128ad3afe539478abc7440d4b043384295fbe6b0958a237cb6d926465035' # noqa
+ ),
+ (
+ '0805nexter-1.2.0.zip',
+ '49785c6ae39ea511b3c253d7621c0b1b6228be2f965aca8a491e6b84126d0709' # noqa
+ )
+ ]
+
+
+class LoaderWithOriginAlreadySeenITest(BaseLoaderITest):
+ def setUp(self, project_name='0805nexter',
+ dummy_pypi_instance='https://dummy.org'):
+ super().setUp(project_name, dummy_pypi_instance)
+ self.loader = PyPILoaderWithSnapshot(project_name=project_name)
+
+ @istest
+ def load(self):
+ """Loading an already-injected pypi origin results in only 1 snapshot
+
+ """
+ # when
+ self.loader.load(
+ self._project, self._origin_url, self._project_metadata_url)
+
+ # then
+ self.assertEquals(len(self.loader.all_contents), 0)
+ self.assertEquals(len(self.loader.all_directories), 0)
+ self.assertEquals(len(self.loader.all_revisions), 0)
+ self.assertEquals(len(self.loader.all_releases), 0)
+ self.assertEquals(len(self.loader.all_snapshots), 1)
+
+ self.assertContentsOk([])
+ self.assertDirectoriesOk([])
+ self.assertRevisionsOk(expected_revisions={})
+
+ expected_revisions = {
+ '4c99891f93b81450385777235a37b5e966dd1571': '05219ba38bc542d4345d5638af1ed56c7d43ca7d', # noqa
+ 'e445da4da22b31bfebb6ffc4383dbf839a074d21': 'b178b66bd22383d5f16f4f5c923d39ca798861b4', # noqa
+ }
+ self.assertSnapshotOk('f456b03e8bf1920d64b00df234b1efedc25b6c93',
+ expected_revisions)

File Metadata

Mime Type
text/plain
Expires
Nov 5 2024, 7:00 AM (8 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227789

Event Timeline