diff --git a/PKG-INFO b/PKG-INFO index 6e5adcf..27725ad 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,95 +1,95 @@ Metadata-Version: 2.1 Name: swh.loader.git -Version: 0.0.40 +Version: 0.0.41 Summary: Software Heritage git loader Home-page: https://forge.softwareheritage.org/diffusion/DLDG/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-git Description: swh-loader-git ============== The Software Heritage Git Loader is a tool and a library to walk a local Git repository and inject into the SWH dataset all contained files that weren't known before. License ------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. Dependencies ------------ ### Runtime - python3 - python3-dulwich - python3-retrying - python3-swh.core - python3-swh.model - python3-swh.storage - python3-swh.scheduler ### Test - python3-nose Requirements ------------ - implementation language, Python3 - coding guidelines: conform to PEP8 - Git access: via dulwich Configuration ------------- You can run the loader or the updater directly by calling: ``` python3 -m swh.loader.git.{loader,updater} ``` ### Location Both tools expect a configuration file. Either one of the following location: - /etc/softwareheritage/ - ~/.config/swh/ - ~/.swh/ Note: Will call that location $SWH_CONFIG_PATH ### Configuration sample $SWH_CONFIG_PATH/loader/git-{loader,updater}.yml: ``` storage: cls: remote args: url: http://localhost:5002/ ``` Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/debian/changelog b/debian/changelog index b166f0c..c1f4f8d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,302 +1,303 @@ -swh-loader-git (0.0.40-1~swh1~bpo9+1) stretch-swh; urgency=medium +swh-loader-git (0.0.41-1~swh1) unstable-swh; urgency=medium - * Rebuild for stretch-backports. + * Release swh.loader.git v0.0.41 + * Use explicit keyword argument for base_url in the load task - -- Nicolas Dandrimont Tue, 09 Oct 2018 16:28:14 +0200 + -- Nicolas Dandrimont Thu, 11 Oct 2018 16:26:27 +0200 swh-loader-git (0.0.40-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.40 * Improve python packaging * Make the loader more robust against holes in the history caused by * buggy imports * Allow ignoring the history to make a full load -- Nicolas Dandrimont Tue, 09 Oct 2018 16:28:14 +0200 swh-loader-git (0.0.39-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.39 * Avoid walking the history of large git repos, which takes a long time * Really save packfiles -- Nicolas Dandrimont Thu, 20 Sep 2018 17:22:17 +0200 swh-loader-git (0.0.38-1~swh1) unstable-swh; urgency=medium * v0.0.38 * Improve origin_visit initialization step * Properly sandbox the prepare statement so that if it breaks, we can * update appropriately the visit with the correct status -- Antoine R. Dumont (@ardumont) Wed, 07 Mar 2018 11:39:30 +0100 swh-loader-git (0.0.37-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.37 * Remove spurious debug print -- Nicolas Dandrimont Tue, 06 Feb 2018 16:00:40 +0100 swh-loader-git (0.0.36-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.36 * Update to use snapshots instead of occurrences * Use dulwich get_transport_and_path rather than hardcode the tcp transport -- Nicolas Dandrimont Tue, 06 Feb 2018 14:42:36 +0100 swh-loader-git (0.0.35-1~swh1) unstable-swh; urgency=medium * v0.0.35 * swh.loader.git.loader: Warn when object is corrupted and continue * swh.loader.git.loader: Add structured data to the log message regarding skipping objects * swh.loader.git.loader: Force further checks on objects * swh.loader.git.loader: Unify reading object from the repository * swh.loader.git.loader: Warn when object malformed and continue * swh.loader.git.loader: Trap missing object id and continue * swh.loader.git.base: Reuse swh.loader.core base loader * swh.loader.git.converters: Fix release time conversion issue when no date provided -- Antoine R. Dumont (@ardumont) Mon, 18 Dec 2017 12:08:01 +0100 swh-loader-git (0.0.34-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git version 0.0.34 * Update packaging runes -- Nicolas Dandrimont Thu, 12 Oct 2017 20:12:11 +0200 swh-loader-git (0.0.33-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.33 * make the updater's parent commit cache more useful -- Nicolas Dandrimont Fri, 15 Sep 2017 18:45:41 +0200 swh-loader-git (0.0.32-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git 0.0.32 * Update tasks to new swh.scheduler API -- Nicolas Dandrimont Mon, 12 Jun 2017 18:04:50 +0200 swh-loader-git (0.0.31-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.31 * Migrate from swh.core.hashutil to swh.model.hashutil * Only send objects that are actually missing -- Nicolas Dandrimont Fri, 17 Mar 2017 17:40:17 +0100 swh-loader-git (0.0.30-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.30 * Fix handling of mergetag headers -- Nicolas Dandrimont Thu, 09 Mar 2017 11:30:08 +0100 swh-loader-git (0.0.29-1~swh1) unstable-swh; urgency=medium * v0.0.29 * GitLoaderFromArchive: Use the same configuration file as * GitLoader (permit to deploy both as the same unit) * git reader: Refactor to allow listing revisions as well as contents -- Antoine R. Dumont (@ardumont) Mon, 20 Feb 2017 11:32:24 +0100 swh-loader-git (0.0.28-1~swh1) unstable-swh; urgency=medium * v0.0.28 * loader: Fix fetch_date override -- Antoine R. Dumont (@ardumont) Wed, 15 Feb 2017 18:43:32 +0100 swh-loader-git (0.0.27-1~swh1) unstable-swh; urgency=medium * v0.0.27 * Add loader-git from archive -- Antoine R. Dumont (@ardumont) Tue, 14 Feb 2017 18:56:52 +0100 swh-loader-git (0.0.26-1~swh1) unstable-swh; urgency=medium * v0.0.26 * Add a git loader able to deal with git repository in archive -- Antoine R. Dumont (@ardumont) Tue, 14 Feb 2017 16:24:50 +0100 swh-loader-git (0.0.25-1~swh1) unstable-swh; urgency=medium * v0.0.25 * Fix to permit to actually pass the fetch date as parameter for * the loading git disk loader -- Antoine R. Dumont (@ardumont) Fri, 10 Feb 2017 17:34:35 +0100 swh-loader-git (0.0.24-1~swh1) unstable-swh; urgency=medium * v0.0.24 * Update storage configuration reading -- Antoine R. Dumont (@ardumont) Thu, 15 Dec 2016 18:40:29 +0100 swh-loader-git (0.0.23-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.23 * Make the save_data mechanism generic -- Nicolas Dandrimont Fri, 02 Dec 2016 15:34:05 +0100 swh-loader-git (0.0.22-1~swh1) unstable-swh; urgency=medium * v0.0.22 * Improve reader to permit to use it as analyzer tool -- Antoine R. Dumont (@ardumont) Fri, 04 Nov 2016 10:37:24 +0100 swh-loader-git (0.0.21-1~swh1) unstable-swh; urgency=medium * v0.0.21 * Improve the reader git to load all contents from a pack. * Improve to avoid unnecessary readings from db -- Antoine R. Dumont (@ardumont) Wed, 26 Oct 2016 17:06:12 +0200 swh-loader-git (0.0.20-1~swh1) unstable-swh; urgency=medium * v0.0.20 * Add new reader git task -- Antoine R. Dumont (@ardumont) Tue, 25 Oct 2016 18:40:17 +0200 swh-loader-git (0.0.19-1~swh1) unstable-swh; urgency=medium * v0.0.19 * Update git loaders to register origin_visit's state -- Antoine R. Dumont (@ardumont) Tue, 23 Aug 2016 16:34:15 +0200 swh-loader-git (0.0.18-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.18 * Properly handle skipped contents -- Nicolas Dandrimont Fri, 19 Aug 2016 18:12:44 +0200 swh-loader-git (0.0.16-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.16 * Add exist_ok to packfile cache directory creation -- Nicolas Dandrimont Mon, 01 Aug 2016 15:53:07 +0200 swh-loader-git (0.0.15-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.15 * Absence of remote refs doesn't throw an error in updater -- Nicolas Dandrimont Wed, 15 Jun 2016 01:20:37 +0200 swh-loader-git (0.0.14-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.14 * Add a disk loader using dulwich * Rework the loader logic to use a single pattern for both loaders * Allow caching of packfiles for the remote loader -- Nicolas Dandrimont Tue, 14 Jun 2016 18:10:21 +0200 swh-loader-git (0.0.13-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.13 * Update for latest schema revision -- Nicolas Dandrimont Fri, 08 Apr 2016 16:46:41 +0200 swh-loader-git (0.0.12-1~swh1) unstable-swh; urgency=medium * Release swh-loader-git v0.0.12 * Update to use new swh.storage api for object listing * Add a size limit to packfiles * Return a proper eventfulness for empty repositories * Do not crawl the pack file if unnecessary -- Nicolas Dandrimont Thu, 25 Feb 2016 18:21:34 +0100 swh-loader-git (0.0.11-1~swh1) unstable-swh; urgency=medium * Release swh.loader.git v0.0.11 * Implement git updater -- Nicolas Dandrimont Fri, 19 Feb 2016 19:13:22 +0100 swh-loader-git (0.0.10-1~swh1) unstable-swh; urgency=medium * Prepare swh.loader.git release v0.0.10 * Update for swh.model * Use new swh.storage -- Nicolas Dandrimont Mon, 07 Dec 2015 18:59:46 +0100 swh-loader-git (0.0.9-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.9 * Close fetch_history on failure too -- Nicolas Dandrimont Wed, 04 Nov 2015 10:54:37 +0100 swh-loader-git (0.0.8-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.8 * New database schema (v028) * Populate fetch_history (T121) -- Nicolas Dandrimont Tue, 27 Oct 2015 18:11:26 +0100 swh-loader-git (0.0.7-1~swh1) unstable-swh; urgency=medium * Prepare swh.loader.git v0.0.7 deployment -- Nicolas Dandrimont Mon, 19 Oct 2015 12:37:09 +0200 swh-loader-git (0.0.6-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.6 -- Nicolas Dandrimont Fri, 09 Oct 2015 17:50:35 +0200 swh-loader-git (0.0.5-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.5 -- Nicolas Dandrimont Tue, 06 Oct 2015 17:42:11 +0200 swh-loader-git (0.0.4-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.4 -- Nicolas Dandrimont Fri, 02 Oct 2015 14:54:04 +0200 swh-loader-git (0.0.3-1~swh1) unstable-swh; urgency=medium * Prepare deployment of swh.loader.git v0.0.3 -- Nicolas Dandrimont Thu, 01 Oct 2015 11:36:28 +0200 swh-loader-git (0.0.2-1~swh1) unstable-swh; urgency=medium * Prepare deploying swh.loader.git v0.0.2 -- Nicolas Dandrimont Tue, 29 Sep 2015 17:22:09 +0200 swh-loader-git (0.0.1-1~swh1) unstable-swh; urgency=medium * Initial release * Tagging swh.loader.git v0.0.1 -- Nicolas Dandrimont Fri, 25 Sep 2015 16:04:00 +0200 diff --git a/swh.loader.git.egg-info/PKG-INFO b/swh.loader.git.egg-info/PKG-INFO index 6e5adcf..27725ad 100644 --- a/swh.loader.git.egg-info/PKG-INFO +++ b/swh.loader.git.egg-info/PKG-INFO @@ -1,95 +1,95 @@ Metadata-Version: 2.1 Name: swh.loader.git -Version: 0.0.40 +Version: 0.0.41 Summary: Software Heritage git loader Home-page: https://forge.softwareheritage.org/diffusion/DLDG/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest Project-URL: Funding, https://www.softwareheritage.org/donate Project-URL: Source, https://forge.softwareheritage.org/source/swh-loader-git Description: swh-loader-git ============== The Software Heritage Git Loader is a tool and a library to walk a local Git repository and inject into the SWH dataset all contained files that weren't known before. License ------- This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. See top-level LICENSE file for the full text of the GNU General Public License along with this program. Dependencies ------------ ### Runtime - python3 - python3-dulwich - python3-retrying - python3-swh.core - python3-swh.model - python3-swh.storage - python3-swh.scheduler ### Test - python3-nose Requirements ------------ - implementation language, Python3 - coding guidelines: conform to PEP8 - Git access: via dulwich Configuration ------------- You can run the loader or the updater directly by calling: ``` python3 -m swh.loader.git.{loader,updater} ``` ### Location Both tools expect a configuration file. Either one of the following location: - /etc/softwareheritage/ - ~/.config/swh/ - ~/.swh/ Note: Will call that location $SWH_CONFIG_PATH ### Configuration sample $SWH_CONFIG_PATH/loader/git-{loader,updater}.yml: ``` storage: cls: remote args: url: http://localhost:5002/ ``` Platform: UNKNOWN Classifier: Programming Language :: Python :: 3 Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3) Classifier: Operating System :: OS Independent Classifier: Development Status :: 5 - Production/Stable Description-Content-Type: text/markdown Provides-Extra: testing diff --git a/swh/loader/git/tasks.py b/swh/loader/git/tasks.py index 5f449af..5eefff1 100644 --- a/swh/loader/git/tasks.py +++ b/swh/loader/git/tasks.py @@ -1,70 +1,70 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import dateutil.parser from swh.scheduler.task import Task from .loader import GitLoader, GitLoaderFromArchive from .updater import BulkUpdater from .reader import GitSha1RemoteReaderAndSendToQueue # TODO: rename to LoadRemoteGitRepository class UpdateGitRepository(Task): """Import a git repository from a remote location""" task_queue = 'swh_loader_git' def run_task(self, repo_url, base_url=None): """Import a git repository""" loader = BulkUpdater() loader.log = self.log - return loader.load(repo_url, base_url) + return loader.load(repo_url, base_url=base_url) class LoadDiskGitRepository(Task): """Import a git repository from disk""" task_queue = 'swh_loader_git_express' def run_task(self, origin_url, directory, date): """Import a git repository, cloned in `directory` from `origin_url` at `date`.""" loader = GitLoader() loader.log = self.log return loader.load(origin_url, directory, dateutil.parser.parse(date)) class UncompressAndLoadDiskGitRepository(Task): """Import a git repository from a zip archive""" task_queue = 'swh_loader_git_archive' def run_task(self, origin_url, archive_path, date): """1. Uncompress an archive repository in a local and temporary folder 2. Load it through the git disk loader 3. Clean up the temporary folder """ loader = GitLoaderFromArchive() loader.log = self.log return loader.load( origin_url, archive_path, dateutil.parser.parse(date)) class ReaderGitRepository(Task): task_queue = 'swh_reader_git' def run_task(self, repo_url, base_url=None): """Read a git repository from a remote location and send sha1 to archival. """ loader = GitSha1RemoteReaderAndSendToQueue() loader.log = self.log return loader.load(repo_url) diff --git a/version.txt b/version.txt index b868166..3cd46a1 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.40-0-g7156c46 \ No newline at end of file +v0.0.41-0-g8586650 \ No newline at end of file