diff --git a/README b/README index 773e7e9..ba6ff69 100644 --- a/README +++ b/README @@ -1,119 +1,119 @@ swh-loader-svn ============== Documents are in the ./docs folder: - Specification: ./docs/swh-loader-svn.txt - Comparison performance with git-svn: ./docs/comparison-git-svn-swh-svn.org # Configuration file ## Location Either: - /etc/softwareheritage/ - ~/.config/swh/ - ~/.swh/ Note: Will call that location $SWH_CONFIG_PATH ## Configuration sample $SWH_CONFIG_PATH/loader/svn.yml: ``` storage: cls: remote args: url: http://localhost:5002/ check_revision: 10 ``` ## configuration content With at least the following module (swh.loader.svn.tasks) and queue (swh_loader_svn): $SWH_CONFIG_PATH/worker.yml: ``` task_broker: amqp://guest@localhost// task_modules: task_modules: - swh.loader.svn.tasks task_queues: - swh_loader_svn task_soft_time_limit = 0 ``` `swh.loader.svn.tasks` and `swh_loader_svn` are the important entries here. ## toplevel ``` $ python3 repo = 'pyang-repo-r343-eol-native-mixed-lf-crlf' #repo = 'zipeg-gae' origin_url = 'http://%s.googlecode.com' % repo local_repo_path = '/home/storage/svn/repo' svn_url = 'file://%s/%s' % (local_repo_path, repo) import logging logging.basicConfig(level=logging.DEBUG) -from swh.loader.svn.tasks import LoadSWHSvnRepositoryTsk +from swh.loader.svn.tasks import LoadSvnRepository -t = LoadSWHSvnRepositoryTsk() +t = LoadSvnRepository() t.run(svn_url=svn_url, destination_path='/tmp', origin_url=origin_url, visit_date='2016-05-03T15:16:32+00:00', start_from_scratch=True) ``` ## Production like start worker instance To start a current worker instance: ```sh python3 -m celery worker --app=swh.scheduler.celery_backend.config.app \ --pool=prefork \ --concurrency=10 \ -Ofair \ --loglevel=debug 2>&1 ``` ## Produce a repository to load You can see: `python3 -m swh.loader.svn.producer svn --help` ### one repository ```sh python3 -u -m swh.loader.svn.producer svn --svn-url file:///home/storage/svn/repos/pkg-fox --visit-date 'Tue, 3 May 
2017 17:16:32 +0200' ``` Note: - `--visit-date` overrides the default visit date (which is now). ### multiple repositories ```sh cat ~/svn-repository-list | python3 -m swh.loader.svn.producer svn ``` The file svn-repository-list contains a list of svn repository urls (one per line), something like: ```txt svn://svn.debian.org/svn/pkg-fox/ optional-url svn://svn.debian.org/svn/glibc-bsd/ optional-url svn://svn.debian.org/svn/pkg-voip/ optional-url svn://svn.debian.org/svn/python-modules/ optional-url svn://svn.debian.org/svn/pkg-gnome/ optional-url ``` ## Produce archive of svndumps list to load See: `python3 -m swh.loader.svn.producer svn-archive --help` diff --git a/docs/comparison-git-svn-swh-svn.org b/docs/comparison-git-svn-swh-svn.org index 3db6c61..05ca8d6 100644 --- a/docs/comparison-git-svn-swh-svn.org +++ b/docs/comparison-git-svn-swh-svn.org @@ -1,992 +1,992 @@ #+title: Comparison git-svn and swh-svn #+author: ardumont * How `git svn` checks out an svn repository, follows the `svn log` history and, for each svn commit, computes a git revision by adding metadata to the svn commit message and then stores the results in the .git folder. `swh loader svn` checks out an svn repository, follows the `svn log` history and, for each svn revision, computes an swh revision from it and stores the result in an swh-storage instance. As of now, 202 svn debian repositories have been injected in an swh-storage instance. Logs have been injected in softwareheritage-log for that injection. We'll `git svn clone` some of those same repositories and compare the results with the injection logs. This comparison is not completely fair: - `git svn` stores on disk (remote svn, disk) - `swh svn` stores in a swh-storage instance (remote svn, network, db, disk). 
* Definition Considering the following definitions: |------------+--------------------| | Size | Revision threshold | |------------+--------------------| | very large | > 40k | | large | > 10k | | medium | > 5k | | small | < 150 | | very small | < 10 | |------------+--------------------| Here is the svn repositories list: #+BEGIN_SRC sql url | svn_revision ------------------------------------------------+-------------- svn://svn.debian.org/svn/pkg-doc-linux | 294 svn://svn.debian.org/svn/adduser/ | 840 svn://svn.debian.org/svn/glibc-bsd/ | 6006 svn://svn.debian.org/svn/pkg-fox/ | 145 svn://svn.debian.org/svn/spip/ | 170 svn://svn.debian.org/svn/console-common/ | 324 svn://svn.debian.org/svn/partial-mirror/ | 592 svn://svn.debian.org/svn/debian-ha/ | 103 svn://svn.debian.org/svn/pkg-omnievents/ | 8 svn://svn.debian.org/svn/online-desktop/ | 109 svn://svn.debian.org/svn/vamos/ | 24 svn://svn.debian.org/svn/pkg-ralink/ | 191 svn://svn.debian.org/svn/pkg-libburnia/ | 372 svn://svn.debian.org/svn/pkg-sysvinit/ | 1966 svn://svn.debian.org/svn/pkg-synfig/ | 438 svn://svn.debian.org/svn/kernel-handbook/ | 89 svn://svn.debian.org/svn/pkg-vlock/ | 59 svn://svn.debian.org/svn/pkg-phpgroupware/ | 125 svn://svn.debian.org/svn/pkg-squeak/ | 41 svn://svn.debian.org/svn/pkg-turbogears/ | 26 svn://svn.debian.org/svn/megahal/ | 55 svn://svn.debian.org/svn/ssmtp/ | 65 svn://svn.debian.org/svn/dmraid-debian/ | 31 svn://svn.debian.org/svn/pkg-kwiki/ | 231 svn://svn.debian.org/svn/pkg-gnome/ | 48013 svn://svn.debian.org/svn/pkg-varnish/ | 115 svn://svn.debian.org/svn/logidee-tools/ | 149 svn://svn.debian.org/svn/pkg-ruby-extras/ | 6142 svn://svn.debian.org/svn/xml-dtc/ | 6 svn://svn.debian.org/svn/pkg-voip/ | 10707 svn://svn.debian.org/svn/perl-tk/ | 95 svn://svn.debian.org/svn/pkg-qemu/ | 426 svn://svn.debian.org/svn/pkg-coolkey/ | 67 svn://svn.debian.org/svn/pkg-gmt/ | 52 svn://svn.debian.org/svn/pkg-catalyst/ | 1718 svn://svn.debian.org/svn/debichem/ | 6339 
svn://svn.debian.org/svn/pkg-ime/ | 1056 svn://svn.debian.org/svn/pkg-fdt-tools/ | 68 svn://svn.debian.org/svn/pkg-kolab/ | 1606 svn://svn.debian.org/svn/pkg-openxchange/ | 316 svn://svn.debian.org/svn/pkg-gourmet/ | 6 svn://svn.debian.org/svn/egtk/ | 19 svn://svn.debian.org/svn/pkg-mythtv/ | 257 svn://svn.debian.org/svn/nstx-ng/ | 16 svn://svn.debian.org/svn/pkg-xen/ | 1216 svn://svn.debian.org/svn/pkg-orbit2/ | 21 svn://svn.debian.org/svn/pkg-asv/ | 77 svn://svn.debian.org/svn/pkg-gtk2-perl/ | 275 svn://svn.debian.org/svn/pkg-lirc/ | 542 svn://svn.debian.org/svn/pkg-smalltools/ | 106 svn://svn.debian.org/svn/debian-br-cdd/ | 327 svn://svn.debian.org/svn/aptitude/ | 4434 svn://svn.debian.org/svn/pkg-opensync/ | 472 svn://svn.debian.org/svn/nsspampgsql/ | 99 svn://svn.debian.org/svn/piuparts/ | 931 svn://svn.debian.org/svn/secvpn/ | 50 svn://svn.debian.org/svn/pkg-cvs/ | 84 svn://svn.debian.org/svn/fai/ | 6785 svn://svn.debian.org/svn/pkg-xfce/ | 9393 svn://svn.debian.org/svn/aqbanking/ | 1909 svn://svn.debian.org/svn/libpst/ | 92 svn://svn.debian.org/svn/pkg-fedora-ds/ | 262 svn://svn.debian.org/svn/pkg-fim/ | 4 svn://svn.debian.org/svn/debianjr/ | 26 svn://svn.debian.org/svn/moreutils/ | 102 svn://svn.debian.org/svn/soc/ | 211 svn://svn.debian.org/svn/cruft/ | 213 svn://svn.debian.org/svn/yaird/ | 129 svn://svn.debian.org/svn/debianzine/ | 11 svn://svn.debian.org/svn/pkg-mailutils/ | 431 svn://svn.debian.org/svn/pkg-upslug2/ | 22 svn://svn.debian.org/svn/pkg-libnuma/ | 74 svn://svn.debian.org/svn/pkg-boinc/ | 929 svn://svn.debian.org/svn/ddp/ | 11130 svn://svn.debian.org/svn/l10n-russian/ | 399 svn://svn.debian.org/svn/ninjajump/ | 4 svn://svn.debian.org/svn/demudi/ | 2000 svn://svn.debian.org/svn/debnest/ | 26 svn://svn.debian.org/svn/pkg-kbd/ | 263 svn://svn.debian.org/svn/debian-olpc/ | 29 svn://svn.debian.org/svn/pkg-ace/ | 757 svn://svn.debian.org/svn/chrpath/ | 174 svn://svn.debian.org/svn/pkg-uml/ | 335 svn://svn.debian.org/svn/xbox/ | 1 
svn://svn.debian.org/svn/resolvconf/ | 367 svn://svn.debian.org/svn/dbconfig-common/ | 468 svn://svn.debian.org/svn/pkg-pan/ | 10 svn://svn.debian.org/svn/pkg-fgfs/ | 229 svn://svn.debian.org/svn/pkg-pulseaudio/ | 187 svn://svn.debian.org/svn/fai-config-dir/ | 19 svn://svn.debian.org/svn/pkg-bioc/ | 381 svn://svn.debian.org/svn/pkg-hamradio/ | 9 svn://svn.debian.org/svn/debootloaders/ | 261 svn://svn.debian.org/svn/tasksel/ | 2114 svn://svn.debian.org/svn/chase/ | 16 svn://svn.debian.org/svn/pkg-sks/ | 245 svn://svn.debian.org/svn/pkg-mol/ | 171 svn://svn.debian.org/svn/cabot/ | 203 svn://svn.debian.org/svn/pkg-libatomic-ops/ | 23 svn://svn.debian.org/svn/kgb/ | 976 svn://svn.debian.org/svn/apt-howto/ | 132 svn://svn.debian.org/svn/pkg-xcdroast/ | 49 svn://svn.debian.org/svn/pkg-citadel/ | 393 svn://svn.debian.org/svn/pkg-ofed/ | 1070 svn://svn.debian.org/svn/atrace/ | 3 svn://svn.debian.org/svn/guessnet/ | 337 svn://svn.debian.org/svn/pkg-mwavem/ | 34 svn://svn.debian.org/svn/pkg-ofbis/ | 48 svn://svn.debian.org/svn/cdd/ | 1351 svn://svn.debian.org/svn/pear-package/ | 716 svn://svn.debian.org/svn/debian-hebrew/ | 1026 svn://svn.debian.org/svn/pkg-xdialog/ | 17 svn://svn.debian.org/svn/demi/ | 84 svn://svn.debian.org/svn/chinese/ | 208 svn://svn.debian.org/svn/cipux/ | 5158 svn://svn.debian.org/svn/splashy/ | 1449 svn://svn.debian.org/svn/pkg-directfb/ | 402 svn://svn.debian.org/svn/python-modules/ | 34523 svn://svn.debian.org/svn/tetexcvs/ | 538 svn://svn.debian.org/svn/pkg-qalculate/ | 110 svn://svn.debian.org/svn/pkg-freedict/ | 206 svn://svn.debian.org/svn/pkg-ocaml-maint/ | 6495 svn://svn.debian.org/svn/pkg-llvm/ | 1915 svn://svn.debian.org/svn/pkg-tetex/ | 538 svn://svn.debian.org/svn/sumusu/ | 47 svn://svn.debian.org/svn/pkg-dutch/ | 165 svn://svn.debian.org/svn/pkg-modperl2/ | 2 svn://svn.debian.org/svn/net-ssleay/ | 465 svn://svn.debian.org/svn/pkg-nagios/ | 2150 svn://svn.debian.org/svn/bts-webui/ | 178 svn://svn.debian.org/svn/pkg-rrdtool/ | 49 
svn://svn.debian.org/svn/debburn/ | 860 svn://svn.debian.org/svn/catdoc/ | 34 svn://svn.debian.org/svn/pkg-italian/ | 89 svn://svn.debian.org/svn/logrotate/ | 1 svn://svn.debian.org/svn/oval/ | 450 svn://svn.debian.org/svn/pkg-flock/ | 16 svn://svn.debian.org/svn/pkg-mixmaster/ | 1028 svn://svn.debian.org/svn/hwdb/ | 1 svn://svn.debian.org/svn/pkg-inetutils/ | 254 svn://svn.debian.org/svn/libacpi/ | 5 svn://svn.debian.org/svn/pkg-mesa/ | 38 svn://svn.debian.org/svn/pkg-dkms/ | 45 svn://svn.debian.org/svn/pkg-mysql/ | 2261 svn://svn.debian.org/svn/modvhostldap/ | 69 svn://svn.debian.org/svn/pancutan/ | 80 svn://svn.debian.org/svn/pkg-gd/ | 197 svn://svn.debian.org/svn/pkg-caudium/ | 28 svn://svn.debian.org/svn/pkg-jed/ | 1205 svn://svn.debian.org/svn/pkg-escience/ | 593 svn://svn.debian.org/svn/libapt-front/ | 1166 svn://svn.debian.org/svn/pkg-plt-scheme/ | 127 svn://svn.debian.org/svn/pkg-gmagick/ | 130 svn://svn.debian.org/svn/tts/ | 1 svn://svn.debian.org/svn/parted/ | 898 svn://svn.debian.org/svn/ddtp/ | 42 svn://svn.debian.org/svn/pkg-net-tools/ | 74 svn://svn.debian.org/svn/pkg-icewm/ | 331 svn://svn.debian.org/svn/pkg-nx/ | 75 svn://svn.debian.org/svn/pyroman/ | 99 svn://svn.debian.org/svn/pkg-swig/ | 417 svn://svn.debian.org/svn/pkg-postgresql/ | 837 svn://svn.debian.org/svn/php-apt-parser/ | 36 svn://svn.debian.org/svn/pkg-multidistrotools/ | 21 svn://svn.debian.org/svn/pkg-grass/ | 3148 svn://svn.debian.org/svn/pkg-polygen/ | 15 svn://svn.debian.org/svn/debpool/ | 80 svn://svn.debian.org/svn/pkg-python/ | 1 svn://svn.debian.org/svn/pkg-findutils/ | 434 svn://svn.debian.org/svn/deb-usability/ | 183 svn://svn.debian.org/svn/pkg-mailscanner/ | 29 svn://svn.debian.org/svn/edos/ | 7 svn://svn.debian.org/svn/vmware-package/ | 65 svn://svn.debian.org/svn/pkg-fetchmail/ | 565 svn://svn.debian.org/svn/imediff2/ | 22 svn://svn.debian.org/svn/dwn-trans/ | 48 svn://svn.debian.org/svn/pkg-pcmcia-cs/ | 137 svn://svn.debian.org/svn/pkg-fonty/ | 86 
svn://svn.debian.org/svn/python-apps/ | 13161 svn://svn.debian.org/svn/nagios-aptwatch/ | 11 svn://svn.debian.org/svn/pkg-nethack/ | 128 svn://svn.debian.org/svn/pkg-xiph/ | 61 svn://svn.debian.org/svn/pkg-terp/ | 27 svn://svn.debian.org/svn/pkg-evolution/ | 2885 svn://svn.debian.org/svn/pkg-freebob/ | 489 svn://svn.debian.org/svn/pkg-glusterfs/ | 55 svn://svn.debian.org/svn/pkg-awstats/ | 131 svn://svn.debian.org/svn/pkg-gridengine/ | 91 svn://svn.debian.org/svn/pkg-geneweb/ | 33 svn://svn.debian.org/svn/women/ | 57 svn://svn.debian.org/svn/publicity/ | 6596 svn://svn.debian.org/svn/pkg-cracklib/ | 123 svn://svn.debian.org/svn/pkg-tikiwiki/ | 164 svn://svn.debian.org/svn/pkg-k3b/ | 346 svn://svn.debian.org/svn/pkg-zope/ | 3017 #+END_SRC * Machine worker01.internal.softwareheritage.org has been the * Comparison - 1 |------------+--------+------------------------------------------+-------------------------+---------------------------+-------------| | Type | # Revs | Url | git-svn (git svn clone) | swh-svn (load repository) | Ratio | |------------+--------+------------------------------------------+-------------------------+---------------------------+-------------| | small | 145 | svn://svn.debian.org/svn/pkg-fox/ | 447.074 | 1445.8084549359046s | 3.2339354 s | | medium | 6006 | svn://svn.debian.org/svn/glibc-bsd/ | 4740.928 | 10118.787201701081 | 2.1343474 | | large | 10707 | svn://svn.debian.org/svn/pkg-voip/ | 8592.398 | 46112.436119881924 | 5.3666551 | | very large | 34523 | svn://svn.debian.org/svn/python-modules/ | 36627.907 | 288410.36918153404s | 7.8740609 s | | very large | 48013 | svn://svn.debian.org/svn/pkg-gnome/ | 71902.080 | 594822.724728007 | 8.2726776 | |------------+--------+------------------------------------------+-------------------------+---------------------------+-------------| #+TBLFM: $6=$5/$4 Note: - Time is in seconds - We compare storing in our swh backend (swh-loader) and storing on disk (git-svn). 
The git-svn clone, when it finishes, is faster. However, its result would still have to be loaded into swh. Below are the details about the extracted times. Feel free to skip. ** Log extraction swh-loader-svn logs were extracted for the period from April 15th 2016 to April 30th 2016. #+BEGIN_SRC sh psql -c "select level, message from log where src_host='worker01.softwareheritage.org' and ts between '2016-04-15 00:00:00.00+01' and '2016-04-30 00:00:00.00+01' ;" service=swh-log #+END_SRC *** DONE pkg-fox CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh - info | [2016-04-15 18:21:27,874: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[7d697a6b-ae8b-4718-b226-1406af717954] + info | [2016-04-15 18:21:27,874: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[7d697a6b-ae8b-4718-b226-1406af717954] debug | [2016-04-15 18:22:18,522: DEBUG/Worker-1] svn co svn://svn.debian.org/svn/pkg-fox/@1 info | [2016-04-15 18:22:19,193: INFO/Worker-1] [revision_start-revision_end]: [1-145] info | [2016-04-15 18:22:19,207: INFO/Worker-1] Repo {'remote_url': 'svn://svn.debian.org/svn/pkg-fox', 'local_url': '/tmp/tmp.wzzvlwuw.swh.loader/pkg-fox', 'uuid': 'd908f651-7add-0310-a5d1-c7ac9dfebe41', 'swh-origin': 4} ready to be processed. 
- info | [2016-04-15 18:45:33,703: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[7d697a6b-ae8b-4718-b226-1406af717954] succeeded in 1445.8084549359046s: None + info | [2016-04-15 18:45:33,703: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[7d697a6b-ae8b-4718-b226-1406af717954] succeeded in 1445.8084549359046s: None #+END_SRC *** DONE glibc-bsd CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC log - info | [2016-04-15 15:32:48,048: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[a41fba9b-f397-493a-a95f-deb673f91156] + info | [2016-04-15 15:32:48,048: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[a41fba9b-f397-493a-a95f-deb673f91156] info | [2016-04-15 15:32:59,607: INFO/Worker-1] [revision_start-revision_end]: [1-6006] info | [2016-04-15 15:32:59,620: INFO/Worker-1] Repo {'remote_url': 'svn://svn.debian.org/svn/glibc-bsd', 'local_url': '/tmp/tmp.bfeb_zdv.swh.loader/glibc-bsd', 'uuid': 'ae44cbe4-c7d5-0310-ae45-95c72a56cd7d', 'swh-origin': 3} ready to be processed. 
- info | [2016-04-15 18:21:27,855: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[a41fba9b-f397-493a-a95f-deb673f91156] succeeded in 10118.787201701081s: None + info | [2016-04-15 18:21:27,855: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[a41fba9b-f397-493a-a95f-deb673f91156] succeeded in 10118.787201701081s: None #+END_SRC *** DONE pkg-voip CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh - info | [2016-04-23 21:32:56,252: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[9a78bddb-227f-4f8a-b245-482a462e0000] + info | [2016-04-23 21:32:56,252: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[9a78bddb-227f-4f8a-b245-482a462e0000] debug | [2016-04-23 21:32:57,781: DEBUG/Worker-9] svn co svn://svn.debian.org/svn/pkg-voip/@1 - info | [2016-04-23 21:32:56,252: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[9a78bddb-227f-4f8a-b245-482a462e0000] + info | [2016-04-23 21:32:56,252: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[9a78bddb-227f-4f8a-b245-482a462e0000] info | [2016-04-23 21:32:58,221: INFO/Worker-9] Repo {'remote_url': 'svn://svn.debian.org/svn/pkg-voip', 'local_url': '/tmp/tmp.nwuhzku9.swh.loader/pkg-voip', 'uuid': '5e74be4b-f5d6-0310-a852-e9e23c5afa6a', 'swh-origin': 32} ready to be processed. 
info | [2016-04-23 21:32:58,186: INFO/Worker-9] [revision_start-revision_end]: [1-10707] - info | [2016-04-24 10:21:28,897: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[9a78bddb-227f-4f8a-b245-482a462e0000] succeeded in 46112.436119881924s: None + info | [2016-04-24 10:21:28,897: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[9a78bddb-227f-4f8a-b245-482a462e0000] succeeded in 46112.436119881924s: None #+END_SRC *** DONE python-modules CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh - info | [2016-04-28 17:35:59,087: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[776d23aa-e3c6-452d-95bd-7ae35409e9a5] + info | [2016-04-28 17:35:59,087: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[776d23aa-e3c6-452d-95bd-7ae35409e9a5] debug | [2016-04-28 17:36:00,036: DEBUG/Worker-27] svn co svn://svn.debian.org/svn/python-modules/@1 info | [2016-04-28 17:36:00,509: INFO/Worker-27] [revision_start-revision_end]: [1-34523] info | [2016-04-28 17:36:00,522: INFO/Worker-27] Repo {'remote_url': 'svn://svn.debian.org/svn/python-modules', 'local_url': '/tmp/tmp.7t45udhc.swh.loader/python-modules', 'uuid': '771dd761-d7fa-0310-a302-f036d1c1ebb6', 'swh-origin': 122} ready to be processed. 
- info | [2016-05-02 01:42:49,471: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[776d23aa-e3c6-452d-95bd-7ae35409e9a5] succeeded in 288410.36918153404s: None + info | [2016-05-02 01:42:49,471: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[776d23aa-e3c6-452d-95bd-7ae35409e9a5] succeeded in 288410.36918153404s: None #+END_SRC *** DONE pkg-gnome CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC log - info | [2016-04-16 20:02:34,346: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[b05b9228-7842-4cf1-9f8e-79edb462c262] + info | [2016-04-16 20:02:34,346: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[b05b9228-7842-4cf1-9f8e-79edb462c262] debug | [2016-04-16 20:02:35,262: DEBUG/Worker-7] svn co svn://svn.debian.org/svn/pkg-gnome/@1 info | [2016-04-16 20:02:35,625: INFO/Worker-7] [revision_start-revision_end]: [1-48013] info | [2016-04-16 20:02:35,629: INFO/Worker-48806] Archive gs://google-code-archive-source/v2/code.google.com/dennisbabiak-gccgcm/source-archive.zip fetched. info | [2016-04-16 20:02:35,641: INFO/Worker-7] Repo {'remote_url': 'svn://svn.debian.org/svn/pkg-gnome', 'local_url': '/tmp/tmp.uxt6n47f.swh.loader/pkg-gnome', 'uuid': 'db0db5de-e4c8-0310-9441-90abf70311f7', 'swh-origin': 27} ready to be processed. - info | [2016-04-23 17:16:17,149: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[b05b9228-7842-4cf1-9f8e-79edb462c262] succeeded in 594822.724728007s: None + info | [2016-04-23 17:16:17,149: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[b05b9228-7842-4cf1-9f8e-79edb462c262] succeeded in 594822.724728007s: None #+END_SRC ** git svn clone/fetch *** DONE pkg-fox CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-fox/ ... 
r144 = 755f744998acfb873b1b1557c788494dbac7f5b4 (refs/remotes/git-svn) M fox-1.6/trunk/debian/changelog M fox-1.6/trunk/debian/rules r145 = 1170cd05a054f814ebbd239090f0f3fd927f0fa7 (refs/remotes/git-svn) Checking out files: 100% (47319/47319), done. Checked out HEAD: svn://svn.debian.org/svn/pkg-fox r145 creating empty directory: fox-1.0/debian/fox_1.0.52-1/windows/vcpp/window/CVS creating empty directory: fox-1.0/debian/fox_1.0.52-1/windows/watcom/CVS creating empty directory: fox-1.0/debian/fox_1.0.52-2/windows/vcpp/window/CVS creating empty directory: fox-1.0/debian/fox_1.0.52-2/windows/watcom/CVS creating empty directory: fox-1.0/vendor/fox-1.0.52/windows/vcpp/window/CVS creating empty directory: fox-1.0/vendor/fox-1.0.52/windows/watcom/CVS real 7m27.074s user 1m15.752s sys 0m54.740s #+END_SRC *** DONE glibc-bsd CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/glibc-bsd/ ... r6050 = c1d2473b17d04529c13f9ed87ecddf617e0f655c (refs/remotes/git-svn) Checking out files: 100% (7805/7805), done. Checked out HEAD: svn://svn.debian.org/svn/glibc-bsd r6050 creating empty directory: trunk/glibc-ports-2.23/kfreebsd/i386/i486/fbtl creating empty directory: trunk/glibc-ports-2.23/kfreebsd/i386/i586/fbtl creating empty directory: trunk/glibc-ports/kfreebsd/i386/i486/fbtl creating empty directory: trunk/glibc-ports/kfreebsd/i386/i586/fbtl real 79m0.928s user 5m12.044s sys 11m28.424s #+END_SRC *** DONE pkg-voip CLOSED: [2016-05-12 Thu 14:54] #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-voip/ ... r10706 = 4ddf0d53596f9246c8329742f8853d88863d9196 (refs/remotes/git-svn) M sip-tester/trunk/debian/compat M sip-tester/trunk/debian/changelog r10707 = 19ebf4dfc63b657f8a93f697552c72925afa75f9 (refs/remotes/git-svn) Auto packing the repository in background for optimum performance. See "git help gc" for manual housekeeping. Checking out files: 100% (40381/40381), done. 
Checked out HEAD: svn://svn.debian.org/svn/pkg-voip r10707 creating empty directory: ATTIC/ari/branches creating empty directory: ATTIC/asterisk-sounds-extra/branches/upstream/1.4.7 creating empty directory: ATTIC/asterisk-sounds-extra/branches/upstream/current ... creating empty directory: yate/tags/2.1.0-1~dfsg-1/debian/patches creating empty directory: yxa/branches/upstream/0.91 creating empty directory: yxa/branches/upstream/current (+ (* 60 143) 12.398) real 143m12.398s user 22m0.212s sys 29m10.096s ardumont@worker01:~/test$ #+END_SRC *** DONE python-modules CLOSED: [2016-05-12 Thu 20:53] #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/python-modules/ Initialized empty Git repository in /home/ardumont/test/python-modules/.git/ A gnupginterface/debian/changelog A gnupginterface/debian/copyright A gnupginterface/debian/docs A gnupginterface/debian/rules A gnupginterface/debian/doc-base A gnupginterface/debian/control A gnupginterface/debian/compat r1 = febf6b2dc1469d10201e231a3b08805429abdb24 (refs/remotes/git-svn) A gnupginterface/debian/control.in M gnupginterface/debian/changelog M gnupginterface/debian/rules A metainfo/python-modules.team r2 = 3205aceb3431ebb08fbefef91e04a9c10d85a5b7 (refs/remotes/git-svn) M gnupginterface/debian/control.in M gnupginterface/debian/changelog M gnupginterface/debian/rules A gnupginterface/debian/uploaders M gnupginterface/debian/control r3 = 26ae7cf95826f7bd6c430a9a311a6586df80bdba (refs/remotes/git-svn) r4 = 1336cf42fb7fd305884fb81238a68a1627b5e635 (refs/remotes/git-svn) W: +empty_dir: www r5 = 7ef355783ad3dc02d74b0e133e8666597f33e231 (refs/remotes/git-svn) A www/python-modules-policy.rst r6 = a0c4721f5bdaf60fabd548503029c842aa8039fd (refs/remotes/git-svn) M www/python-modules-policy.rst r7 = 2e23d5f2e8e75f6c1050fe69b978d6d4b9405577 (refs/remotes/git-svn) M www/python-modules-policy.rst r8 = a64c844ef002876929e63c08cafe9c0d7913dbd5 (refs/remotes/git-svn) ... 
r34523 = d7ad2aa9f03614e98131a45d12ca85f0db6200b0 (refs/remotes/git-svn) Checking out files: 100% (96660/96660), done. Checked out HEAD: svn://svn.debian.org/svn/python-modules r34523 creating empty directory: packages/adhocracy/tags creating empty directory: packages/aiopg/tags creating empty directory: packages/alembic/branches/upstream/0.3.2 ... creating empty directory: packages/wheel/tags/0.26.0-1/debian/manpages/_static creating empty directory: packages/wheel/tags/0.26.0-1/debian/manpages/_templates creating empty directory: packages/wheel/trunk/debian/manpages/_static creating empty directory: packages/wheel/trunk/debian/manpages/_templates real 610m27.907s user 140m19.436s sys 147m44.760s #+END_SRC *** DONE pkg-gnome CLOSED: [2016-05-13 Fri 17:17] `git svn clone` does not finish for this repository. And no retake on error is possible, we need to clean up and restart from scratch. **** 1st tryout Failure: #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-gnome/ Initialized empty Git repository in /home/ardumont/test/pkg-gnome/.git/ r1 = 15e91b0046a42767752f7cbf0614d27721e1af12 (refs/remotes/git-svn) W: +empty_dir: packages r2 = 17064cc88bbf5bf9c85a10c44d57846c694158a1 (refs/remotes/git-svn) W: +empty_dir: packages/file-roller r3 = dec3b8e994b3d8fa4d409c7195bcbd4706b6cdfa (refs/remotes/git-svn) A packages/gnome-pkg-tools/team_members A packages/gnome-pkg-tools/debian/changelog A packages/gnome-pkg-tools/debian/gnome-pkg-tools.install A packages/gnome-pkg-tools/debian/copyright A packages/gnome-pkg-tools/debian/rules A packages/gnome-pkg-tools/debian/control A packages/gnome-pkg-tools/debian/compat ... 
r16689 = 39f2a34508fd3605408875d29d8b73baa1ac4bdd (refs/remotes/git-svn) M desktop/unstable/vte/debian/rules M desktop/unstable/vte/debian/changelog D desktop/unstable/vte/debian/patches/25_enable_static_ncurses.patch A desktop/unstable/vte/debian/patches/25_optional-ncurses.patch M desktop/unstable/vte/debian/patches/90_autoreconf.patch Connection reset by peer: Can't read from connection: Connection reset by peer at /usr/share/perl5/Git/SVN/Ra.pm line 300. real 265m2.537s user 25m9.600s sys 43m18.704s #+END_SRC **** 2nd tryout Failure: #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-gnome/ ... r7983 = 5db47427ea4135418ac008d5acfcbaa5a2ab41a6 (refs/remotes/git-svn) Connection reset by peer: Can't read from connection: Connection reset by peer at /usr/share/perl5/Git/SVN.pm line 1210. . real 131m37.338s user 6m57.104s sys 14m6.808s ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-gnome #+END_SRC **** 3rd tryout 3rd is the charm... or not! FAILURE: #+BEGIN_SRC sh ardumont@worker01:~/test$ time git svn clone svn://svn.debian.org/svn/pkg-gnome/ ... r13093 = 9a4c30f1242671a73ad5618d95671e0f2a569965 (refs/remotes/git-svn) M desktop/unstable/gtkmm2.4/debian/watch M desktop/unstable/gtkmm2.4/debian/copyright r13094 = f791964e6a1789d79b5db809066ab5c6a944f151 (refs/remotes/git-svn) M packages/experimental/rhythmbox/debian/changelog r13095 = 0f04c0efe06865fec3c9b825c0300f9620eee342 (refs/remotes/git-svn) Connection reset by peer: Can't read from connection: Connection reset by peer at /usr/share/perl5/Git/SVN.pm line 1210. real 180m39.494s user 15m0.696s sys 28m22.148s ardumont@worker01:~/test$ #+END_SRC Resuming the clone, it progresses and fails: #+BEGIN_SRC sh ardumont@worker01:~/test/pkg-gnome$ time git svn fetch ... 
r36696 = f50d47c1f4503e94d5a14120b5ae28ddc89d5459 (refs/remotes/git-svn) M desktop/experimental/gnome-mahjongg/debian/control M desktop/experimental/gnome-mahjongg/debian/control.in M desktop/experimental/five-or-more/debian/control.in M desktop/experimental/five-or-more/debian/control M desktop/experimental/gnome-mines/debian/control M desktop/experimental/gnome-mines/debian/control.in M desktop/experimental/swell-foop/debian/control M desktop/experimental/swell-foop/debian/control.in M desktop/experimental/four-in-a-row/debian/control M desktop/experimental/four-in-a-row/debian/control.in M desktop/experimental/gnome-chess/debian/control M desktop/experimental/gnome-chess/debian/control.in Checksum mismatch: desktop/experimental/quadrapassel/debian/control 5f307bd77cf88802bd8bf29f53bf79c56c025bd0 expected: d0a3e82c6cf89884b7a437d95698e944 got: 1a88372a29c589d72350b615235eabde real 802m28.076s user 149m40.680s sys 130m10.140s #+END_SRC And now we are stuck: #+BEGIN_SRC sh ardumont@worker01:~/test/pkg-gnome$ time git svn fetch Index mismatch: 7e42143b744220d2fb9bc02e3b95b52240227a13 != e038931478d9739bb149296393a50df7a9114f86 rereading f50d47c1f4503e94d5a14120b5ae28ddc89d5459 M desktop/experimental/lightsoff/debian/control M desktop/experimental/lightsoff/debian/control.in M desktop/experimental/swell-foop/debian/control M desktop/experimental/swell-foop/debian/control.in M desktop/experimental/tali/debian/control.in M desktop/experimental/tali/debian/control M desktop/experimental/gnome-sudoku/debian/control.in M desktop/experimental/gnome-sudoku/debian/control M desktop/experimental/four-in-a-row/debian/control M desktop/experimental/four-in-a-row/debian/control.in M desktop/experimental/five-or-more/debian/control M desktop/experimental/five-or-more/debian/control.in M desktop/experimental/gnome-mines/debian/control M desktop/experimental/gnome-mines/debian/control.in Checksum mismatch: desktop/experimental/quadrapassel/debian/control.in 
f22732d39f98416ae930781a2011261fb40e1f4b expected: 43a5a26795d42c25ebdaa90c3673ed87 got: c527c56f66f2e8d9383d57f3938bf135 real 0m33.228s user 0m1.992s sys 0m0.892s #+END_SRC **** 4th tryout Trying another way, first clone at the first revision and then fetch the rest: #+BEGIN_SRC sh ardumont@worker01:~/test $ time git svn clone svn://svn.debian.org/svn/pkg-gnome -r1 Initialized empty Git repository in /home/ardumont/pkg-gnome/.git/ W: +empty_dir: tools r1 = 15e91b0046a42767752f7cbf0614d27721e1af12 (refs/remotes/git-svn) Checked out HEAD: svn://svn.debian.org/svn/pkg-gnome r1 creating empty directory: tools ardumont@worker01:~/test $ cd pkg-gnome ardumont@worker01:~/test/pkg-gnome$ time git svn fetch W: +empty_dir: packages r2 = 17064cc88bbf5bf9c85a10c44d57846c694158a1 (refs/remotes/git-svn) W: +empty_dir: packages/file-roller r3 = dec3b8e994b3d8fa4d409c7195bcbd4706b6cdfa (refs/remotes/git-svn) ... r48675 = ad0a56ae71a479b1502ec6e99d2c5c90176718fa (refs/remotes/git-svn) M desktop/unstable/gnome-klotski/debian/changelog r48676 = 494ad851898051b2e6d86e3de0b428094ed37d3d (refs/remotes/git-svn) real 1198m22.080s user 314m23.704s sys 327m49.552s #+END_SRC * Comparison - 2 Improving the comparison between swh-svn and git-svn. This take is about making the behavior of swh-svn converge with that of git-svn, so that we compare what is comparable. ** behavior divergences and their status Here is a summary of the divergences and the actions undertaken in swh-svn to converge. 
|--------------------------+--------------------------------------------------+-------------------------------------------------------+-----------------------------------------------------------------------| | Divergence nature | git-svn | swh-svn | Action (on swh-loader-svn) | |--------------------------+--------------------------------------------------+-------------------------------------------------------+-----------------------------------------------------------------------| | svn commit author | author@ | Take the svn author as is | swh-svn now uses the same format as git-svn | | svn commit date | As int | Take the svn commit date as is | swh-svn now uses the same format as git-svn | | empty folder | Not checked out | Svn API checks out empty folder | Option added in swh-svn to remove those empty folders as extra step | | svn commit message | Add extra line in commit message | Take the commit message as is | Option added to add that extra line to revision message | | svn update | `git svn fetch` | If repository known, update from last known commit | Option added to inhibit this update | | extra metadata in commit | By default add some at the end of the svn commit | Add extra-headers in a git compliant way (for update) | Option added to inhibit those extra headers (inhibit the update too) | | storage | Stores on disk | Store in swh-storage | Option activated to avoid sending data to swh-storage | |--------------------------+--------------------------------------------------+-------------------------------------------------------+-----------------------------------------------------------------------| ** Comparison Here is the comparison with the following options: #+BEGIN_SRC ini # inhibit the swh-storage part send_contents = False send_directories = False send_revisions = False send_releases = False send_occurrences = False with_svn_update = False with_revision_headers = False with_empty_folder = False with_extra_commit_line = True #+END_SRC 
|------------+--------+------------------------------------------+-------------------------+-------------------------------+------------| | Type | # Revs | Url | git-svn (git svn clone) | swh-svn | Ratio | |------------+--------+------------------------------------------+-------------------------+-------------------------------+------------| | small | 145 | svn://svn.debian.org/svn/pkg-fox/ | 447.074 | 547.6864733919501 | 1.2250466 | | medium | 6006 | svn://svn.debian.org/svn/glibc-bsd/ | 4740.928 | 4046.70439547766 | 0.85356799 | | large | 10707 | svn://svn.debian.org/svn/pkg-voip/ | 8592.398 | 28698.161668108776 | 3.3399479 | | very large | 34523 | svn://svn.debian.org/svn/python-modules/ | 36627.907 | Lost result (something awful) | 0. | | very large | 48013 | svn://svn.debian.org/svn/pkg-gnome/ | 71902.080 | Lost result (something awful) | 0. | |------------+--------+------------------------------------------+-------------------------+-------------------------------+------------| #+TBLFM: $6=$5/$4 * Comparison - 3 The second iteration was about finding out whether: - the loader was fast (it was not) - the loader did its computation right (it did) To improve the speed, we investigated further how git-svn does its job. It uses a Remote Access server approach, meaning it talks directly to the svn server. Using the same approach as git-svn (using subvertpy instead of pysvn), we were able to adapt the code accordingly. So now, the loader speaks to the server and computes hashes alongside. Here is the comparison (using the same options as comparison 2 to have the same hashes as git-svn for trees and commits): ** Comparison - no swh-storage No swh-storage is used here. 
Only disk writes and hash computations are performed; the result is then written to the logs (for swh-loader) |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+-------------| | Type | # Revs | Url | git-svn (git svn clone) | # Revs updated | swh-svn | Ratio | |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+-------------| | small | 145 | svn://svn.debian.org/svn/pkg-fox/ | 447.074 | 145 | 66.56195999495685 | 0.14888354 | | medium | 6006 | svn://svn.debian.org/svn/glibc-bsd/ | 4740.928 | 6073 | 338.06703379005194 | 0.071308198 | | large | 10707 | svn://svn.debian.org/svn/pkg-voip/ | 8592.398 | 10707 | 536.1971881072968 | 0.062403672 | | very large | 34523 | svn://svn.debian.org/svn/python-modules/ | 36627.907 | 34523 | 3310.805134777911 | 0.090390235 | | very large | 48013 | svn://svn.debian.org/svn/pkg-gnome/ | 71902.080 | 49061 | 3519.9480575090274 | 0.048954746 | |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+-------------| #+TBLFM: $7=$6/$4 *** Log extract This is where the swh-svn column values are extracted from. #+BEGIN_SRC txt Jun 11 20:13:30 worker01 python3[27823]: [tasks] Jun 11 20:13:30 worker01 python3[27823]: . swh.loader.core.tasks.LoaderCoreTask -Jun 11 20:13:30 worker01 python3[27823]: . swh.loader.svn.tasks.LoadSvnRepositoryTsk +Jun 11 20:13:30 worker01 python3[27823]: . 
swh.loader.svn.tasks.LoadSvnRepository Jun 11 20:13:30 worker01 python3[27823]: [2016-06-11 20:13:30,213: INFO/MainProcess] Connected to amqp://swhconsumer:**@moma:5672// Jun 11 20:13:30 worker01 python3[27823]: [2016-06-11 20:13:30,259: INFO/MainProcess] mingle: searching for neighbors Jun 11 20:13:31 worker01 python3[27823]: [2016-06-11 20:13:31,369: INFO/MainProcess] mingle: sync with 7 nodes Jun 11 20:13:31 worker01 python3[27823]: [2016-06-11 20:13:31,387: INFO/MainProcess] mingle: sync complete Jun 11 20:13:32 worker01 python3[27823]: [2016-06-11 20:13:32,360: INFO/MainProcess] Events of group {task} enabled by remote. Jun 11 20:15:04 worker01 python3[27823]: [2016-06-11 20:15:04,997: INFO/MainProcess] Started consuming from swh_loader_svn Jun 11 20:15:06 worker01 python3[27823]: [2016-06-11 20:15:06,204: INFO/MainProcess] Cancelling queue swh_loader_svn Jun 11 20:15:20 worker01 python3[27823]: [2016-06-11 20:15:20,724: INFO/MainProcess] Started consuming from swh_loader_svn -Jun 11 20:17:51 worker01 python3[27823]: [2016-06-11 20:17:51,487: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[7fab7834-0c41-4634-89f3-1af35502461a] +Jun 11 20:17:51 worker01 python3[27823]: [2016-06-11 20:17:51,487: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[7fab7834-0c41-4634-89f3-1af35502461a] Jun 11 20:17:52 worker01 python3[27823]: [2016-06-11 20:17:52,362: INFO/Worker-10] [revision_start-revision_end]: [1-145] Jun 11 20:17:52 worker01 python3[27823]: [2016-06-11 20:17:52,386: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-fox', 'uuid': b'd908f651-7add-0310-a5d1-c7ac9dfebe41', 'local_url': b'/tmp/tmp.bm6rebqz.swh.loader/p kg-fox', 'swh-origin': 4}. Jun 11 20:18:55 worker01 python3[27823]: [2016-06-11 20:18:55,307: INFO/Worker-10] Processed 145 revisions: [1dda85506a12af80c5a701a02aba5a02c703642f, ...] 
-Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,078: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[7fab7834-0c41-4634-89f3-1af35502461a] succeeded in 66.56195999495685s: None -Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,106: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[ace251b8-7255-4e63-90b5-1a56655755e8] +Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,078: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[7fab7834-0c41-4634-89f3-1af35502461a] succeeded in 66.56195999495685s: None +Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,106: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[ace251b8-7255-4e63-90b5-1a56655755e8] Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,437: INFO/Worker-10] [revision_start-revision_end]: [1-6073] Jun 11 20:18:58 worker01 python3[27823]: [2016-06-11 20:18:58,453: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/glibc-bsd', 'uuid': b'ae44cbe4-c7d5-0310-ae45-95c72a56cd7d', 'local_url': b'/tmp/tmp.d_iw28du.swh.loader/glibc-bsd', 'swh-origin': 3}. Jun 11 20:20:07 worker01 python3[27823]: [2016-06-11 20:20:07,371: INFO/Worker-10] Processed 1000 revisions: [be6fe97464c0fedd9959073d07b2fda4cbedbe2d, ...] Jun 11 20:20:38 worker01 python3[27823]: [2016-06-11 20:20:38,419: INFO/Worker-10] Processed 1000 revisions: [344505eb753471c1085e2afeef646fe4e5d49df4, ...] Jun 11 20:21:06 worker01 python3[27823]: [2016-06-11 20:21:06,596: INFO/Worker-10] Processed 1000 revisions: [7638d6e04f082876bdd248f13f5e62def43f411d, ...] Jun 11 20:22:06 worker01 python3[27823]: [2016-06-11 20:22:06,324: INFO/Worker-10] Processed 1000 revisions: [cac5d6ccdd3aab6e29bc3def9df35b1fbbf9bd4f, ...] Jun 11 20:23:17 worker01 python3[27823]: [2016-06-11 20:23:17,551: INFO/Worker-10] Processed 1000 revisions: [b2c7a10f2127dd496048133480df041b0ab66865, ...] 
Jun 11 20:24:29 worker01 python3[27823]: [2016-06-11 20:24:29,274: INFO/Worker-10] Processed 1000 revisions: [382341a00301f36ceec9fca563aef85cb628b323, ...] Jun 11 20:24:35 worker01 python3[27823]: [2016-06-11 20:24:35,546: INFO/Worker-10] Processed 73 revisions: [512a9f720bd1af1581b09483846035cf292c52cd, ...] -Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,205: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[ace251b8-7255-4e63-90b5-1a56655755e8] succeeded in 338.06703379005194s: None -Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,240: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[275183f6-ef9e-4533-aa72-322f080b76e1] +Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,205: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[ace251b8-7255-4e63-90b5-1a56655755e8] succeeded in 338.06703379005194s: None +Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,240: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[275183f6-ef9e-4533-aa72-322f080b76e1] Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,584: INFO/Worker-10] [revision_start-revision_end]: [1-10707] Jun 11 20:24:36 worker01 python3[27823]: [2016-06-11 20:24:36,600: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-voip', 'uuid': b'5e74be4b-f5d6-0310-a852-e9e23c5afa6a', 'local_url': b'/tmp/tmp.0y0ny007.swh.loader/pkg-voip', 'swh-origin': 32}. Jun 11 20:25:34 worker01 python3[27823]: [2016-06-11 20:25:34,718: INFO/Worker-10] Processed 1000 revisions: [a1fff6a0e5e397f634d0ea5c1600bc723d019e4c, ...] Jun 11 20:26:11 worker01 python3[27823]: [2016-06-11 20:26:11,493: INFO/Worker-10] Processed 1000 revisions: [510f95e1239eaea1170816b080cd8d7d76ad4b5b, ...] Jun 11 20:26:45 worker01 python3[27823]: [2016-06-11 20:26:45,145: INFO/Worker-10] Processed 1000 revisions: [17a376896e1f2084bfd85dc5052e14280bbcb63a, ...] 
Jun 11 20:27:20 worker01 python3[27823]: [2016-06-11 20:27:20,603: INFO/Worker-10] Processed 1000 revisions: [5f9d3835c69ec09fd082553d425d9e26fc006289, ...] Jun 11 20:27:52 worker01 python3[27823]: [2016-06-11 20:27:52,940: INFO/Worker-10] Processed 1000 revisions: [89551acb946906862441ef05368b99c5ce4d2144, ...] Jun 11 20:28:27 worker01 python3[27823]: [2016-06-11 20:28:27,370: INFO/Worker-10] Processed 1000 revisions: [97eeaf9eae6c155631b15514b5b600e367c2dc4b, ...] Jun 11 20:29:02 worker01 python3[27823]: [2016-06-11 20:29:02,164: INFO/Worker-10] Processed 1000 revisions: [c0f1bce218db099425e15aa9f5521a5badb4c6c2, ...] Jun 11 20:29:35 worker01 python3[27823]: [2016-06-11 20:29:35,064: INFO/Worker-10] Processed 1000 revisions: [b3dcf7365c6afd9280565abfec4204b26c4e13dc, ...] Jun 11 20:30:18 worker01 python3[27823]: [2016-06-11 20:30:18,890: INFO/Worker-10] Processed 1000 revisions: [943042e9d1113406483a2a5b6f39023935c0532a, ...] Jun 11 20:32:20 worker01 python3[27823]: [2016-06-11 20:32:20,890: INFO/Worker-10] Processed 1000 revisions: [81405fca1c7f928fcc6a2b137546b4f94f22551e, ...] Jun 11 20:33:29 worker01 python3[27823]: [2016-06-11 20:33:29,927: INFO/Worker-10] Processed 707 revisions: [5419a4e4da9d9e37df35c1c9455024fe8170d2fa, ...] 
-Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,487: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[275183f6-ef9e-4533-aa72-322f080b76e1] succeeded in 536.1971881072968s: None -Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,522: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[8677e0f5-4938-4146-b164-720ec7294cb4] +Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,487: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[275183f6-ef9e-4533-aa72-322f080b76e1] succeeded in 536.1971881072968s: None +Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,522: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[8677e0f5-4938-4146-b164-720ec7294cb4] Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,816: INFO/Worker-10] [revision_start-revision_end]: [1-34523] Jun 11 20:33:32 worker01 python3[27823]: [2016-06-11 20:33:32,874: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/python-modules', 'uuid': b'771dd761-d7fa-0310-a302-f036d1c1ebb6', 'local_url': b'/tmp/tmp.jokuejpx.swh.loader/python-modules', 'swh-origin': 122}. Jun 11 20:34:43 worker01 python3[27823]: [2016-06-11 20:34:43,590: INFO/Worker-10] Processed 1000 revisions: [4ca7de178b8d929a6dfc12e113b3072730eeb4c3, ...] Jun 11 20:35:35 worker01 python3[27823]: [2016-06-11 20:35:35,455: INFO/Worker-10] Processed 1000 revisions: [45be8a9d34b4b57595a1e3c0cec40b378a05e43f, ...] Jun 11 20:36:12 worker01 python3[27823]: [2016-06-11 20:36:12,743: INFO/Worker-10] Processed 1000 revisions: [62d51095353cb4bac1b840a917e9cda7e809b6ea, ...] Jun 11 20:36:46 worker01 python3[27823]: [2016-06-11 20:36:46,526: INFO/Worker-10] Processed 1000 revisions: [0b88046aa11871c3d22a5190a62c200d42ab3da0, ...] Jun 11 20:37:19 worker01 python3[27823]: [2016-06-11 20:37:19,382: INFO/Worker-10] Processed 1000 revisions: [1791f1e4cdf4eef44aa6f1dcdb76378a37f5de38, ...] 
Jun 11 20:37:54 worker01 python3[27823]: [2016-06-11 20:37:54,069: INFO/Worker-10] Processed 1000 revisions: [60f91c20e1ac862c6d1fd4abf2890cebcd4ef2e6, ...] Jun 11 20:38:32 worker01 python3[27823]: [2016-06-11 20:38:32,090: INFO/Worker-10] Processed 1000 revisions: [51764d25e62e2318e92310cea8d123773d6a9054, ...] Jun 11 20:39:08 worker01 python3[27823]: [2016-06-11 20:39:08,667: INFO/Worker-10] Processed 1000 revisions: [8720f9952080e743c8432ef7d27cf35f61da88d6, ...] Jun 11 20:39:45 worker01 python3[27823]: [2016-06-11 20:39:45,419: INFO/Worker-10] Processed 1000 revisions: [837dc122f04058ede9351b4940579ba8fba4fda1, ...] Jun 11 20:40:33 worker01 python3[27823]: [2016-06-11 20:40:33,718: INFO/Worker-10] Processed 1000 revisions: [e33a666eb6f9a50e35f62e2c4187a0caf8055aad, ...] Jun 11 20:41:14 worker01 python3[27823]: [2016-06-11 20:41:14,348: INFO/Worker-10] Processed 1000 revisions: [2b16e069f75ebb8550d8b30c02e4dcaca0a98dae, ...] Jun 11 20:41:55 worker01 python3[27823]: [2016-06-11 20:41:55,502: INFO/Worker-10] Processed 1000 revisions: [fcbc70f1574e30e467d2aa49050f26a6e1e67b5e, ...] Jun 11 20:42:39 worker01 python3[27823]: [2016-06-11 20:42:39,390: INFO/Worker-10] Processed 1000 revisions: [10fc7480388c84bc7742e057b04f72c9e7696233, ...] Jun 11 20:43:25 worker01 python3[27823]: [2016-06-11 20:43:25,565: INFO/Worker-10] Processed 1000 revisions: [dabdb737d3f80c8c85ab6219b52429325750f7d7, ...] Jun 11 20:44:12 worker01 python3[27823]: [2016-06-11 20:44:12,970: INFO/Worker-10] Processed 1000 revisions: [5ec3d4290c8fa3f0cb3725c9412895633f48dc47, ...] Jun 11 20:45:07 worker01 python3[27823]: [2016-06-11 20:45:07,443: INFO/Worker-10] Processed 1000 revisions: [32aa0ad290ad039d8da3b1223d34d31f6ee93a69, ...] Jun 11 20:45:56 worker01 python3[27823]: [2016-06-11 20:45:56,115: INFO/Worker-10] Processed 1000 revisions: [6abae57242a050042f6980971911a9dcd7429f65, ...] 
Jun 11 20:47:32 worker01 python3[27823]: [2016-06-11 20:47:32,574: INFO/Worker-10] Processed 1000 revisions: [4bcf91f45c78113b626c914acdf81f73d11191f1, ...] Jun 11 20:49:10 worker01 python3[27823]: [2016-06-11 20:49:10,514: INFO/Worker-10] Processed 1000 revisions: [18f5add4158abdb8bf1e408e6758adf7e4a59eef, ...] Jun 11 20:51:08 worker01 python3[27823]: [2016-06-11 20:51:08,684: INFO/Worker-10] Processed 1000 revisions: [99ebabd86ec48c52965c89c41fe443f2ee49f6a4, ...] Jun 11 20:53:05 worker01 python3[27823]: [2016-06-11 20:53:05,376: INFO/Worker-10] Processed 1000 revisions: [d6f7410e1295db6dbb137d94a27a26e3e7d636c0, ...] Jun 11 20:55:15 worker01 python3[27823]: [2016-06-11 20:55:15,605: INFO/Worker-10] Processed 1000 revisions: [a4907fb6dadae5b431b66c33b1bf043eb52992e2, ...] Jun 11 20:59:49 worker01 python3[27823]: [2016-06-11 20:59:49,794: INFO/Worker-10] Processed 1000 revisions: [ebaca99c10f1ea83e983d500c760ea31acba0762, ...] Jun 11 21:03:01 worker01 python3[27823]: [2016-06-11 21:03:01,831: INFO/Worker-10] Processed 1000 revisions: [b7e32f4db2957388e5d2a7d89749a1ceb4dbcf57, ...] Jun 11 21:05:41 worker01 python3[27823]: [2016-06-11 21:05:41,617: INFO/Worker-10] Processed 1000 revisions: [8736c0ee49a93fbb328aabfa4d21cd5a936a02b1, ...] Jun 11 21:07:53 worker01 python3[27823]: [2016-06-11 21:07:53,901: INFO/Worker-10] Processed 1000 revisions: [22a27c3aab8d00bd5a63bee6287ae46aae6b9fcb, ...] Jun 11 21:10:05 worker01 python3[27823]: [2016-06-11 21:10:05,816: INFO/Worker-10] Processed 1000 revisions: [8575eeea98818729b6319b68078872b5a72bdf14, ...] Jun 11 21:12:31 worker01 python3[27823]: [2016-06-11 21:12:31,081: INFO/Worker-10] Processed 1000 revisions: [badeba4badf9ebcf209da0123dd87a074863efc9, ...] Jun 11 21:14:55 worker01 python3[27823]: [2016-06-11 21:14:55,572: INFO/Worker-10] Processed 1000 revisions: [b7e011f38d59bd95ca9752f5e9d4841f14a1f46b, ...] 
Jun 11 21:17:16 worker01 python3[27823]: [2016-06-11 21:17:16,501: INFO/Worker-10] Processed 1000 revisions: [fc8dbd462c4e698b2650effbf33903de2f9b49b2, ...] Jun 11 21:19:18 worker01 python3[27823]: [2016-06-11 21:19:18,904: INFO/Worker-10] Processed 1000 revisions: [7d70379982d340ebff879a3f79314568ea3873eb, ...] Jun 11 21:22:35 worker01 python3[27823]: [2016-06-11 21:22:35,067: INFO/Worker-10] Processed 1000 revisions: [7b81c934b441335e06977f2dcd797f16337c49f2, ...] Jun 11 21:24:43 worker01 python3[27823]: [2016-06-11 21:24:43,586: INFO/Worker-10] Processed 1000 revisions: [8de5385844c1a8de253ad8945dc06259d11c7fc8, ...] Jun 11 21:27:21 worker01 python3[27823]: [2016-06-11 21:27:21,950: INFO/Worker-10] Processed 1000 revisions: [c0b63db3767754092c8c1fe92b07e66e77b7fed3, ...] Jun 11 21:28:34 worker01 python3[27823]: [2016-06-11 21:28:34,131: INFO/Worker-10] Processed 523 revisions: [ba52b091af3078562f7e7fc05c04b9469988e006, ...] -Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,356: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[8677e0f5-4938-4146-b164-720ec7294cb4] succeeded in 3310.805134777911s: None -Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,373: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[b3f34747-3575-4160-8543-6ddf23b8822e] +Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,356: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[8677e0f5-4938-4146-b164-720ec7294cb4] succeeded in 3310.805134777911s: None +Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,373: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[b3f34747-3575-4160-8543-6ddf23b8822e] Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,674: INFO/Worker-10] [revision_start-revision_end]: [1-49061] Jun 11 21:28:43 worker01 python3[27823]: [2016-06-11 21:28:43,688: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-gnome', 'uuid': 
b'db0db5de-e4c8-0310-9441-90abf70311f7', 'local_url': b'/tmp/tmp.kgkdf865.swh.loader/pkg-gnome', 'swh-origin': 27}. Jun 11 21:29:23 worker01 python3[27823]: [2016-06-11 21:29:23,231: INFO/Worker-10] Processed 1000 revisions: [29a3157f1d4a82955860a6fd3397bbd94573b555, ...] Jun 11 21:29:55 worker01 python3[27823]: [2016-06-11 21:29:55,671: INFO/Worker-10] Processed 1000 revisions: [bbada23d3943294e2efe3d6a1c978498e013d957, ...] Jun 11 21:30:35 worker01 python3[27823]: [2016-06-11 21:30:35,913: INFO/Worker-10] Processed 1000 revisions: [8a257ee9a72d9ec7f3a25bc5b4ab886f06b7d9c4, ...] Jun 11 21:31:11 worker01 python3[27823]: [2016-06-11 21:31:11,428: INFO/Worker-10] Processed 1000 revisions: [f179272fc81ae3f16bbf6d8a12709e9a7109b347, ...] Jun 11 21:31:43 worker01 python3[27823]: [2016-06-11 21:31:43,689: INFO/Worker-10] Processed 1000 revisions: [d74e2545a86fb065e60f3ef6e1fa5a975ab6a793, ...] Jun 11 21:32:21 worker01 python3[27823]: [2016-06-11 21:32:21,407: INFO/Worker-10] Processed 1000 revisions: [2a1dab8d134ef32f914da2ccc7c14819777f1e8f, ...] Jun 11 21:32:53 worker01 python3[27823]: [2016-06-11 21:32:53,714: INFO/Worker-10] Processed 1000 revisions: [544884d67e2a6c138a09ddf6b8f3f3f9c835610e, ...] Jun 11 21:33:26 worker01 python3[27823]: [2016-06-11 21:33:26,103: INFO/Worker-10] Processed 1000 revisions: [17c8cc0eb3be0d69176af005422f4a8aea22b037, ...] Jun 11 21:33:57 worker01 python3[27823]: [2016-06-11 21:33:57,754: INFO/Worker-10] Processed 1000 revisions: [cd83dc86dc388920ddfef5c3b393eeb754b15595, ...] Jun 11 21:34:47 worker01 python3[27823]: [2016-06-11 21:34:47,984: INFO/Worker-10] Processed 1000 revisions: [ed5ad1e6879a9a56c1fb89fec0e9ba80943facd8, ...] Jun 11 21:35:35 worker01 python3[27823]: [2016-06-11 21:35:35,152: INFO/Worker-10] Processed 1000 revisions: [ea995f84bc6eef36f890243f0c6ea4bdf2796452, ...] Jun 11 21:36:17 worker01 python3[27823]: [2016-06-11 21:36:17,834: INFO/Worker-10] Processed 1000 revisions: [b83e8f3b62dda65f582c01e2d04da6003564546b, ...] 
Jun 11 21:37:00 worker01 python3[27823]: [2016-06-11 21:37:00,366: INFO/Worker-10] Processed 1000 revisions: [c730f0da619383441fe1dcf04e19a5e40976226d, ...] Jun 11 21:37:40 worker01 python3[27823]: [2016-06-11 21:37:40,533: INFO/Worker-10] Processed 1000 revisions: [559711199c653202b1c8f2073a7cb0c95116165e, ...] Jun 11 21:38:26 worker01 python3[27823]: [2016-06-11 21:38:26,410: INFO/Worker-10] Processed 1000 revisions: [e2739f72f8bfe51f3344d838d2ae4b66aa472497, ...] Jun 11 21:39:20 worker01 python3[27823]: [2016-06-11 21:39:20,230: INFO/Worker-10] Processed 1000 revisions: [d05438f16f9aa3cd8dfbe47d493bdcc168448daf, ...] Jun 11 21:40:05 worker01 python3[27823]: [2016-06-11 21:40:05,528: INFO/Worker-10] Processed 1000 revisions: [4cf7273102dca6be96e569b8115f1371d43c3767, ...] Jun 11 21:40:56 worker01 python3[27823]: [2016-06-11 21:40:56,506: INFO/Worker-10] Processed 1000 revisions: [2ecb859a650ad2b10e7d0869c014d6b1b3ded55a, ...] Jun 11 21:41:51 worker01 python3[27823]: [2016-06-11 21:41:51,550: INFO/Worker-10] Processed 1000 revisions: [4758461520c23192618d1e78eeb23fecfcb9bd29, ...] Jun 11 21:42:50 worker01 python3[27823]: [2016-06-11 21:42:50,848: INFO/Worker-10] Processed 1000 revisions: [21fec7d864ab60f99863fe842680bfef1f8cd0b5, ...] Jun 11 21:43:53 worker01 python3[27823]: [2016-06-11 21:43:53,391: INFO/Worker-10] Processed 1000 revisions: [e3c2e7cbb7beaa344549145e2e78c77c5fd9f7c8, ...] Jun 11 21:44:43 worker01 python3[27823]: [2016-06-11 21:44:43,843: INFO/Worker-10] Processed 1000 revisions: [f6cf4af3d7d1744c364a9f25e92a5a6bba622837, ...] Jun 11 21:45:42 worker01 python3[27823]: [2016-06-11 21:45:42,393: INFO/Worker-10] Processed 1000 revisions: [3ccd855ced499e839306249a44006b33fcd9efb7, ...] Jun 11 21:46:45 worker01 python3[27823]: [2016-06-11 21:46:45,238: INFO/Worker-10] Processed 1000 revisions: [65092bf7e0e2df4d71a31f3d89d1233eef0e6438, ...] 
Jun 11 21:47:34 worker01 python3[27823]: [2016-06-11 21:47:34,384: INFO/Worker-10] Processed 1000 revisions: [a4c15e02c5db378dd77f0bab5e6a73a437ad8afb, ...] Jun 11 21:48:26 worker01 python3[27823]: [2016-06-11 21:48:26,171: INFO/Worker-10] Processed 1000 revisions: [bb81b662a5257e136c665b97d858bf4d20bf088c, ...] Jun 11 21:49:11 worker01 python3[27823]: [2016-06-11 21:49:11,819: INFO/Worker-10] Processed 1000 revisions: [04467b7e2bd60ac0a5c55ba184432620410f8b11, ...] Jun 11 21:49:54 worker01 python3[27823]: [2016-06-11 21:49:54,668: INFO/Worker-10] Processed 1000 revisions: [7b108c6627a394a2db6ad8d743f319e085014118, ...] Jun 11 21:51:32 worker01 python3[27823]: [2016-06-11 21:51:32,293: INFO/Worker-10] Processed 1000 revisions: [479a007698148817ab921a0a2df6a07f4558ec0e, ...] Jun 11 21:52:54 worker01 python3[27823]: [2016-06-11 21:52:54,870: INFO/Worker-10] Processed 1000 revisions: [2f7cbf1ff849e315b9211e6a57451c22d5a9a538, ...] Jun 11 21:54:03 worker01 python3[27823]: [2016-06-11 21:54:03,799: INFO/Worker-10] Processed 1000 revisions: [97650da3145a749e730367e9bb8b4b5e99733d1f, ...] Jun 11 21:55:26 worker01 python3[27823]: [2016-06-11 21:55:26,019: INFO/Worker-10] Processed 1000 revisions: [8aca07d408d5461e073f2a464b1bb1098a4169d6, ...] Jun 11 21:56:56 worker01 python3[27823]: [2016-06-11 21:56:56,823: INFO/Worker-10] Processed 1000 revisions: [66a3005e7fb08d761030438b88416a2f5354aab3, ...] Jun 11 21:58:20 worker01 python3[27823]: [2016-06-11 21:58:20,814: INFO/Worker-10] Processed 1000 revisions: [8d25bc6cb7e1ce4e1cf0bceca17cc8cd4701e4b1, ...] Jun 11 21:59:49 worker01 python3[27823]: [2016-06-11 21:59:49,945: INFO/Worker-10] Processed 1000 revisions: [77c2759fc8f419b2993b39b1423a5f48c5b26aa4, ...] Jun 11 22:02:00 worker01 python3[27823]: [2016-06-11 22:02:00,541: INFO/Worker-10] Processed 1000 revisions: [39ec4e85be0132737da5862397d35263930d0f60, ...] 
Jun 11 22:03:50 worker01 python3[27823]: [2016-06-11 22:03:50,547: INFO/Worker-10] Processed 1000 revisions: [7f9125098da0bf0360ce8f2c4fb70f96f5f105f3, ...] Jun 11 22:05:18 worker01 python3[27823]: [2016-06-11 22:05:18,050: INFO/Worker-10] Processed 1000 revisions: [f882f6cc7777f60a3e2e795bcffcaeb586defe2e, ...] Jun 11 22:06:42 worker01 python3[27823]: [2016-06-11 22:06:42,185: INFO/Worker-10] Processed 1000 revisions: [dfb73fa7ac834fb8863c739c6a1f40320ccca198, ...] Jun 11 22:08:17 worker01 python3[27823]: [2016-06-11 22:08:17,319: INFO/Worker-10] Processed 1000 revisions: [7a3dcc6abe1fe812552a5ff17123cc676a6e19ea, ...] Jun 11 22:09:50 worker01 python3[27823]: [2016-06-11 22:09:50,957: INFO/Worker-10] Processed 1000 revisions: [2f14c0ee2931c3bfe146af6f7baa5ffa6512a9d5, ...] Jun 11 22:11:35 worker01 python3[27823]: [2016-06-11 22:11:35,384: INFO/Worker-10] Processed 1000 revisions: [aa82ccb58456e1be3fc8b3b4db0c2769526f115e, ...] Jun 11 22:02:00 worker01 python3[27823]: [2016-06-11 22:02:00,541: INFO/Worker-10] Processed 1000 revisions: [39ec4e85be0132737da5862397d35263930d0f60, ...] Jun 11 22:03:50 worker01 python3[27823]: [2016-06-11 22:03:50,547: INFO/Worker-10] Processed 1000 revisions: [7f9125098da0bf0360ce8f2c4fb70f96f5f105f3, ...] Jun 11 22:05:18 worker01 python3[27823]: [2016-06-11 22:05:18,050: INFO/Worker-10] Processed 1000 revisions: [f882f6cc7777f60a3e2e795bcffcaeb586defe2e, ...] Jun 11 22:06:42 worker01 python3[27823]: [2016-06-11 22:06:42,185: INFO/Worker-10] Processed 1000 revisions: [dfb73fa7ac834fb8863c739c6a1f40320ccca198, ...] Jun 11 22:08:17 worker01 python3[27823]: [2016-06-11 22:08:17,319: INFO/Worker-10] Processed 1000 revisions: [7a3dcc6abe1fe812552a5ff17123cc676a6e19ea, ...] Jun 11 22:09:50 worker01 python3[27823]: [2016-06-11 22:09:50,957: INFO/Worker-10] Processed 1000 revisions: [2f14c0ee2931c3bfe146af6f7baa5ffa6512a9d5, ...] 
Jun 11 22:11:35 worker01 python3[27823]: [2016-06-11 22:11:35,384: INFO/Worker-10] Processed 1000 revisions: [aa82ccb58456e1be3fc8b3b4db0c2769526f115e, ...] Jun 11 22:13:27 worker01 python3[27823]: [2016-06-11 22:13:27,439: INFO/Worker-10] Processed 1000 revisions: [c0ac4d6e5058a7ad19bc5b5f8ea6d0184262814b, ...] Jun 11 22:17:18 worker01 python3[27823]: [2016-06-11 22:17:18,339: INFO/Worker-10] Processed 1000 revisions: [8e972fa53b5e4ccaca85cf7f7087191a809d83bd, ...] Jun 11 22:20:03 worker01 python3[27823]: [2016-06-11 22:20:03,239: INFO/Worker-10] Processed 1000 revisions: [b846dcc3d3cea598263cffb87557aa7f7566e80f, ...] Jun 11 22:22:17 worker01 python3[27823]: [2016-06-11 22:22:17,967: INFO/Worker-10] Processed 1000 revisions: [bc969c3c6977374d6e6bc5fe4e0606ee60f29e90, ...] Jun 11 22:23:44 worker01 python3[27823]: [2016-06-11 22:23:44,874: INFO/Worker-10] Processed 1000 revisions: [e9625679cf1fe23a91f0f06f02ac03723d7112cb, ...] Jun 11 22:25:16 worker01 python3[27823]: [2016-06-11 22:25:16,759: INFO/Worker-10] Processed 1000 revisions: [f61326f5d12094860dead900d2d46d0c368f4e4b, ...] Jun 11 22:27:01 worker01 python3[27823]: [2016-06-11 22:27:01,152: INFO/Worker-10] Processed 1000 revisions: [754bfe4141e5a9165a7458945157791ac85e6ff9, ...] Jun 11 22:27:06 worker01 python3[27823]: [2016-06-11 22:27:06,833: INFO/Worker-10] Processed 61 revisions: [27e079019bc07ef84716c80c62ec53a39d806879, ...] 
-Jun 11 22:27:23 worker01 python3[27823]: [2016-06-11 22:27:23,340: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[b3f34747-3575-4160-8543-6ddf23b8822e] succeeded in 3519.9480575090274s: None +Jun 11 22:27:23 worker01 python3[27823]: [2016-06-11 22:27:23,340: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[b3f34747-3575-4160-8543-6ddf23b8822e] succeeded in 3519.9480575090274s: None #+END_SRC ** comparison - with swh-storage |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+------------| | Type | # Revs | Url | git-svn (git svn clone) | # Revs updated | swh-svn | Ratio | |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+------------| | small | 145 | svn://svn.debian.org/svn/pkg-fox/ | 447.074 | 145 | 117.17479287087917 | 0.26209261 | | medium | 6006 | svn://svn.debian.org/svn/glibc-bsd/ | 4740.928 | 6073 | 962.4779040301219 | 0.20301466 | | large | 10707 | svn://svn.debian.org/svn/pkg-voip/ | 8592.398 | 10707 | 3028.8734963517636 | 0.35250619 | | very large | 34523 | svn://svn.debian.org/svn/python-modules/ | 36627.907 | 34523 | 22651.265359937213 | 0.61841550 | | very large | 48013 | svn://svn.debian.org/svn/pkg-gnome/ | 71902.080 | 49061 | 43629.105175915174 | 0.60678502 | |------------+--------+------------------------------------------+-------------------------+----------------+--------------------+------------| #+TBLFM: $7=$6/$4 ** Log extract #+BEGIN_SRC sh -Jun 11 22:31:34 worker01 python3[13656]: [2016-06-11 22:31:34,315: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[9b6b05d0-f7fe-4799-86a0-f9ec7bd67ead] +Jun 11 22:31:34 worker01 python3[13656]: [2016-06-11 22:31:34,315: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[9b6b05d0-f7fe-4799-86a0-f9ec7bd67ead] Jun 11 22:31:34 worker01 python3[13656]: [2016-06-11 
22:31:34,657: INFO/Worker-10] [revision_start-revision_end]: [1-145] Jun 11 22:31:34 worker01 python3[13656]: [2016-06-11 22:31:34,674: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-fox', 'local_url': b'/tmp/tmp.lod3o5u5.swh.loader/pkg-fox', 'uuid': b'd908f651-7add-0310-a5d1-c7ac9dfebe41', 'swh-origin': 4}. Jun 11 22:33:27 worker01 python3[13656]: [2016-06-11 22:33:27,415: INFO/Worker-10] Processed 145 revisions: [1dda85506a12af80c5a701a02aba5a02c703642f, ...] -Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,508: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[9b6b05d0-f7fe-4799-86a0-f9ec7bd67ead] succeeded in 117.17479287087917s: None -Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,552: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[f0375fb9-cf8c-40b5-9e04-db9e37fd9ecb] +Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,508: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[9b6b05d0-f7fe-4799-86a0-f9ec7bd67ead] succeeded in 117.17479287087917s: None +Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,552: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[f0375fb9-cf8c-40b5-9e04-db9e37fd9ecb] Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,856: INFO/Worker-10] [revision_start-revision_end]: [1-6073] Jun 11 22:33:31 worker01 python3[13656]: [2016-06-11 22:33:31,875: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/glibc-bsd', 'local_url': b'/tmp/tmp.w1bf5rdz.swh.loader/glibc-bsd', 'uuid': b'ae44cbe4-c7d5-0310-ae45-95c72a56cd7d', 'swh-origin': 3}. Jun 11 22:36:19 worker01 python3[13656]: [2016-06-11 22:36:19,485: INFO/Worker-10] Processed 1000 revisions: [be6fe97464c0fedd9959073d07b2fda4cbedbe2d, ...] Jun 11 22:38:43 worker01 python3[13656]: [2016-06-11 22:38:43,013: INFO/Worker-10] Processed 1000 revisions: [344505eb753471c1085e2afeef646fe4e5d49df4, ...] 
Jun 11 22:40:56 worker01 python3[13656]: [2016-06-11 22:40:56,494: INFO/Worker-10] Processed 1000 revisions: [7638d6e04f082876bdd248f13f5e62def43f411d, ...] Jun 11 22:43:23 worker01 python3[13656]: [2016-06-11 22:43:23,883: INFO/Worker-10] Processed 1000 revisions: [cac5d6ccdd3aab6e29bc3def9df35b1fbbf9bd4f, ...] Jun 11 22:46:03 worker01 python3[13656]: [2016-06-11 22:46:03,775: INFO/Worker-10] Processed 1000 revisions: [b2c7a10f2127dd496048133480df041b0ab66865, ...] Jun 11 22:48:57 worker01 python3[13656]: [2016-06-11 22:48:57,500: INFO/Worker-10] Processed 1000 revisions: [382341a00301f36ceec9fca563aef85cb628b323, ...] Jun 11 22:49:15 worker01 python3[13656]: [2016-06-11 22:49:15,687: INFO/Worker-10] Processed 73 revisions: [512a9f720bd1af1581b09483846035cf292c52cd, ...] -Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,067: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[f0375fb9-cf8c-40b5-9e04-db9e37fd9ecb] succeeded in 962.4779040301219s: None -Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,087: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[60670885-84de-4884-937d-b7aecdbedcd5] +Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,067: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[f0375fb9-cf8c-40b5-9e04-db9e37fd9ecb] succeeded in 962.4779040301219s: None +Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,087: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[60670885-84de-4884-937d-b7aecdbedcd5] Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,371: INFO/Worker-10] [revision_start-revision_end]: [1-10707] Jun 11 22:49:34 worker01 python3[13656]: [2016-06-11 22:49:34,391: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-voip', 'local_url': b'/tmp/tmp.w9fxgpt4.swh.loader/pkg-voip', 'uuid': b'5e74be4b-f5d6-0310-a852-e9e2 3c5afa6a', 'swh-origin': 32}. 
Jun 11 22:52:01 worker01 python3[13656]: [2016-06-11 22:52:01,903: INFO/Worker-10] Processed 1000 revisions: [a1fff6a0e5e397f634d0ea5c1600bc723d019e4c, ...] Jun 11 22:54:54 worker01 python3[13656]: [2016-06-11 22:54:54,798: INFO/Worker-10] Processed 1000 revisions: [510f95e1239eaea1170816b080cd8d7d76ad4b5b, ...] Jun 11 22:58:14 worker01 python3[13656]: [2016-06-11 22:58:14,074: INFO/Worker-10] Processed 1000 revisions: [17a376896e1f2084bfd85dc5052e14280bbcb63a, ...] Jun 11 23:02:08 worker01 python3[13656]: [2016-06-11 23:02:08,886: INFO/Worker-10] Processed 1000 revisions: [5f9d3835c69ec09fd082553d425d9e26fc006289, ...] Jun 11 23:06:31 worker01 python3[13656]: [2016-06-11 23:06:31,837: INFO/Worker-10] Processed 1000 revisions: [89551acb946906862441ef05368b99c5ce4d2144, ...] Jun 11 23:11:24 worker01 python3[13656]: [2016-06-11 23:11:24,307: INFO/Worker-10] Processed 1000 revisions: [97eeaf9eae6c155631b15514b5b600e367c2dc4b, ...] Jun 11 23:16:45 worker01 python3[13656]: [2016-06-11 23:16:45,041: INFO/Worker-10] Processed 1000 revisions: [c0f1bce218db099425e15aa9f5521a5badb4c6c2, ...] Jun 11 23:22:25 worker01 python3[13656]: [2016-06-11 23:22:25,507: INFO/Worker-10] Processed 1000 revisions: [b3dcf7365c6afd9280565abfec4204b26c4e13dc, ...] Jun 11 23:28:30 worker01 python3[13656]: [2016-06-11 23:28:30,510: INFO/Worker-10] Processed 1000 revisions: [943042e9d1113406483a2a5b6f39023935c0532a, ...] Jun 11 23:35:10 worker01 python3[13656]: [2016-06-11 23:35:10,918: INFO/Worker-10] Processed 1000 revisions: [81405fca1c7f928fcc6a2b137546b4f94f22551e, ...] Jun 11 23:39:53 worker01 python3[13656]: [2016-06-11 23:39:53,511: INFO/Worker-10] Processed 707 revisions: [5419a4e4da9d9e37df35c1c9455024fe8170d2fa, ...] 
-Jun 11 23:40:02 worker01 python3[13656]: [2016-06-11 23:40:02,978: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[60670885-84de-4884-937d-b7aecdbedcd5] succeeded in 3028.8734963517636s: None -Jun 11 23:40:02 worker01 python3[13656]: [2016-06-11 23:40:02,997: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[740dd7e1-3d96-4bc8-8fce-faef773acbb6] +Jun 11 23:40:02 worker01 python3[13656]: [2016-06-11 23:40:02,978: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[60670885-84de-4884-937d-b7aecdbedcd5] succeeded in 3028.8734963517636s: None +Jun 11 23:40:02 worker01 python3[13656]: [2016-06-11 23:40:02,997: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[740dd7e1-3d96-4bc8-8fce-faef773acbb6] Jun 11 23:40:03 worker01 python3[13656]: [2016-06-11 23:40:03,305: INFO/Worker-10] [revision_start-revision_end]: [1-34523] Jun 11 23:40:03 worker01 python3[13656]: [2016-06-11 23:40:03,324: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/python-modules', 'local_url': b'/tmp/tmp.g_iyx8bd.swh.loader/python-modules', 'uuid': b'771dd761-d7fa-03 10-a302-f036d1c1ebb6', 'swh-origin': 122}. Jun 11 23:42:33 worker01 python3[13656]: [2016-06-11 23:42:33,485: INFO/Worker-10] Processed 1000 revisions: [4ca7de178b8d929a6dfc12e113b3072730eeb4c3, ...] Jun 11 23:45:29 worker01 python3[13656]: [2016-06-11 23:45:29,699: INFO/Worker-10] Processed 1000 revisions: [45be8a9d34b4b57595a1e3c0cec40b378a05e43f, ...] Jun 11 23:48:50 worker01 python3[13656]: [2016-06-11 23:48:50,349: INFO/Worker-10] Processed 1000 revisions: [62d51095353cb4bac1b840a917e9cda7e809b6ea, ...] Jun 11 23:53:06 worker01 python3[13656]: [2016-06-11 23:53:06,817: INFO/Worker-10] Processed 1000 revisions: [0b88046aa11871c3d22a5190a62c200d42ab3da0, ...] Jun 11 23:57:02 worker01 python3[13656]: [2016-06-11 23:57:02,932: INFO/Worker-10] Processed 1000 revisions: [1791f1e4cdf4eef44aa6f1dcdb76378a37f5de38, ...] 
Jun 12 00:01:52 worker01 python3[13656]: [2016-06-12 00:01:52,850: INFO/Worker-10] Processed 1000 revisions: [60f91c20e1ac862c6d1fd4abf2890cebcd4ef2e6, ...] Jun 12 00:06:54 worker01 python3[13656]: [2016-06-12 00:06:54,366: INFO/Worker-10] Processed 1000 revisions: [51764d25e62e2318e92310cea8d123773d6a9054, ...] Jun 12 00:12:26 worker01 python3[13656]: [2016-06-12 00:12:26,119: INFO/Worker-10] Processed 1000 revisions: [8720f9952080e743c8432ef7d27cf35f61da88d6, ...] Jun 12 00:18:24 worker01 python3[13656]: [2016-06-12 00:18:24,556: INFO/Worker-10] Processed 1000 revisions: [837dc122f04058ede9351b4940579ba8fba4fda1, ...] Jun 12 00:24:53 worker01 python3[13656]: [2016-06-12 00:24:53,118: INFO/Worker-10] Processed 1000 revisions: [e33a666eb6f9a50e35f62e2c4187a0caf8055aad, ...] Jun 12 00:32:03 worker01 python3[13656]: [2016-06-12 00:32:03,550: INFO/Worker-10] Processed 1000 revisions: [2b16e069f75ebb8550d8b30c02e4dcaca0a98dae, ...] Jun 12 00:39:16 worker01 python3[13656]: [2016-06-12 00:39:16,356: INFO/Worker-10] Processed 1000 revisions: [fcbc70f1574e30e467d2aa49050f26a6e1e67b5e, ...] Jun 12 00:46:38 worker01 python3[13656]: [2016-06-12 00:46:38,941: INFO/Worker-10] Processed 1000 revisions: [10fc7480388c84bc7742e057b04f72c9e7696233, ...] Jun 12 00:54:29 worker01 python3[13656]: [2016-06-12 00:54:29,844: INFO/Worker-10] Processed 1000 revisions: [dabdb737d3f80c8c85ab6219b52429325750f7d7, ...] Jun 12 01:03:25 worker01 python3[13656]: [2016-06-12 01:03:25,923: INFO/Worker-10] Processed 1000 revisions: [5ec3d4290c8fa3f0cb3725c9412895633f48dc47, ...] Jun 12 01:12:23 worker01 python3[13656]: [2016-06-12 01:12:23,413: INFO/Worker-10] Processed 1000 revisions: [32aa0ad290ad039d8da3b1223d34d31f6ee93a69, ...] Jun 12 01:21:43 worker01 python3[13656]: [2016-06-12 01:21:43,691: INFO/Worker-10] Processed 1000 revisions: [6abae57242a050042f6980971911a9dcd7429f65, ...] 
Jun 12 01:31:56 worker01 python3[13656]: [2016-06-12 01:31:56,988: INFO/Worker-10] Processed 1000 revisions: [4bcf91f45c78113b626c914acdf81f73d11191f1, ...] Jun 12 01:42:27 worker01 python3[13656]: [2016-06-12 01:42:27,028: INFO/Worker-10] Processed 1000 revisions: [18f5add4158abdb8bf1e408e6758adf7e4a59eef, ...] Jun 12 01:53:32 worker01 python3[13656]: [2016-06-12 01:53:32,015: INFO/Worker-10] Processed 1000 revisions: [99ebabd86ec48c52965c89c41fe443f2ee49f6a4, ...] Jun 12 02:05:41 worker01 python3[13656]: [2016-06-12 02:05:41,288: INFO/Worker-10] Processed 1000 revisions: [d6f7410e1295db6dbb137d94a27a26e3e7d636c0, ...] Jun 12 02:19:43 worker01 python3[13656]: [2016-06-12 02:19:43,962: INFO/Worker-10] Processed 1000 revisions: [a4907fb6dadae5b431b66c33b1bf043eb52992e2, ...] Jun 12 02:33:59 worker01 python3[13656]: [2016-06-12 02:33:59,432: INFO/Worker-10] Processed 1000 revisions: [ebaca99c10f1ea83e983d500c760ea31acba0762, ...] Jun 12 02:49:18 worker01 python3[13656]: [2016-06-12 02:49:18,727: INFO/Worker-10] Processed 1000 revisions: [b7e32f4db2957388e5d2a7d89749a1ceb4dbcf57, ...] Jun 12 03:05:32 worker01 python3[13656]: [2016-06-12 03:05:32,251: INFO/Worker-10] Processed 1000 revisions: [8736c0ee49a93fbb328aabfa4d21cd5a936a02b1, ...] Jun 12 03:20:59 worker01 python3[13656]: [2016-06-12 03:20:59,802: INFO/Worker-10] Processed 1000 revisions: [22a27c3aab8d00bd5a63bee6287ae46aae6b9fcb, ...] Jun 12 03:37:08 worker01 python3[13656]: [2016-06-12 03:37:08,731: INFO/Worker-10] Processed 1000 revisions: [8575eeea98818729b6319b68078872b5a72bdf14, ...] Jun 12 03:54:14 worker01 python3[13656]: [2016-06-12 03:54:14,064: INFO/Worker-10] Processed 1000 revisions: [badeba4badf9ebcf209da0123dd87a074863efc9, ...] Jun 12 04:12:32 worker01 python3[13656]: [2016-06-12 04:12:32,728: INFO/Worker-10] Processed 1000 revisions: [b7e011f38d59bd95ca9752f5e9d4841f14a1f46b, ...] 
Jun 12 04:30:33 worker01 python3[13656]: [2016-06-12 04:30:33,848: INFO/Worker-10] Processed 1000 revisions: [fc8dbd462c4e698b2650effbf33903de2f9b49b2, ...] Jun 12 04:48:46 worker01 python3[13656]: [2016-06-12 04:48:46,147: INFO/Worker-10] Processed 1000 revisions: [7d70379982d340ebff879a3f79314568ea3873eb, ...] Jun 12 05:07:42 worker01 python3[13656]: [2016-06-12 05:07:42,426: INFO/Worker-10] Processed 1000 revisions: [7b81c934b441335e06977f2dcd797f16337c49f2, ...] Jun 12 05:26:47 worker01 python3[13656]: [2016-06-12 05:26:47,593: INFO/Worker-10] Processed 1000 revisions: [8de5385844c1a8de253ad8945dc06259d11c7fc8, ...] Jun 12 05:46:27 worker01 python3[13656]: [2016-06-12 05:46:27,938: INFO/Worker-10] Processed 1000 revisions: [c0b63db3767754092c8c1fe92b07e66e77b7fed3, ...] Jun 12 05:57:00 worker01 python3[13656]: [2016-06-12 05:57:00,711: INFO/Worker-10] Processed 523 revisions: [ba52b091af3078562f7e7fc05c04b9469988e006, ...] -Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,284: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[740dd7e1-3d96-4bc8-8fce-faef773acbb6] succeeded in 22651.265359937213s: None -Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,299: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepositoryTsk[10b1e441-4ab0-4121-a27f-7d32f1312b1f] +Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,284: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[740dd7e1-3d96-4bc8-8fce-faef773acbb6] succeeded in 22651.265359937213s: None +Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,299: INFO/MainProcess] Received task: swh.loader.svn.tasks.LoadSvnRepository[10b1e441-4ab0-4121-a27f-7d32f1312b1f] Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,593: INFO/Worker-10] [revision_start-revision_end]: [1-49061] Jun 12 05:57:34 worker01 python3[13656]: [2016-06-12 05:57:34,623: INFO/Worker-10] Processing {'remote_url': 'svn://svn.debian.org/svn/pkg-gnome', 
'local_url': b'/tmp/tmp.h36l0khh.swh.loader/pkg-gnome', 'uuid': b'db0db5de-e4c8-0310-9441-90 abf70311f7', 'swh-origin': 27}. Jun 12 06:00:02 worker01 python3[13656]: [2016-06-12 06:00:02,454: INFO/Worker-10] Processed 1000 revisions: [29a3157f1d4a82955860a6fd3397bbd94573b555, ...] Jun 12 06:02:48 worker01 python3[13656]: [2016-06-12 06:02:48,493: INFO/Worker-10] Processed 1000 revisions: [bbada23d3943294e2efe3d6a1c978498e013d957, ...] Jun 12 06:05:59 worker01 python3[13656]: [2016-06-12 06:05:59,792: INFO/Worker-10] Processed 1000 revisions: [8a257ee9a72d9ec7f3a25bc5b4ab886f06b7d9c4, ...] Jun 12 06:08:54 worker01 python3[13656]: [2016-06-12 06:08:54,635: INFO/Worker-10] Processed 1000 revisions: [f179272fc81ae3f16bbf6d8a12709e9a7109b347, ...] Jun 12 06:11:41 worker01 python3[13656]: [2016-06-12 06:11:41,154: INFO/Worker-10] Processed 1000 revisions: [d74e2545a86fb065e60f3ef6e1fa5a975ab6a793, ...] Jun 12 06:14:44 worker01 python3[13656]: [2016-06-12 06:14:44,830: INFO/Worker-10] Processed 1000 revisions: [2a1dab8d134ef32f914da2ccc7c14819777f1e8f, ...] Jun 12 06:17:56 worker01 python3[13656]: [2016-06-12 06:17:56,791: INFO/Worker-10] Processed 1000 revisions: [544884d67e2a6c138a09ddf6b8f3f3f9c835610e, ...] Jun 12 06:21:13 worker01 python3[13656]: [2016-06-12 06:21:13,713: INFO/Worker-10] Processed 1000 revisions: [17c8cc0eb3be0d69176af005422f4a8aea22b037, ...] Jun 12 06:24:12 worker01 python3[13656]: [2016-06-12 06:24:12,387: INFO/Worker-10] Processed 1000 revisions: [cd83dc86dc388920ddfef5c3b393eeb754b15595, ...] Jun 12 06:27:59 worker01 python3[13656]: [2016-06-12 06:27:59,335: INFO/Worker-10] Processed 1000 revisions: [ed5ad1e6879a9a56c1fb89fec0e9ba80943facd8, ...] Jun 12 06:31:51 worker01 python3[13656]: [2016-06-12 06:31:51,145: INFO/Worker-10] Processed 1000 revisions: [ea995f84bc6eef36f890243f0c6ea4bdf2796452, ...] 
Jun 12 06:36:05 worker01 python3[13656]: [2016-06-12 06:36:05,801: INFO/Worker-10] Processed 1000 revisions: [b83e8f3b62dda65f582c01e2d04da6003564546b, ...] Jun 12 06:41:16 worker01 python3[13656]: [2016-06-12 06:41:16,749: INFO/Worker-10] Processed 1000 revisions: [c730f0da619383441fe1dcf04e19a5e40976226d, ...] Jun 12 06:47:13 worker01 python3[13656]: [2016-06-12 06:47:13,322: INFO/Worker-10] Processed 1000 revisions: [559711199c653202b1c8f2073a7cb0c95116165e, ...] Jun 12 06:53:14 worker01 python3[13656]: [2016-06-12 06:53:14,793: INFO/Worker-10] Processed 1000 revisions: [e2739f72f8bfe51f3344d838d2ae4b66aa472497, ...] Jun 12 06:59:51 worker01 python3[13656]: [2016-06-12 06:59:51,234: INFO/Worker-10] Processed 1000 revisions: [d05438f16f9aa3cd8dfbe47d493bdcc168448daf, ...] Jun 12 07:07:16 worker01 python3[13656]: [2016-06-12 07:07:16,195: INFO/Worker-10] Processed 1000 revisions: [4cf7273102dca6be96e569b8115f1371d43c3767, ...] Jun 12 07:15:28 worker01 python3[13656]: [2016-06-12 07:15:27,999: INFO/Worker-10] Processed 1000 revisions: [2ecb859a650ad2b10e7d0869c014d6b1b3ded55a, ...] Jun 12 07:24:27 worker01 python3[13656]: [2016-06-12 07:24:27,812: INFO/Worker-10] Processed 1000 revisions: [4758461520c23192618d1e78eeb23fecfcb9bd29, ...] Jun 12 07:35:29 worker01 python3[13656]: [2016-06-12 07:35:29,420: INFO/Worker-10] Processed 1000 revisions: [21fec7d864ab60f99863fe842680bfef1f8cd0b5, ...] Jun 12 07:47:12 worker01 python3[13656]: [2016-06-12 07:47:12,803: INFO/Worker-10] Processed 1000 revisions: [e3c2e7cbb7beaa344549145e2e78c77c5fd9f7c8, ...] Jun 12 07:58:12 worker01 python3[13656]: [2016-06-12 07:58:12,596: INFO/Worker-10] Processed 1000 revisions: [f6cf4af3d7d1744c364a9f25e92a5a6bba622837, ...] Jun 12 08:10:41 worker01 python3[13656]: [2016-06-12 08:10:41,787: INFO/Worker-10] Processed 1000 revisions: [3ccd855ced499e839306249a44006b33fcd9efb7, ...] 
Jun 12 08:23:42 worker01 python3[13656]: [2016-06-12 08:23:42,690: INFO/Worker-10] Processed 1000 revisions: [65092bf7e0e2df4d71a31f3d89d1233eef0e6438, ...] Jun 12 08:38:36 worker01 python3[13656]: [2016-06-12 08:38:36,192: INFO/Worker-10] Processed 1000 revisions: [a4c15e02c5db378dd77f0bab5e6a73a437ad8afb, ...] Jun 12 08:53:13 worker01 python3[13656]: [2016-06-12 08:53:13,853: INFO/Worker-10] Processed 1000 revisions: [bb81b662a5257e136c665b97d858bf4d20bf088c, ...] Jun 12 09:09:17 worker01 python3[13656]: [2016-06-12 09:09:17,177: INFO/Worker-10] Processed 1000 revisions: [04467b7e2bd60ac0a5c55ba184432620410f8b11, ...] Jun 12 09:26:04 worker01 python3[13656]: [2016-06-12 09:26:04,887: INFO/Worker-10] Processed 1000 revisions: [7b108c6627a394a2db6ad8d743f319e085014118, ...] Jun 12 09:44:06 worker01 python3[13656]: [2016-06-12 09:44:06,874: INFO/Worker-10] Processed 1000 revisions: [479a007698148817ab921a0a2df6a07f4558ec0e, ...] Jun 12 10:02:21 worker01 python3[13656]: [2016-06-12 10:02:21,001: INFO/Worker-10] Processed 1000 revisions: [2f7cbf1ff849e315b9211e6a57451c22d5a9a538, ...] Jun 12 10:21:57 worker01 python3[13656]: [2016-06-12 10:21:57,565: INFO/Worker-10] Processed 1000 revisions: [97650da3145a749e730367e9bb8b4b5e99733d1f, ...] Jun 12 10:40:39 worker01 python3[13656]: [2016-06-12 10:40:39,339: INFO/Worker-10] Processed 1000 revisions: [8aca07d408d5461e073f2a464b1bb1098a4169d6, ...] Jun 12 10:59:30 worker01 python3[13656]: [2016-06-12 10:59:30,232: INFO/Worker-10] Processed 1000 revisions: [66a3005e7fb08d761030438b88416a2f5354aab3, ...] Jun 12 11:18:59 worker01 python3[13656]: [2016-06-12 11:18:59,836: INFO/Worker-10] Processed 1000 revisions: [8d25bc6cb7e1ce4e1cf0bceca17cc8cd4701e4b1, ...] Jun 12 11:38:26 worker01 python3[13656]: [2016-06-12 11:38:26,167: INFO/Worker-10] Processed 1000 revisions: [77c2759fc8f419b2993b39b1423a5f48c5b26aa4, ...] 
Jun 12 11:59:25 worker01 python3[13656]: [2016-06-12 11:59:25,276: INFO/Worker-10] Processed 1000 revisions: [39ec4e85be0132737da5862397d35263930d0f60, ...] Jun 12 12:30:45 worker01 python3[13656]: [2016-06-12 12:30:45,885: INFO/Worker-10] Processed 1000 revisions: [7f9125098da0bf0360ce8f2c4fb70f96f5f105f3, ...] Jun 12 12:57:31 worker01 python3[13656]: [2016-06-12 12:57:31,495: INFO/Worker-10] Processed 1000 revisions: [f882f6cc7777f60a3e2e795bcffcaeb586defe2e, ...] Jun 12 13:21:53 worker01 python3[13656]: [2016-06-12 13:21:53,741: INFO/Worker-10] Processed 1000 revisions: [dfb73fa7ac834fb8863c739c6a1f40320ccca198, ...] Jun 12 13:45:41 worker01 python3[13656]: [2016-06-12 13:45:41,496: INFO/Worker-10] Processed 1000 revisions: [7a3dcc6abe1fe812552a5ff17123cc676a6e19ea, ...] Jun 12 14:10:03 worker01 python3[13656]: [2016-06-12 14:10:03,134: INFO/Worker-10] Processed 1000 revisions: [2f14c0ee2931c3bfe146af6f7baa5ffa6512a9d5, ...] Jun 12 14:36:48 worker01 python3[13656]: [2016-06-12 14:36:48,680: INFO/Worker-10] Processed 1000 revisions: [aa82ccb58456e1be3fc8b3b4db0c2769526f115e, ...] Jun 12 15:04:36 worker01 python3[13656]: [2016-06-12 15:04:36,579: INFO/Worker-10] Processed 1000 revisions: [c0ac4d6e5058a7ad19bc5b5f8ea6d0184262814b, ...] Jun 12 15:31:55 worker01 python3[13656]: [2016-06-12 15:31:55,484: INFO/Worker-10] Processed 1000 revisions: [8e972fa53b5e4ccaca85cf7f7087191a809d83bd, ...] Jun 12 15:59:34 worker01 python3[13656]: [2016-06-12 15:59:34,441: INFO/Worker-10] Processed 1000 revisions: [b846dcc3d3cea598263cffb87557aa7f7566e80f, ...] Jun 12 16:28:10 worker01 python3[13656]: [2016-06-12 16:28:10,446: INFO/Worker-10] Processed 1000 revisions: [bc969c3c6977374d6e6bc5fe4e0606ee60f29e90, ...] Jun 12 16:57:15 worker01 python3[13656]: [2016-06-12 16:57:15,045: INFO/Worker-10] Processed 1000 revisions: [e9625679cf1fe23a91f0f06f02ac03723d7112cb, ...] 
Jun 12 17:28:04 worker01 python3[13656]: [2016-06-12 17:28:04,678: INFO/Worker-10] Processed 1000 revisions: [f61326f5d12094860dead900d2d46d0c368f4e4b, ...] Jun 12 18:01:24 worker01 python3[13656]: [2016-06-12 18:01:24,516: INFO/Worker-10] Processed 1000 revisions: [754bfe4141e5a9165a7458945157791ac85e6ff9, ...] Jun 12 18:04:03 worker01 python3[13656]: [2016-06-12 18:04:03,334: INFO/Worker-10] Processed 61 revisions: [27e079019bc07ef84716c80c62ec53a39d806879, ...] -Jun 12 18:04:43 worker01 python3[13656]: [2016-06-12 18:04:43,418: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[10b1e441-4ab0-4121-a27f-7d32f1312b1f] succeeded in 43629.105175915174s: None +Jun 12 18:04:43 worker01 python3[13656]: [2016-06-12 18:04:43,418: INFO/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[10b1e441-4ab0-4121-a27f-7d32f1312b1f] succeeded in 43629.105175915174s: None #+END_SRC diff --git a/errors.org b/errors.org index f4676b1..0e85896 100644 --- a/errors.org +++ b/errors.org @@ -1,307 +1,307 @@ #+title: Errors encountered during testing #+author: ardumont * Malformed xml exception when asking for log #+BEGIN_SRC sh -[2016-04-03 11:10:43,740: ERROR/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[8ffa1cf0-6e88-40f5-9918-f659de0810f3] raised unexpected: ClientError('The XML response contains invalid XML\nMalformed XML: not well-formed (invalid token)',) +[2016-04-03 11:10:43,740: ERROR/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[8ffa1cf0-6e88-40f5-9918-f659de0810f3] raised unexpected: ClientError('The XML response contains invalid XML\nMalformed XML: not well-formed (invalid token)',) Traceback (most recent call last): File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 240, in trace_task R = retval = fun(*args, **kwargs) File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 438, in __protected_call__ return self.run(*args, **kwargs) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/tasks.py", line 88, in run result = SvnLoader(config).process(svn_url, origin, local_path) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 159, in process self.config['revision_packet_size']): File "/home/tony/work/inria/repo/swh-environment/swh-core/swh/core/utils.py", line 25, in grouper for _data in itertools.zip_longest(*args, fillvalue=fv): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 59, in process_revisions revision_start, revision_end): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 269, in swh_hash_data_per_revision for commit in self.logs(start_revision, end_revision): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 217, in logs discover_changed_paths=True): pysvn._pysvn_3_5.ClientError: The XML response contains invalid XML Malformed XML: not well-formed (invalid token) #+END_SRC * No author found In general, author, message and even date can be empty in svn revision log entry. 
#+BEGIN_SRC sh 346a2fa8c] raised unexpected: AttributeError("PysvnLog instance has no attribute 'author'",) Traceback (most recent call last): File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 240, in trace_task R = retval = fun(*args, **kwargs) File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 438, in __protected_call__ return self.run(*args, **kwargs) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/tasks.py", line 82, in run result = SvnLoader(config).process(svn_url, origin, local_path) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 163, in process self.config['revision_packet_size']): File "/home/tony/work/inria/repo/swh-environment/swh-core/swh/core/utils.py", line 25, in grouper for _data in itertools.zip_longest(*args, fillvalue=fv): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 57, in process_revisions revision_start, revision_end): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 245, in swh_hash_data_per_revision for rev, commit in self.logs(start_revision, end_revision): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, 
block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 207, in logs yield from self.logs(r2 + 1, revision_end, block_size) File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 184, in logs author = log_entry.author File "/home/tony/.local/lib/python3.5/site-packages/pysvn/__init__.py", line 29, in __getattr__ raise AttributeError( "%s instance has no attribute '%s'" % (self.__class__.__name__, name) ) AttributeError: PysvnLog instance has no attribute 'author' #+END_SRC * lock When a checkout goes bad, some lock can stay. A `svn cleanup` operation can clean those locks from the working copy. #+BEGIN_SRC sh 3e6fa019d] raised unexpected: ClientError("'/tmp/tmp.fc9qu8sx.swh.loader/org-trello' is already locked via '/tmp/tmp.fc9qu8sx.swh.loader/org-trello'.",) Traceback (most recent call last): File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 240, in trace_task R = retval = fun(*args, **kwargs) File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 438, in __protected_call__ return self.run(*args, **kwargs) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/tasks.py", line 88, in run result = SvnLoader(config).process(svn_url, origin, local_path) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 159, in process self.config['revision_packet_size']): File "/home/tony/work/inria/repo/swh-environment/swh-core/swh/core/utils.py", line 25, in grouper for _data in itertools.zip_longest(*args, fillvalue=fv): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 59, in process_revisions revision_start, revision_end): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 276, in swh_hash_data_per_revision self.checkout(revision=rev) File "/usr/lib/python3/dist-packages/retrying.py", line 49, in wrapped_f return Retrying(*dargs, **dkw).call(f, *args, **kw) File "/usr/lib/python3/dist-packages/retrying.py", line 212, in call raise attempt.get() File 
"/usr/lib/python3/dist-packages/retrying.py", line 247, in get six.reraise(self.value[0], self.value[1], self.value[2]) File "/usr/local/lib/python3.5/dist-packages/six.py", line 611, in reraise raise value File "/usr/lib/python3/dist-packages/retrying.py", line 200, in call attempt = Attempt(fn(*args, **kwargs), attempt_number, False) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 111, in checkout revision=Revision(opt_revision_kind.number, revision)) pysvn._pysvn_3_5.ClientError: '/tmp/tmp.fc9qu8sx.swh.loader/org-trello' is already locked via '/tmp/tmp.fc9qu8sx.swh.loader/org-trello'. #+END_SRC * node not found ??? #+BEGIN_SRC sh -[2016-04-03 12:45:41,468: ERROR/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepositoryTsk[9bf99330-4960-41ab-989f-3b172ffa92e6] raised unexpected: ValueError(ClientError("The node '/tmp/tmp.kra26_aa.swh.loader/dot-files/LICENSE.txt' was not found.",),) +[2016-04-03 12:45:41,468: ERROR/MainProcess] Task swh.loader.svn.tasks.LoadSvnRepository[9bf99330-4960-41ab-989f-3b172ffa92e6] raised unexpected: ValueError(ClientError("The node '/tmp/tmp.kra26_aa.swh.loader/dot-files/LICENSE.txt' was not found.",),) Traceback (most recent call last): File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 240, in trace_task R = retval = fun(*args, **kwargs) File "/usr/lib/python3/dist-packages/celery/app/trace.py", line 438, in __protected_call__ return self.run(*args, **kwargs) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/tasks.py", line 88, in run result = SvnLoader(config).process(svn_url, origin, local_path) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 159, in process self.config['revision_packet_size']): File "/home/tony/work/inria/repo/swh-environment/swh-core/swh/core/utils.py", line 25, in grouper for _data in itertools.zip_longest(*args, fillvalue=fv): File 
"/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/loader.py", line 59, in process_revisions revision_start, revision_end): File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 300, in swh_hash_data_per_revision self.checkout(revision=rev) File "/usr/lib/python3/dist-packages/retrying.py", line 49, in wrapped_f return Retrying(*dargs, **dkw).call(f, *args, **kw) File "/usr/lib/python3/dist-packages/retrying.py", line 212, in call raise attempt.get() File "/usr/lib/python3/dist-packages/retrying.py", line 247, in get six.reraise(self.value[0], self.value[1], self.value[2]) File "/usr/local/lib/python3.5/dist-packages/six.py", line 611, in reraise raise value File "/usr/lib/python3/dist-packages/retrying.py", line 200, in call attempt = Attempt(fn(*args, **kwargs), attempt_number, False) File "/home/tony/work/inria/repo/swh-environment/swh-loader-svn/swh/loader/svn/svn.py", line 135, in checkout raise SvnRepoException(self, e) ValueError: The node '/tmp/tmp.kra26_aa.swh.loader/dot-files/LICENSE.txt' was not found. #+END_SRC diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py index 9a7113b..de85021 100644 --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -1,565 +1,565 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """Loader in charge of injecting either new or existing svn mirrors to swh-storage. """ import os import shutil import tempfile from swh.model import hashutil from swh.model.from_disk import Directory from swh.model.identifiers import identifier_to_bytes, revision_identifier from swh.model.identifiers import snapshot_identifier from swh.loader.core.loader import SWHLoader from swh.loader.core.utils import clean_dangling_folders from . 
import svn, converters from .utils import init_svn_repo_from_archive_dump from .exception import SvnLoaderUneventful from .exception import SvnLoaderHistoryAltered DEFAULT_BRANCH = b'master' def _revision_id(revision): return identifier_to_bytes(revision_identifier(revision)) def build_swh_snapshot(revision_id, branch=DEFAULT_BRANCH): """Build a swh snapshot from the revision id, origin id, and visit. """ return { 'id': None, 'branches': { branch: { 'target': revision_id, 'target_type': 'revision', } } } TEMPORARY_DIR_PREFIX_PATTERN = 'swh.loader.svn.' -class SWHSvnLoader(SWHLoader): +class SvnLoader(SWHLoader): """Swh svn loader to load an svn repository The repository is either remote or local. The loader deals with update on an already previously loaded repository. Default policy: Keep data as close as possible from the original svn data. We only add information that are needed for update or continuing from last known revision (svn revision and svn repository's uuid). """ CONFIG_BASE_FILENAME = 'loader/svn' ADDITIONAL_CONFIG = { 'check_revision': ('int', 1000), 'temp_directory': ('str', '/tmp'), 'debug': ('bool', False), # NOT FOR PRODUCTION, False is mandatory } def __init__(self): super().__init__(logging_class='swh.loader.svn.SvnLoader') self.check_revision = self.config['check_revision'] self.origin_id = None self.debug = self.config['debug'] self.last_seen_revision = None self.temp_directory = self.config['temp_directory'] self.done = False # internal state used to store swh objects self._contents = [] self._directories = [] self._revisions = [] self._snapshot = None self._last_revision = None self._visit_status = 'full' self._load_status = 'uneventful' def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed tasks) """ clean_dangling_folders(self.temp_directory, pattern_check=TEMPORARY_DIR_PREFIX_PATTERN, log=self.log) def cleanup(self): """Clean up the svn repository's working representation on disk. 
""" if not hasattr(self, 'svnrepo'): # could happen if `prepare` fails # nothing to do in that case return if self.debug: self.log.error('''NOT FOR PRODUCTION - debug flag activated Local repository not cleaned up for investigation: %s''' % ( self.svnrepo.local_url.decode('utf-8'), )) return self.svnrepo.clean_fs() def swh_revision_hash_tree_at_svn_revision(self, revision): """Compute and return the hash tree at a given svn revision. Args: rev (int): the svn revision we want to check Returns: The hash tree directory as bytes. """ local_dirname, local_url = self.svnrepo.export_temporary(revision) h = Directory.from_disk(path=local_url).hash self.svnrepo.clean_fs(local_dirname) return h def get_svn_repo(self, svn_url, local_dirname, origin): """Instantiates the needed svnrepo collaborator to permit reading svn repository. Args: svn_url (str): the svn repository url to read from local_dirname (str): the local path on disk to compute data origin (int): the corresponding origin Returns: Instance of :mod:`swh.loader.svn.svn` clients """ - return svn.SWHSvnRepo( + return svn.SvnRepo( svn_url, origin['id'], self.storage, local_dirname=local_dirname) def swh_latest_snapshot_revision(self, origin_id, previous_swh_revision=None): """Look for latest snapshot revision and returns it if any. Args: origin_id (int): Origin identifier previous_swh_revision: (optional) id of a possible previous swh revision Returns: dict: The latest known point in time. Dict with keys: 'revision': latest visited revision 'snapshot': latest snapshot If None is found, return an empty dict. 
""" storage = self.storage if not previous_swh_revision: # check latest snapshot's revision latest_snap = storage.snapshot_get_latest(origin_id) if latest_snap: branches = latest_snap.get('branches') if not branches: return {} branch = branches.get(DEFAULT_BRANCH) if not branch: return {} target_type = branch['target_type'] if target_type != 'revision': return {} previous_swh_revision = branch['target'] else: return {} if isinstance(previous_swh_revision, dict): swh_id = previous_swh_revision['id'] else: swh_id = previous_swh_revision revs = list(storage.revision_get([swh_id])) if revs: return { 'snapshot': latest_snap, 'revision': revs[0] } return {} def build_swh_revision(self, rev, commit, dir_id, parents): """Build the swh revision dictionary. This adds: - the `'synthetic`' flag to true - the '`extra_headers`' containing the repository's uuid and the svn revision number. Args: rev (dict): the svn revision commit (dict): the commit metadata dir_id (bytes): the upper tree's hash identifier parents ([bytes]): the parents' identifiers Returns: The swh revision corresponding to the svn revision. """ return converters.build_swh_revision(rev, commit, self.svnrepo.uuid, dir_id, parents) def check_history_not_altered(self, svnrepo, revision_start, swh_rev): """Given a svn repository, check if the history was not tampered with. """ revision_id = swh_rev['id'] parents = swh_rev['parents'] hash_data_per_revs = svnrepo.swh_hash_data_at_revision(revision_start) rev = revision_start rev, _, commit, _, root_dir = list(hash_data_per_revs)[0] dir_id = root_dir.hash swh_revision = self.build_swh_revision(rev, commit, dir_id, parents) swh_revision_id = _revision_id(swh_revision) return swh_revision_id == revision_id def _init_from(self, partial_swh_revision, previous_swh_revision): """Function to determine from where to start from. Args: partial_swh_revision (dict): A known revision from which the previous loading did not finish. 
known_previous_revision (dict): A known revision from which the previous loading did finish. Returns: The revision from which to start or None if nothing (fresh start). """ if partial_swh_revision and not previous_swh_revision: return partial_swh_revision if not partial_swh_revision and previous_swh_revision: return previous_swh_revision if partial_swh_revision and previous_swh_revision: # will determine from which to start from extra_headers1 = dict( partial_swh_revision['metadata']['extra_headers']) extra_headers2 = dict( previous_swh_revision['metadata']['extra_headers']) rev_start1 = int(extra_headers1['svn_revision']) rev_start2 = int(extra_headers2['svn_revision']) if rev_start1 <= rev_start2: return previous_swh_revision return partial_swh_revision return None def start_from(self, last_known_swh_revision=None, start_from_scratch=False): """Determine from where to start the loading. Args: last_known_swh_revision (dict): Last know swh revision or None start_from_scratch (bool): To start loading from scratch or not Returns: tuple (revision_start, revision_end, revision_parents) Raises: SvnLoaderHistoryAltered: When a hash divergence has been detected (should not happen) SvnLoaderUneventful: Nothing changed since last visit """ revision_head = self.svnrepo.head_revision() if revision_head == 0: # empty repository case revision_start = 0 revision_end = 0 else: # default configuration revision_start = self.svnrepo.initial_revision() revision_end = revision_head revision_parents = { revision_start: [] } if not start_from_scratch: # Check if we already know a previous revision for that origin if self.latest_snapshot: swh_rev = self.latest_snapshot['revision'] else: swh_rev = None # Determine from which known revision to start swh_rev = self._init_from(last_known_swh_revision, previous_swh_revision=swh_rev) if swh_rev: # Yes, we know a previous revision. Try and update it. 
extra_headers = dict(swh_rev['metadata']['extra_headers']) revision_start = int(extra_headers['svn_revision']) revision_parents = { revision_start: swh_rev['parents'], } self.log.debug('svn export --ignore-keywords %s@%s' % ( self.svnrepo.remote_url, revision_start)) if swh_rev and not self.check_history_not_altered( self.svnrepo, revision_start, swh_rev): msg = 'History of svn %s@%s altered. ' \ 'Skipping...' % ( self.svnrepo.remote_url, revision_start) raise SvnLoaderHistoryAltered(msg) # now we know history is ok, we start at next revision revision_start = revision_start + 1 # and the parent become the latest know revision for # that repository revision_parents[revision_start] = [swh_rev['id']] if revision_start > revision_end and revision_start is not 1: msg = '%s@%s already injected.' % (self.svnrepo.remote_url, revision_end) raise SvnLoaderUneventful(msg) self.log.info('Processing revisions [%s-%s] for %s' % ( revision_start, revision_end, self.svnrepo)) return revision_start, revision_end, revision_parents def process_svn_revisions(self, svnrepo, revision_start, revision_end, revision_parents): """Process svn revisions from revision_start to revision_end. At each svn revision, checkout the repository, compute the tree hash and blobs and send for swh storage to store. Then computes and yields the computed swh contents, directories, revision. Note that at every self.check_revision, an svn export is done and a hash tree is computed to check that no divergence occurred. Yields: tuple (contents, directories, revision) of dict as a dictionary with keys, sha1_git, sha1, etc... 
Raises: ValueError in case of a hash divergence detection """ gen_revs = svnrepo.swh_hash_data_per_revision( revision_start, revision_end) swh_revision = None count = 0 for rev, nextrev, commit, new_objects, root_directory in gen_revs: count += 1 # Send the associated contents/directories _contents = new_objects.get('content', {}).values() _directories = new_objects.get('directory', {}).values() # compute the fs tree's checksums dir_id = root_directory.hash swh_revision = self.build_swh_revision( rev, commit, dir_id, revision_parents[rev]) swh_revision['id'] = _revision_id(swh_revision) self.log.debug('rev: %s, swhrev: %s, dir: %s' % ( rev, hashutil.hash_to_hex(swh_revision['id']), hashutil.hash_to_hex(dir_id))) # FIXME: Is that still necessary? Rationale: T570 is now closed if (count % self.check_revision) == 0: # hash computation check self.log.debug('Checking hash computations on revision %s...' % rev) checked_dir_id = self.swh_revision_hash_tree_at_svn_revision( rev) if checked_dir_id != dir_id: err = 'Hash tree computation divergence detected ' \ '(%s != %s), stopping!' 
% ( hashutil.hash_to_hex(dir_id), hashutil.hash_to_hex(checked_dir_id)) raise ValueError(err) if nextrev: revision_parents[nextrev] = [swh_revision['id']] yield _contents, _directories, swh_revision def prepare_origin_visit(self, *, svn_url, visit_date=None, origin_url=None, **kwargs): self.origin = { 'url': origin_url if origin_url else svn_url, 'type': 'svn', } self.visit_date = visit_date def prepare(self, *, svn_url, destination_path=None, swh_revision=None, start_from_scratch=False, **kwargs): self.start_from_scratch = start_from_scratch if swh_revision: self.last_known_swh_revision = swh_revision else: self.last_known_swh_revision = None self.latest_snapshot = self.swh_latest_snapshot_revision( self.origin_id, self.last_known_swh_revision) if destination_path: local_dirname = destination_path else: local_dirname = tempfile.mkdtemp( suffix='-%s' % os.getpid(), prefix=TEMPORARY_DIR_PREFIX_PATTERN, dir=self.temp_directory) self.svnrepo = self.get_svn_repo(svn_url, local_dirname, self.origin) try: revision_start, revision_end, revision_parents = self.start_from( self.last_known_swh_revision, self.start_from_scratch) self.swh_revision_gen = self.process_svn_revisions( self.svnrepo, revision_start, revision_end, revision_parents) except SvnLoaderUneventful as e: self.log.warn(e) if self.latest_snapshot and 'snapshot' in self.latest_snapshot: self._snapshot = self.latest_snapshot['snapshot'] self.done = True except SvnLoaderHistoryAltered as e: self.log.error(e) self.done = True self._visit_status = 'partial' def fetch_data(self): """Fetching svn revision information. This will apply svn revision as patch on disk, and at the same time, compute the swh hashes. In effect, fetch_data fetches those data and compute the necessary swh objects. It's then stored in the internal state instance variables (initialized in `_prepare_state`). This is up to `store_data` to actually discuss with the storage to store those objects. 
Returns: bool: True to continue fetching data (next svn revision), False to stop. """ data = None if self.done: return False try: data = next(self.swh_revision_gen) self._load_status = 'eventful' except StopIteration: self.done = True self._visit_status = 'full' return False # Stopping iteration except Exception as e: # Potential: svn:external, i/o error... self.done = True self._visit_status = 'partial' return False # Stopping iteration self._contents, self._directories, revision = data if revision: self._last_revision = revision self._revisions.append(revision) return True # next svn revision def store_data(self): """We store the data accumulated in internal instance variable. If the iteration over the svn revisions is done, we create the snapshot and flush to storage the data. This also resets the internal instance variable state. """ self.maybe_load_contents(self._contents) self.maybe_load_directories(self._directories) self.maybe_load_revisions(self._revisions) if self.done: # finish line, snapshot! self.generate_and_load_snapshot(revision=self._last_revision, snapshot=self._snapshot) self.flush() self._contents = [] self._directories = [] self._revisions = [] def generate_and_load_snapshot(self, revision=None, snapshot=None): """Create the snapshot either from existing revision or snapshot. Revision (supposedly new) has priority over the snapshot (supposedly existing one). 
Args: revision (dict): Last revision seen if any (None by default) snapshot (dict): Snapshot to use if any (None by default) """ if revision: # Priority to the revision snap = build_swh_snapshot(revision['id']) snap['id'] = identifier_to_bytes(snapshot_identifier(snap)) elif snapshot: # Fallback to prior snapshot snap = snapshot else: return None self.log.debug('snapshot: %s' % snap) self.maybe_load_snapshot(snap) def load_status(self): return { 'status': self._load_status, } def visit_status(self): return self._visit_status -class SWHSvnLoaderFromDumpArchive(SWHSvnLoader): +class SvnLoaderFromDumpArchive(SvnLoader): """Uncompress an archive containing an svn dump, mount the svn dump as an svn repository and load said repository. """ def __init__(self, archive_path): super().__init__() self.log.info('Archive to mount and load %s' % archive_path) self.temp_dir, self.repo_path = init_svn_repo_from_archive_dump( archive_path, prefix=TEMPORARY_DIR_PREFIX_PATTERN, suffix='-%s' % os.getpid(), root_dir=self.temp_directory) def cleanup(self): super().cleanup() if self.temp_dir and os.path.exists(self.temp_dir): msg = 'Clean up temporary directory dump %s for project %s' % ( self.temp_dir, os.path.basename(self.repo_path)) self.log.debug(msg) shutil.rmtree(self.temp_dir) diff --git a/swh/loader/svn/producer.py b/swh/loader/svn/producer.py index 2dc053e..bf71d8f 100644 --- a/swh/loader/svn/producer.py +++ b/swh/loader/svn/producer.py @@ -1,210 +1,210 @@ # Copyright (C) 2015-2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import click import datetime import sys from swh.core import utils from swh.scheduler.utils import get_task from swh.scheduler.backend import SchedulerBackend def _produce_svn_to_load(svn_url, origin_url, destination_path=None, visit_date=None): """Yield svn url(s) parameters for 
producers. Those urls can either be read from stdin or directly passed as argument. Either the svn_url is passed and only 1 svn url is sent. Either no svn_url is provided and those urls are read from stdin and yielded as parameters for producers down the line. Args: svn_url (str / None): Potential svn url to load origin_url (str / None): Potential associated origin url destination_path (str): Destination path visit_date (date): Forcing the visit date Yields tuple svn_url, origin_url, visit_date, destination_path """ if svn_url: yield svn_url, origin_url, visit_date, destination_path # input from stdin, so we ignore most of the function's input for line in sys.stdin: line = line.rstrip() data = line.split(' ') svn_url = data[0] if len(data) > 1: origin_url = data[1] else: origin_url = None if svn_url: yield svn_url, origin_url, visit_date, destination_path def _produce_archive_to_mount_and_load(archive_path, visit_date): """Yield svn dump(s) parameters for producers. Those dumps can either be read from stdin or directly passed as argument. Either the archive_url is passed and only 1 dump is sent. Either no archive_path is provided and those dumps are read from stdin and yielded as parameters for producers down the line. 
Args: svn_url (str / None): Potential svn url to load origin_url (str / None): Potential associated origin url destination_path (str): Destination path visit_date (date): Forcing the visit date Yields tuple archive_path, origin_url, visit_date """ if archive_path: yield archive_path, None, visit_date for line in sys.stdin: line = line.rstrip() data = line.split(' ') archive_path = data[0] if len(data) > 1: origin_url = data[1] else: origin_url = None if archive_path: yield archive_path, origin_url, visit_date @click.group() def cli(): pass @cli.command('svn', help='Default svn urls producer') @click.option('--url', help="svn repository's mirror url.") @click.option('--origin-url', default=None, help='svn repository\'s original remote url ' '(if different than --svn-url).') @click.option('--destination-path', help="(optional) svn checkout destination.") @click.option('--visit-date', help="(optional) visit date to override") @click.option('--synchroneous', is_flag=True, help="To execute directly the svn loading.") @click.option('--dry-run/--no-dry-run', default=False, is_flag=True, help="Dry run flag") @click.option('--start-from-scratch', default=False, is_flag=True, help="Start from scratch option") def produce_svn_to_load(url, origin_url, destination_path, visit_date, synchroneous, dry_run, start_from_scratch): """Produce svn urls to celery queue """ - task = get_task('swh.loader.svn.tasks.LoadSWHSvnRepositoryTsk') + task = get_task('swh.loader.svn.tasks.LoadSvnRepository') if synchroneous: fn = task else: fn = task.delay for args in _produce_svn_to_load(svn_url=url, origin_url=origin_url, visit_date=visit_date, destination_path=destination_path): print(args) if dry_run: continue svn_url, origin_url, visit_date, destination_path = args fn(svn_url=svn_url, origin_url=origin_url, visit_date=visit_date, destination_path=destination_path, start_from_scratch=start_from_scratch) @cli.command('svn-archive', help='Default svndump archive producer') 
@click.option('--visit-date', help="(optional) visit date to override") @click.option('--path', help="Archive's Path to load and mount") @click.option('--synchroneous', is_flag=True, help="To execute directly the svn loading.") @click.option('--dry-run/--no-dry-run', default=False, is_flag=True, help="Dry run flag") @click.option('--start-from-scratch', default=False, is_flag=True, help="Start from scratch option") def produce_archive_to_mount_and_load(path, visit_date, synchroneous, dry_run, start_from_scratch): """Produce svn dumps to celery queue """ - task = get_task('swh.loader.svn.tasks.MountAndLoadSvnRepositoryTsk') + task = get_task('swh.loader.svn.tasks.MountAndLoadSvnRepository') if synchroneous: fn = task else: fn = task.delay for args in _produce_archive_to_mount_and_load(path, visit_date): print(args) if dry_run: continue archive_path, origin_url, visit_date = args fn(archive_path, origin_url, visit_date, start_from_scratch=start_from_scratch) @cli.command('schedule-svn-archive', help='Default svndump archive mounting and loading scheduling') @click.option('--visit-date', help="(optional) visit date to override") @click.option('--path', help="Archive's Path to load and mount") @click.option('--dry-run/--no-dry-run', default=False, is_flag=True, help="Dry run flag") def schedule_archive_to_mount_and_load(path, visit_date, dry_run): """Produce svn dumps to scheduler backend """ scheduler = SchedulerBackend() def make_scheduler_task(path, origin_url, visit_date): return { 'policy': 'oneshot', 'type': 'swh-loader-mount-dump-and-load-svn-repository', 'next_run': datetime.datetime.now(tz=datetime.timezone.utc), 'arguments': { 'args': [ path, ], 'kwargs': { 'origin_url': origin_url, 'visit_date': visit_date, }, } } for tasks in utils.grouper( _produce_archive_to_mount_and_load(path, visit_date), n=1000): tasks = [make_scheduler_task(*t) for t in tasks] print('[%s, ...]' % tasks[0]) if dry_run: continue scheduler.create_tasks(tasks) if __name__ == '__main__': 
cli() diff --git a/swh/loader/svn/svn.py b/swh/loader/svn/svn.py index a11b10a..00b832d 100644 --- a/swh/loader/svn/svn.py +++ b/swh/loader/svn/svn.py @@ -1,268 +1,268 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """SVN client in charge of iterating over svn logs and yield commit representations including the hash tree/content computations per svn commit. """ import logging import os import tempfile import shutil from subvertpy.ra import RemoteAccess, Auth, get_username_provider from subvertpy import client, properties from swh.model.from_disk import Directory from . import ra, converters # When log message contains empty data DEFAULT_AUTHOR_MESSAGE = '' -class SWHSvnRepo: +class SvnRepo: """SWH's svn repository representation. Args: remote_url (str): origin_id (int): Associated origin identifier storage (Storage): Storage to use to execute storage statements local_dirname (str): Path to write intermediary svn action results """ def __init__(self, remote_url, origin_id, storage, local_dirname): self.remote_url = remote_url.rstrip('/') self.storage = storage self.origin_id = origin_id auth = Auth([get_username_provider()]) # one connection for log iteration self.conn_log = RemoteAccess(self.remote_url, auth=auth) # another for replay self.conn = RemoteAccess(self.remote_url, auth=auth) # one client for update operation self.client = client.Client(auth=auth) self.local_dirname = local_dirname local_name = os.path.basename(self.remote_url) self.local_url = os.path.join(self.local_dirname, local_name).encode( 'utf-8') self.uuid = self.conn.get_uuid().encode('utf-8') self.swhreplay = ra.SWHReplay(conn=self.conn, rootpath=self.local_url) def __str__(self): return str({ 'swh-origin': self.origin_id, 'remote_url': self.remote_url, 'local_url': self.local_url, 'uuid': 
self.uuid, }) def head_revision(self): """Retrieve current head revision. """ return self.conn.get_latest_revnum() def initial_revision(self): """Retrieve the initial revision from which the remote url appeared. """ return 1 def convert_commit_message(self, msg): """Simply encode the commit message. Args: msg (str): the commit message to convert. Returns: The transformed message as bytes. """ if isinstance(msg, bytes): return msg return msg.encode('utf-8') def convert_commit_date(self, date): """Convert the message commit date into a timestamp in swh format. The precision is kept. Args: date (str): the commit date to convert. Returns: The transformed date. """ return converters.svn_date_to_swh_date(date) def convert_commit_author(self, author): """Convert the commit author into an swh person. The user becomes a dictionary of the form:: { name: author, email: '', fullname: author } Args: author (str): the commit author to convert. Returns: The transformed author as dict. """ return converters.svn_author_to_swh_person(author) def __to_entry(self, log_entry): changed_paths, rev, revprops, has_children = log_entry author_date = self.convert_commit_date( revprops.get(properties.PROP_REVISION_DATE)) author = self.convert_commit_author( revprops.get(properties.PROP_REVISION_AUTHOR)) message = self.convert_commit_message( revprops.get(properties.PROP_REVISION_LOG, DEFAULT_AUTHOR_MESSAGE)) return { 'rev': rev, 'author_date': author_date, 'author_name': author, 'message': message, } def logs(self, revision_start, revision_end): """Stream svn logs between revision_start and revision_end by chunks of block_size logs. Yields revision and associated revision information between the revision start and revision_end. Args: revision_start: the svn revision starting bound revision_end: the svn revision ending bound Yields: tuple: tuple of revisions and logs: - revisions: list of revisions in order - logs: Dictionary with key revision number and value the log entry. 
The log entry is a dictionary with the following keys: - author_date: date of the commit - author_name: name of the author - message: commit message """ for log_entry in self.conn_log.iter_log(paths=None, start=revision_start, end=revision_end, discover_changed_paths=False): yield self.__to_entry(log_entry) def export(self, revision): """Export the repository to a given version. """ self.client.export(self.remote_url, to=self.local_url.decode('utf-8'), rev=revision, ignore_keywords=True) def export_temporary(self, revision): """Export the repository to a given revision in a temporary location. This is up to the caller of this function to clean up the temporary location when done (cf. self.clean_fs method) Args: revision: Revision to export at Returns: The tuple local_dirname the temporary location root folder, local_url where the repository was exported. """ local_dirname = tempfile.mkdtemp( prefix='check-revision-%s.' % revision, dir=self.local_dirname) local_name = os.path.basename(self.remote_url) local_url = os.path.join(local_dirname, local_name) self.client.export( self.remote_url, to=local_url, rev=revision, ignore_keywords=True) return local_dirname, os.fsencode(local_url) def swh_hash_data_per_revision(self, start_revision, end_revision): """Compute swh hash data per each revision between start_revision and end_revision. Args: start_revision: starting revision end_revision: ending revision Yields: tuple (rev, nextrev, commit, objects_per_path) - rev: current revision - nextrev: next revision - commit: commit data (author, date, message) for such revision - objects_per_path: dictionary of path, swh hash data with type """ for commit in self.logs(start_revision, end_revision): rev = commit['rev'] objects = self.swhreplay.compute_hashes(rev) if rev == end_revision: nextrev = None else: nextrev = rev + 1 yield rev, nextrev, commit, objects, self.swhreplay.directory def swh_hash_data_at_revision(self, revision): """Compute the hash data at revision. 
Expected to be used for update only. """ # Update the disk at revision self.export(revision) # Compute the current hashes on disk directory = Directory.from_disk(path=os.fsencode(self.local_url), save_path=True) # Update the replay collaborator with the right state self.swhreplay = ra.SWHReplay( conn=self.conn, rootpath=self.local_url, directory=directory) # Retrieve the commit information for revision commit = list(self.logs(revision, revision))[0] yield revision, revision + 1, commit, {}, directory def clean_fs(self, local_dirname=None): """Clean up the local working copy. Args: local_dirname (str): Path to remove recursively if provided. Otherwise, remove the temporary upper root tree used for svn repository loading. """ dirname = local_dirname if local_dirname else self.local_dirname if os.path.exists(dirname): logging.debug('cleanup %s' % dirname) shutil.rmtree(dirname) diff --git a/swh/loader/svn/tasks.py b/swh/loader/svn/tasks.py index 1e49255..59b6241 100644 --- a/swh/loader/svn/tasks.py +++ b/swh/loader/svn/tasks.py @@ -1,65 +1,65 @@ # Copyright (C) 2015-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.scheduler.task import Task -from .loader import SWHSvnLoader, SWHSvnLoaderFromDumpArchive +from .loader import SvnLoader, SvnLoaderFromDumpArchive -class LoadSWHSvnRepositoryTsk(Task): +class LoadSvnRepository(Task): """Import one svn repository to Software Heritage. """ task_queue = 'swh_loader_svn' def run_task(self, *, svn_url, destination_path=None, swh_revision=None, origin_url=None, visit_date=None, start_from_scratch=None): """Import a svn repository with swh policy. 
Args: args: ordered arguments (expected None) kwargs: Dictionary with the following expected keys: - svn_url (str): (mandatory) svn's repository url - destination_path (str): (mandatory) root directory to locally retrieve svn's data - origin_url (str): Optional original url override - swh_revision (dict): (optional) extra SWH revision hex to start from. see swh.loader.svn.SvnLoader.process docstring """ - loader = SWHSvnLoader() + loader = SvnLoader() loader.log = self.log return loader.load( svn_url=svn_url, destination_path=destination_path, origin_url=origin_url, swh_revision=swh_revision, visit_date=visit_date, start_from_scratch=start_from_scratch) -class MountAndLoadSvnRepositoryTsk(Task): +class MountAndLoadSvnRepository(Task): task_queue = 'swh_loader_svn_mount_and_load' def run_task(self, *, archive_path, origin_url=None, visit_date=None, start_from_scratch=False): """1. Mount an svn dump from archive as a local svn repository. 2. Load it through the svn loader. 3. Clean up mounted svn repository archive. 
""" - loader = SWHSvnLoaderFromDumpArchive(archive_path) + loader = SvnLoaderFromDumpArchive(archive_path) loader.log = self.log return loader.load(svn_url='file://%s' % loader.repo_path, origin_url=origin_url, visit_date=visit_date, archive_path=archive_path, start_from_scratch=start_from_scratch) diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py index cd7374e..71a00ec 100644 --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -1,905 +1,905 @@ # Copyright (C) 2016-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from nose.tools import istest from test_base import BaseTestSvnLoader from unittest import TestCase from swh.model import hashutil from swh.loader.svn.loader import build_swh_snapshot, DEFAULT_BRANCH -from swh.loader.svn.loader import SWHSvnLoader +from swh.loader.svn.loader import SvnLoader from swh.loader.svn.exception import SvnLoaderEventful, SvnLoaderUneventful from swh.loader.svn.exception import SvnLoaderHistoryAltered class TestSWHSnapshot(TestCase): @istest def build_swh_snapshot(self): actual_snap = build_swh_snapshot('revision-id') self.assertEquals(actual_snap, { 'id': None, 'branches': { DEFAULT_BRANCH: { 'target': 'revision-id', 'target_type': 'revision', } } }) # Define loaders with no storage # They'll just accumulate the data in place # Only for testing purposes. class TestSvnLoader: """Mixin class to inhibit the persistence and keep in memory the data sent for storage. - cf. SWHSvnLoaderNoStorage + cf. 
SvnLoaderNoStorage """ def __init__(self): super().__init__() self.all_contents = [] self.all_directories = [] self.all_revisions = [] self.all_releases = [] self.all_snapshots = [] # Check at each svn revision that the hash tree computation # does not diverge self.check_revision = 10 # typed data self.objects = { 'content': self.all_contents, 'directory': self.all_directories, 'revision': self.all_revisions, 'release': self.all_releases, 'snapshot': self.all_snapshots, } def _add(self, type, l): """Add without duplicates and keeping the insertion order. Args: type (str): Type of objects concerned by the action l ([object]): List of 'type' object """ col = self.objects[type] for o in l: if o in col: continue col.extend([o]) def maybe_load_contents(self, all_contents): self._add('content', all_contents) def maybe_load_directories(self, all_directories): self._add('directory', all_directories) def maybe_load_revisions(self, all_revisions): self._add('revision', all_revisions) def maybe_load_releases(self, releases): raise ValueError('If called, the test must break.') def maybe_load_snapshot(self, snapshot): self._add('snapshot', [snapshot]) def send_origin(self, origin): return 1 # Override to do nothing at the end def close_failure(self): pass def close_success(self): pass def pre_cleanup(self): pass -class SvnLoaderNoStorage(TestSvnLoader, SWHSvnLoader): +class SvnLoaderNoStorage(TestSvnLoader, SvnLoader): """An SWHSVNLoader with no persistence. Context: Load a new svn repository using the swh policy (so no update). """ def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None): """We do not know this repository so no revision. """ return {} -class SvnLoaderUpdateNoStorage(TestSvnLoader, SWHSvnLoader): +class SvnLoaderUpdateNoStorage(TestSvnLoader, SvnLoader): """An SWHSVNLoader with no persistence. Context: Load a known svn repository using the swh policy. 
We can either: - do nothing since it does not contain any new commit (so no change) - either check its history is not altered and update in consequence by loading the new revision """ def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None): """Avoid the storage persistence call and return the expected previous revision for that repository. Check the following for explanation about the hashes: - test_loader.org for (swh policy). - - cf. SWHSvnLoaderITTest + - cf. SvnLoaderITTest """ return { 'snapshot': 'something', # need a snapshot of sort 'revision': { 'id': hashutil.hash_to_bytes( '4876cb10aec6f708f7466dddf547567b65f6c39c'), 'parents': [hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41')], 'directory': hashutil.hash_to_bytes( '0deab3023ac59398ae467fc4bff5583008af1ee2'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], ['svn_revision', '6'] ] } } } -class SvnLoaderUpdateHistoryAlteredNoStorage(TestSvnLoader, SWHSvnLoader): +class SvnLoaderUpdateHistoryAlteredNoStorage(TestSvnLoader, SvnLoader): """An SWHSVNLoader with no persistence. Context: Load a known svn repository using the swh policy with its history altered so we do not update it. """ def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None): """Avoid the storage persistence call and return the expected previous revision for that repository. Check the following for explanation about the hashes: - test_loader.org for (swh policy). - - cf. SWHSvnLoaderITTest + - cf. 
SvnLoaderITTest """ return { 'snapshot': None, 'revision': { # Changed the revision id's hash to simulate history altered 'id': hashutil.hash_to_bytes( 'badbadbadbadf708f7466dddf547567b65f6c39d'), 'parents': [hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41')], 'directory': hashutil.hash_to_bytes( '0deab3023ac59398ae467fc4bff5583008af1ee2'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], ['svn_revision', b'6'] ] } } } class SvnLoaderITest1(BaseTestSvnLoader): """Load an unknown svn repository results in new data. """ def setUp(self): super().setUp() self.loader = SvnLoaderNoStorage() @istest def load(self): """Load a new repository results in new swh object and snapshot """ # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then self.assertEquals(len(self.loader.all_revisions), 6) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '4876cb10aec6f708f7466dddf547567b65f6c39c' # cf. 
test_loader.org for explaining from where those hashes # come from expected_revisions = { # revision hash | directory hash '0d7dd5f751cef8fe17e8024f7d6b0e3aac2cfd71': '669a71cce6c424a81ba42b7dc5d560d32252f0ca', # noqa '95edacc8848369d6fb1608e887d6d2474fd5224f': '008ac97a1118560797c50e3392fa1443acdaa349', # noqa 'fef26ea45a520071711ba2b9d16a2985ee837021': '3780effbe846a26751a95a8c95c511fb72be15b4', # noqa '3f51abf3b3d466571be0855dfa67e094f9ceff1b': 'ffcca9b09c5827a6b8137322d4339c8055c3ee1e', # noqa 'a3a577948fdbda9d1061913b77a1588695eadb41': '7dc52cc04c3b8bd7c085900d60c159f7b846f866', # noqa last_revision: '0deab3023ac59398ae467fc4bff5583008af1ee2', # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITest2(BaseTestSvnLoader): """Load a visited repository with no new change results in no data change. """ def setUp(self): super().setUp() self.loader = SvnLoaderUpdateNoStorage() @istest def load(self): """Load a repository without new changes results in same snapshot """ # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then self.assertEquals(len(self.loader.all_contents), 0) self.assertEquals(len(self.loader.all_directories), 0) self.assertEquals(len(self.loader.all_revisions), 0) self.assertEquals(len(self.loader.all_releases), 0) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'uneventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITest3(BaseTestSvnLoader): """In this scenario, the dump has been tampered with to modify the commit log. 
This results in a hash divergence which is detected at startup. In effect, that stops the loading and do nothing. """ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderUpdateHistoryAlteredNoStorage() @istest def load(self): """Load known repository with history altered should do nothing """ # when self.loader.load(svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then # we got the previous run's last revision (rev 6) # so 2 news + 1 old self.assertEquals(len(self.loader.all_contents), 0) self.assertEquals(len(self.loader.all_directories), 0) self.assertEquals(len(self.loader.all_revisions), 0) self.assertEquals(len(self.loader.all_releases), 0) self.assertEquals(len(self.loader.all_snapshots), 0) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'uneventful'}) self.assertEqual(self.loader.visit_status(), 'partial') class SvnLoaderITest4(BaseTestSvnLoader): """In this scenario, the repository has been updated with new changes. The loading visit should result in new objects stored and 1 new snapshot. """ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderUpdateNoStorage() @istest def process_repository(self): """Process updated repository should yield new objects """ # when self.loader.load(svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then # we got the previous run's last revision (rev 6) # so 2 new self.assertEquals(len(self.loader.all_revisions), 5) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '171dc35522bfd17dda4e90a542a0377fb2fc707a' # cf. 
test_loader.org for explaining from where those hashes # come from expected_revisions = { # revision hash | directory hash '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa last_revision: 'fd24a76c87a3207428e06612b49860fc78e9f6dc' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITTest5(BaseTestSvnLoader): """Context: - Repository already injected with successfull data - New visit from scratch done with successfull load """ def setUp(self): # the svn repository pkg-gourmet has been updated with changes super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderUpdateNoStorage() @istest def load(self): """Load an existing repository from scratch yields same swh objects """ # when self.loader.load(svn_url=self.svn_mirror_url, destination_path=self.destination_path, start_from_scratch=True) # then # we got the previous run's last revision (rev 6) # but we do not inspect that as we start from from scratch so # we should have all revisions so 11 self.assertEquals(len(self.loader.all_revisions), 11) self.assertEquals(len(self.loader.all_releases), 0) expected_revisions = { '0d7dd5f751cef8fe17e8024f7d6b0e3aac2cfd71': '669a71cce6c424a81ba42b7dc5d560d32252f0ca', # noqa '95edacc8848369d6fb1608e887d6d2474fd5224f': '008ac97a1118560797c50e3392fa1443acdaa349', # noqa 'fef26ea45a520071711ba2b9d16a2985ee837021': '3780effbe846a26751a95a8c95c511fb72be15b4', # 
noqa '3f51abf3b3d466571be0855dfa67e094f9ceff1b': 'ffcca9b09c5827a6b8137322d4339c8055c3ee1e', # noqa 'a3a577948fdbda9d1061913b77a1588695eadb41': '7dc52cc04c3b8bd7c085900d60c159f7b846f866', # noqa '4876cb10aec6f708f7466dddf547567b65f6c39c': '0deab3023ac59398ae467fc4bff5583008af1ee2', # noqa '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa '171dc35522bfd17dda4e90a542a0377fb2fc707a': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') -class SvnLoaderWithPreviousRevisionNoStorage(TestSvnLoader, SWHSvnLoader): +class SvnLoaderWithPreviousRevisionNoStorage(TestSvnLoader, SvnLoader): """An SWHSVNLoader with no persistence. Context: Load a known svn repository using the swh policy with its history altered so we do not update it. """ def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None): """Avoid the storage persistence call and return the expected previous revision for that repository. Check the following for explanation about the hashes: - test_loader.org for (swh policy). - - cf. SWHSvnLoaderITTest + - cf. 
SvnLoaderITTest """ return { 'snapshot': None, 'revision': { 'id': hashutil.hash_to_bytes( '4876cb10aec6f708f7466dddf547567b65f6c39c'), 'parents': [hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41')], 'directory': hashutil.hash_to_bytes( '0deab3023ac59398ae467fc4bff5583008af1ee2'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], # noqa ['svn_revision', '6'] ] } } } class SvnLoaderITTest6(BaseTestSvnLoader): """Context: - repository already visited with load successfull - Changes on existing repository - New Visit done with successful new data """ def setUp(self): super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderWithPreviousRevisionNoStorage() @istest def load(self): """Load from partial previous visit result in new changes """ # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then # we got the previous run's last revision (rev 6) # so 2 new self.assertEquals(len(self.loader.all_revisions), 5) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '171dc35522bfd17dda4e90a542a0377fb2fc707a' # cf. 
test_loader.org for explaining from where those hashes # come from expected_revisions = { # revision hash | directory hash '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa last_revision: 'fd24a76c87a3207428e06612b49860fc78e9f6dc' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITest7(BaseTestSvnLoader): """Context: - repository already visited with load successfull - Changes on existing repository - New Visit done with successful new data """ def setUp(self): super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderUpdateNoStorage() @istest def load(self): """Load known and partial repository should start from last visit """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41'), 'parents': [hashutil.hash_to_bytes( '3f51abf3b3d466571be0855dfa67e094f9ceff1b')], 'directory': hashutil.hash_to_bytes( '7dc52cc04c3b8bd7c085900d60c159f7b846f866'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], ['svn_revision', '5'] ] } } # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path, swh_revision=previous_unfinished_revision) # then # we got the previous run's last revision (rev 6) # so 2 new self.assertEquals(len(self.loader.all_revisions), 5) self.assertEquals(len(self.loader.all_releases), 
0) last_revision = '171dc35522bfd17dda4e90a542a0377fb2fc707a' # cf. test_loader.org for explaining from where those hashes # come from expected_revisions = { # revision hash | directory hash '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa last_revision: 'fd24a76c87a3207428e06612b49860fc78e9f6dc' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') -class SvnLoaderUpdateLessRecentNoStorage(TestSvnLoader, SWHSvnLoader): +class SvnLoaderUpdateLessRecentNoStorage(TestSvnLoader, SvnLoader): """Context: Load a known svn repository. The last visit seen is less recent than a previous unfinished crawl. """ def swh_latest_snapshot_revision(self, origin_id, prev_swh_revision=None): """Avoid the storage persistence call and return the expected previous revision for that repository. Check the following for explanation about the hashes: - test_loader.org for (swh policy). - - cf. SWHSvnLoaderITTest + - cf. 
SvnLoaderITTest """ return { 'snapshot': None, 'revision': { 'id': hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41'), 'parents': [hashutil.hash_to_bytes( '3f51abf3b3d466571be0855dfa67e094f9ceff1b')], 'directory': hashutil.hash_to_bytes( '7dc52cc04c3b8bd7c085900d60c159f7b846f866'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], ['svn_revision', '5'] ] } } } class SvnLoaderITest8(BaseTestSvnLoader): """Context: - Previous visit on existing repository done - Starting the loading from the last unfinished visit - New objects are created (1 new snapshot) """ def setUp(self): super().setUp(archive_name='pkg-gourmet-with-updates.tgz') self.loader = SvnLoaderUpdateLessRecentNoStorage() @istest def load(self): """Load repository should yield revisions starting from last visit """ previous_unfinished_revision = { 'id': hashutil.hash_to_bytes( '4876cb10aec6f708f7466dddf547567b65f6c39c'), 'parents': [hashutil.hash_to_bytes( 'a3a577948fdbda9d1061913b77a1588695eadb41')], 'directory': hashutil.hash_to_bytes( '0deab3023ac59398ae467fc4bff5583008af1ee2'), 'target_type': 'revision', 'metadata': { 'extra_headers': [ ['svn_repo_uuid', '3187e211-bb14-4c82-9596-0b59d67cd7f4'], ['svn_revision', '6'] ] } } # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path, swh_revision=previous_unfinished_revision) # then # we got the previous run's last revision (rev 6) # so 2 new self.assertEquals(len(self.loader.all_revisions), 5) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '171dc35522bfd17dda4e90a542a0377fb2fc707a' # cf. 
test_loader.org for explaining from where those hashes # come from expected_revisions = { # revision hash | directory hash '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa last_revision: 'fd24a76c87a3207428e06612b49860fc78e9f6dc' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderTTest9(BaseTestSvnLoader): """Check that a svn repo containing a versioned file with CRLF line endings with svn:eol-style property set to 'native' (this is a violation of svn specification as the file should have been stored with LF line endings) can be loaded anyway. 
""" def setUp(self): super().setUp(archive_name='mediawiki-repo-r407-eol-native-crlf.tgz', filename='mediawiki-repo-r407-eol-native-crlf') self.loader = SvnLoaderNoStorage() @istest def process_repository(self): """Load repository with CRLF endings (svn:eol-style: native) is ok """ # noqa # when self.loader.load(svn_url=self.svn_mirror_url, destination_path=self.destination_path) expected_revisions = { '7da4975c363101b819756d33459f30a866d01b1b': 'f63637223ee0f7d4951ffd2d4d9547a4882c5d8b' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITest10(BaseTestSvnLoader): # noqa """Check that a svn repo containing a versioned file with mixed CRLF/LF line endings with svn:eol-style property set to 'native' (this is a violation of svn specification as mixed line endings for textual content should not be stored when the svn:eol-style property is set) can be loaded anyway. 
""" def setUp(self): super().setUp( archive_name='pyang-repo-r343-eol-native-mixed-lf-crlf.tgz', filename='pyang-repo-r343-eol-native-mixed-lf-crlf') self.loader = SvnLoaderNoStorage() @istest def load(self): """Load repo with mixed CRLF/LF endings (svn:eol-style:native) is ok """ self.loader.load(svn_url=self.svn_mirror_url, destination_path=self.destination_path) expected_revisions = { '9c6962eeb9164a636c374be700672355e34a98a7': '16aa6b6271f3456d4643999d234cf39fe3d0cc5a' # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state # self.assertEquals(self.loader.all_snapshots[0], {}) self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITest11(BaseTestSvnLoader): """Context: - Repository with svn:external (which is not deal with for now) - Visit is partial with as much data loaded as possible """ def setUp(self): super().setUp(archive_name='pkg-gourmet-with-external-id.tgz') self.loader = SvnLoaderNoStorage() @istest def load(self): """Repository with svn:externals property, will stop raising an error """ previous_unfinished_revision = None # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path, swh_revision=previous_unfinished_revision) # then repositories holds 21 revisions, but the last commit # one holds an 'svn:externals' property which will make the # loader-svn stops at the last revision prior to the bad one self.assertEquals(len(self.loader.all_revisions), 20) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '82a7a4a09f9549223429143ba36ad77375e33c5c' expected_revisions = { # revision hash | directory hash '0d7dd5f751cef8fe17e8024f7d6b0e3aac2cfd71': '669a71cce6c424a81ba42b7dc5d560d32252f0ca', # noqa '95edacc8848369d6fb1608e887d6d2474fd5224f': '008ac97a1118560797c50e3392fa1443acdaa349', # noqa 'fef26ea45a520071711ba2b9d16a2985ee837021': 
'3780effbe846a26751a95a8c95c511fb72be15b4', # noqa '3f51abf3b3d466571be0855dfa67e094f9ceff1b': 'ffcca9b09c5827a6b8137322d4339c8055c3ee1e', # noqa 'a3a577948fdbda9d1061913b77a1588695eadb41': '7dc52cc04c3b8bd7c085900d60c159f7b846f866', # noqa '4876cb10aec6f708f7466dddf547567b65f6c39c': '0deab3023ac59398ae467fc4bff5583008af1ee2', # noqa '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa '171dc35522bfd17dda4e90a542a0377fb2fc707a': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa '027e8769f4786597436ab94a91f85527d04a6cbb': '2d9ca72c6afec6284fb01e459588cbb007017c8c', # noqa '4474d96018877742d9697d5c76666c9693353bfc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa '97ad21eab92961e2a22ca0285f09c6d1e9a7ffbc': 'ab111577e0ab39e4a157c476072af48f2641d93f', # noqa 'd04ea8afcee6205cc8384c091bfc578931c169fd': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa 'ded78810401fd354ffe894aa4a1e5c7d30a645d1': 'b0a648b02e55a4dce356ac35187a058f89694ec7', # noqa '4ee95e39358712f53c4fc720da3fafee9249ed19': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa 'ffa901b69ca0f46a2261f42948838d19709cb9f8': 'c3c98df624733fef4e592bef983f93e2ed02b179', # noqa '0148ae3eaa520b73a50802c59f3f416b7a36cf8c': '844d4646d6c2b4f3a3b2b22ab0ee38c7df07bab2', # noqa last_revision: '0de6e75d2b79ec90d00a3a7611aa3861b2e4aa5e', # noqa } # The last revision being the one used later to start back from self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'partial') class 
SvnLoaderITest12(BaseTestSvnLoader): """Edge cases: - first create a file and commit it. Remove it, then add folder holding the same name, commit. - do the same scenario with symbolic link (instead of file) """ def setUp(self): super().setUp( archive_name='pkg-gourmet-with-edge-case-links-and-files.tgz') self.loader = SvnLoaderNoStorage() @istest def load(self): """File/Link removed prior to folder with same name creation is ok """ previous_unfinished_revision = None # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path, swh_revision=previous_unfinished_revision) # then repositories holds 14 revisions, but the last commit self.assertEquals(len(self.loader.all_revisions), 19) self.assertEquals(len(self.loader.all_releases), 0) last_revision = '3f43af2578fccf18b0d4198e48563da7929dc608' expected_revisions = { # revision hash | directory hash '0d7dd5f751cef8fe17e8024f7d6b0e3aac2cfd71': '669a71cce6c424a81ba42b7dc5d560d32252f0ca', # noqa '95edacc8848369d6fb1608e887d6d2474fd5224f': '008ac97a1118560797c50e3392fa1443acdaa349', # noqa 'fef26ea45a520071711ba2b9d16a2985ee837021': '3780effbe846a26751a95a8c95c511fb72be15b4', # noqa '3f51abf3b3d466571be0855dfa67e094f9ceff1b': 'ffcca9b09c5827a6b8137322d4339c8055c3ee1e', # noqa 'a3a577948fdbda9d1061913b77a1588695eadb41': '7dc52cc04c3b8bd7c085900d60c159f7b846f866', # noqa '4876cb10aec6f708f7466dddf547567b65f6c39c': '0deab3023ac59398ae467fc4bff5583008af1ee2', # noqa '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa '171dc35522bfd17dda4e90a542a0377fb2fc707a': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa '9231f9a98a9051a0cd34231cddd4e11773f8348e': 
'6c07f4f4ac780eaf99a247fbfd0897533598dd36', # noqa 'c309bd3b57796696d6655ab3ab0b438fdd2d8201': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa 'bb78300cc1ac9119eb6fffa9e9fa04a7f9340b11': 'ee995a0d85f6917c75bcee3aa448bea7726b265d', # noqa 'f2e01111329f84580dc3febb1fd45515692c5886': 'e2baec7b6a5543758e9c73695bc847db0a4f7941', # noqa '1a0f70c34e211f073e1be3435ecf6f0dd7700267': 'e7536e721fa806c19971b749c091c144b2f2b88e', # noqa '0c612a23d293cc3100496a54ae4ad13d750efe4c': '2123d12749294bbfb54e73f9d73fac658aabb266', # noqa '69a53d972e2f863acbbbda546d9da96287af6a88': '13896cb96ec004140ce5be8852fee8c29830d9c7', # noqa last_revision: '6b1e0243768ff9ac060064b2eeade77e764ffc82', # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full') class SvnLoaderITTest13(BaseTestSvnLoader): """Edge cases: - wrong symbolic link - wrong symbolic link with empty space names """ def setUp(self): super().setUp( archive_name='pkg-gourmet-with-wrong-link-cases.tgz') self.loader = SvnLoaderNoStorage() @istest def load(self): """Wrong link or empty space-named link should be ok """ # when self.loader.load( svn_url=self.svn_mirror_url, destination_path=self.destination_path) # then repositories holds 14 revisions, but the last commit self.assertEquals(len(self.loader.all_revisions), 21) self.assertEquals(len(self.loader.all_releases), 0) last_revision = 'cf30d3bb9d5967d0a2bbeacc405f10a5dd9b138a' expected_revisions = { # revision hash | directory hash '0d7dd5f751cef8fe17e8024f7d6b0e3aac2cfd71': '669a71cce6c424a81ba42b7dc5d560d32252f0ca', # noqa '95edacc8848369d6fb1608e887d6d2474fd5224f': '008ac97a1118560797c50e3392fa1443acdaa349', # noqa 'fef26ea45a520071711ba2b9d16a2985ee837021': '3780effbe846a26751a95a8c95c511fb72be15b4', # noqa '3f51abf3b3d466571be0855dfa67e094f9ceff1b': 
'ffcca9b09c5827a6b8137322d4339c8055c3ee1e', # noqa 'a3a577948fdbda9d1061913b77a1588695eadb41': '7dc52cc04c3b8bd7c085900d60c159f7b846f866', # noqa '4876cb10aec6f708f7466dddf547567b65f6c39c': '0deab3023ac59398ae467fc4bff5583008af1ee2', # noqa '7f5bc909c29d4e93d8ccfdda516e51ed44930ee1': '752c52134dcbf2fff13c7be1ce4e9e5dbf428a59', # noqa '38d81702cb28db4f1a6821e64321e5825d1f7fd6': '39c813fb4717a4864bacefbd90b51a3241ae4140', # noqa '99c27ebbd43feca179ac0e895af131d8314cafe1': '3397ca7f709639cbd36b18a0d1b70bce80018c45', # noqa '902f29b4323a9b9de3af6d28e72dd581e76d9397': 'c4e12483f0a13e6851459295a4ae735eb4e4b5c4', # noqa '171dc35522bfd17dda4e90a542a0377fb2fc707a': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa '9231f9a98a9051a0cd34231cddd4e11773f8348e': '6c07f4f4ac780eaf99a247fbfd0897533598dd36', # noqa 'c309bd3b57796696d6655ab3ab0b438fdd2d8201': 'fd24a76c87a3207428e06612b49860fc78e9f6dc', # noqa 'bb78300cc1ac9119eb6fffa9e9fa04a7f9340b11': 'ee995a0d85f6917c75bcee3aa448bea7726b265d', # noqa 'f2e01111329f84580dc3febb1fd45515692c5886': 'e2baec7b6a5543758e9c73695bc847db0a4f7941', # noqa '1a0f70c34e211f073e1be3435ecf6f0dd7700267': 'e7536e721fa806c19971b749c091c144b2f2b88e', # noqa '0c612a23d293cc3100496a54ae4ad13d750efe4c': '2123d12749294bbfb54e73f9d73fac658aabb266', # noqa '69a53d972e2f863acbbbda546d9da96287af6a88': '13896cb96ec004140ce5be8852fee8c29830d9c7', # noqa '3f43af2578fccf18b0d4198e48563da7929dc608': '6b1e0243768ff9ac060064b2eeade77e764ffc82', # noqa '4ab5fc264732cd474d2e695c5ac66e4933bdad74': '9a1f5e3961db89422250ce6c1441476f40d65205', # noqa last_revision: 'd853d9628f6f0008d324fed27dadad00ce48bc62', # noqa } self.assertRevisionsOk(expected_revisions) self.assertEquals(len(self.loader.all_snapshots), 1) # FIXME: Check the snapshot's state self.assertEqual(self.loader.load_status(), {'status': 'eventful'}) self.assertEqual(self.loader.visit_status(), 'full')