{
 "metadata": {
  "name": "",
  "signature": "sha256:d78d18aaa552a170eaac998e2cce6c8d9abd0d9453361f243a61a0293232550d"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline\n",
      "\n",
      "import datetime\n",
      "import pprint\n",
      "import random\n",
      "\n",
      "import matplotlib\n",
      "import matplotlib.pyplot as plt\n",
      "import numpy\n",
      "\n",
      "import mpld3\n",
      "mpld3.enable_notebook()\n",
      "\n",
      "import psycopg2"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Selects (ts, value) pairs from log rows whose message contains 'succeeded'\n",
      "# (~~ is PostgreSQL's LIKE operator). The value is the 8th space-separated\n",
      "# token of the message with leading/trailing 's' and ':' characters trimmed,\n",
      "# cast to float.\n",
      "# '%%' is a literal percent sign because the query goes through psycopg2's\n",
      "# parameter substitution; the single '%s' is bound to MIN_TIMESTAMP.\n",
      "QUERY = \"\"\"\n",
      "select ts, trim(both 's:' from split_part(message, ' ', 8))::float\n",
      "from log\n",
      "where message ~~ '%%succeeded%%' and ts > %s\n",
      "order by ts\n",
      "\"\"\"\n",
      "\n",
      "# Only rows from the last six hours are considered.\n",
      "MIN_TIMESTAMP = datetime.datetime.now() - datetime.timedelta(hours=6)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Run QUERY and keep every row in memory as `results`.\n",
      "db = psycopg2.connect('service=softwareheritage-log')\n",
      "try:\n",
      "    with db.cursor() as cur:\n",
      "        cur.execute(QUERY, (MIN_TIMESTAMP,))\n",
      "        results = cur.fetchall()\n",
      "finally:\n",
      "    # Close explicitly so re-running this cell does not leak a database\n",
      "    # session; the cursor context manager only closes the cursor.\n",
      "    db.close()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Sanity check: row count and a peek at the first few rows.\n",
      "print(len(results))\n",
      "pprint.pprint(results[:5])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Split the (ts, value) rows into two parallel lists for plotting and stats.\n",
      "# To work on a subset instead, iterate over random.sample(results, 4000).\n",
      "xdata = [ts for ts, duration in results]\n",
      "ydata = [duration for ts, duration in results]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "if xdata:\n",
      "    # The right-hand limit is the 100th point from the end. With fewer than\n",
      "    # 100 rows xdata[-100] would raise IndexError, so fall back to the last\n",
      "    # point in that case.\n",
      "    x_max = xdata[-100] if len(xdata) >= 100 else xdata[-1]\n",
      "    plt.scatter(xdata, ydata)\n",
      "    plt.xlim(xdata[0], x_max)\n",
      "    plt.ylim(0, 10)\n",
      "    # Units assumed to be seconds, matching the ETA computation further down.\n",
      "    plt.xlabel('log timestamp')\n",
      "    plt.ylabel('duration (s)')\n",
      "    plt.show()\n",
      "else:\n",
      "    print('No log rows matched; nothing to plot.')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "numpy.average(ydata), numpy.std(ydata)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "PARALLEL = 8 * 4  # 8 workers * 4 jobs each\n",
      "REPOS = 14 * 1000 * 1000\n",
      "# Average value per row (treated as seconds) times the number of repos,\n",
      "# converted to days and spread across the parallel jobs. PARALLEL is used\n",
      "# instead of a duplicated literal 32 so the estimate follows the constant.\n",
      "ETA_days = REPOS * numpy.average(ydata) / 60 / 60 / 24 / PARALLEL\n",
      "ETA_days"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}