Patch summary for histogram.py (text before the "diff --git" line is
commentary and is skipped by patch tools):

  * Depth computation: the old query used the number of '/' characters in
    location.path as the depth. The new query reports depth 0 when the path
    is '' or '.', and 1 + the number of '/' characters otherwise.
  * Output file: the CSV name is now built in a "filename" variable and
    carries a "depths_" prefix.

NOTE(review): this patch was recovered from a copy whose line breaks had been
collapsed. Record boundaries follow the hunk header (36 old / 43 new lines),
but the indentation and blank-line placement of the context lines are
reconstructed; check them against histogram.py before applying.

diff --git a/histogram.py b/histogram.py
index 80ddd95..3e78e2a 100755
--- a/histogram.py
+++ b/histogram.py
@@ -1,36 +1,43 @@
 #!/usr/bin/env python
 
 import io
 import os
 from collections import Counter
 
 from swh.provenance import get_provenance
 
 
 # TODO: take conninfo as command line arguments.
 conninfo = {
     "cls": "local",
     "db": {"host": "/var/run/postgresql", "port": "5436", "dbname": "provenance"},
 }
 
 
 if __name__ == "__main__":
     # Get provenance object for both databases and query its lists of content.
     provenance = get_provenance(**conninfo)
 
     tables = ["directory_in_rev", "content_in_dir"]
 
     for table in tables:
         provenance.cursor.execute(f"""SELECT depths.depth, COUNT(depths.depth)
-                      FROM (SELECT (CHAR_LENGTH(ENCODE(location.path, 'escape')) - CHAR_LENGTH(REPLACE(ENCODE(location.path, 'escape'), '/', ''))) / CHAR_LENGTH('/') AS depth
+                      FROM (SELECT
+                              CASE location.path
+                                WHEN '' THEN 0
+                                WHEN '.' THEN 0
+                                ELSE 1 + CHAR_LENGTH(ENCODE(location.path, 'escape')) -
+                                         CHAR_LENGTH(REPLACE(ENCODE(location.path, 'escape'), '/', ''))
+                              END AS depth
                             FROM {table}
                             JOIN location
                             ON {table}.loc=location.id
                            ) AS depths
                       GROUP BY depths.depth
                       ORDER BY depths.depth""")
 
-        with io.open(conninfo["db"]["dbname"] + f"_{table}.csv", "w") as outfile:
+        filename = "depths_" + conninfo["db"]["dbname"] + f"_{table}.csv"
+        with io.open(filename, "w") as outfile:
             outfile.write(f"{table} depth,{table} count\n")
             for depth, count in provenance.cursor.fetchall():
                 outfile.write(f"{depth},{count}\n")