diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java index ba34c94..18c4100 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,71 +1,71 @@ package org.softwareheritage.graph; import java.io.IOException; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.NodeIdMap; import org.softwareheritage.graph.SwhId; public class Graph { BVGraph graph; BVGraph graphTransposed; String path; NodeIdMap nodeIdMap; public Graph(String path) throws IOException { this.graph = BVGraph.load(path); this.graphTransposed = BVGraph.load(path + "-transposed"); this.path = path; this.nodeIdMap = new NodeIdMap(path, getNbNodes()); } public void cleanUp() throws IOException { nodeIdMap.close(); } public String getPath() { return path; } public long getNodeId(SwhId swhId) { return nodeIdMap.getNodeId(swhId); } public SwhId getSwhId(long nodeId) { return nodeIdMap.getSwhId(nodeId); } public long getNbNodes() { return graph.numNodes(); } public long getNbEdges() { return graph.numArcs(); } public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } public long outdegree(long nodeId) { return graph.outdegree(nodeId); } public LazyLongIterator predecessors(long nodeId) { return graphTransposed.successors(nodeId); } public long indegree(long nodeId) { return graphTransposed.outdegree(nodeId); } - public long degree(long nodeId, boolean isTransposed) { - return (isTransposed) ? indegree(nodeId) : outdegree(nodeId); + public long degree(long nodeId, boolean useTransposed) { + return (useTransposed) ? indegree(nodeId) : outdegree(nodeId); } - public LazyLongIterator neighbors(long nodeId, boolean isTransposed) { - return (isTransposed) ? predecessors(nodeId) : successors(nodeId); + public LazyLongIterator neighbors(long nodeId, boolean useTransposed) { + return (useTransposed) ? predecessors(nodeId) : successors(nodeId); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java index 993bf68..a70de75 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java @@ -1,75 +1,75 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Stack; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.SwhPath; public class Visit { Graph graph; - boolean isTransposed; + boolean useTransposed; String allowedEdges; Stack currentPath; ArrayList paths; LongArrayBitVector visited; public Visit(Graph graph, SwhId swhId, String allowedEdges, String algorithm, String direction) { if (!algorithm.matches("dfs|bfs")) { throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); } if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } this.graph = graph; - this.isTransposed = (direction.equals("backward")); + this.useTransposed = (direction.equals("backward")); this.allowedEdges = allowedEdges; this.paths = new ArrayList(); this.currentPath = new Stack(); this.visited = LongArrayBitVector.ofLength(graph.getNbNodes()); if (algorithm.equals("dfs")) { dfs(graph.getNodeId(swhId)); } } // Allow Jackson JSON to only serialize the 'paths' field public ArrayList getPaths() { return paths; } private void dfs(long currentNodeId) { visited.set(currentNodeId); currentPath.push(currentNodeId); - long degree = graph.degree(currentNodeId, isTransposed); - LazyLongIterator neighbors = graph.neighbors(currentNodeId, isTransposed); + long degree = graph.degree(currentNodeId, useTransposed); + LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); if (degree == 0) { SwhPath path = new SwhPath(); for (long nodeId : currentPath) { path.add(graph.getSwhId(nodeId)); } paths.add(path); } while (degree-- > 0) { long nextNodeId = neighbors.nextLong(); if (isEdgeAllowed(currentNodeId, nextNodeId) && !visited.getBoolean(nextNodeId)) { dfs(nextNodeId); } } currentPath.pop(); } private boolean isEdgeAllowed(long currentNodeId, long nextNodeId) { // TODO return true; } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java new file mode 100644 index 0000000..a7f91dd --- /dev/null +++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java @@ -0,0 +1,62 @@ +package org.softwareheritage.graph.benchmark; + +import java.io.IOException; +import java.util.Stack; + +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.bits.LongArrayBitVector; + +import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.SwhId; +import org.softwareheritage.graph.algo.Visit; + +public class LinuxLog { + public static void main(String[] args) throws IOException { + String path = args[0]; + Graph graph = new Graph(path); + + final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35"); + final long expectedCount = 723640; + System.out.println("git log " + commit); + + long count = countCommits(graph, commit); + System.out.println("Counted " + count + " commits"); + if (count != expectedCount) { + throw new IllegalArgumentException("Expected " + expectedCount + " commits"); + } + + long startTime = System.nanoTime(); + Visit visit = new Visit(graph, commit, "rev", "dfs", "backward"); + long endTime = System.nanoTime(); + double duration = (double) (endTime - startTime) / 1_000_000_000; + System.out.println("Visit operation done in: " + duration + " seconds"); + } + + static long countCommits(Graph graph, SwhId commit) { + final boolean useTransposed = true; + + Stack stack = new Stack(); + LongArrayBitVector visited = LongArrayBitVector.ofLength(graph.getNbNodes()); + long startNodeId = graph.getNodeId(commit); + long count = 0; + + stack.push(startNodeId); + visited.set(startNodeId); + while (!stack.empty()) { + long currentNodeId = stack.pop(); + count++; + + long degree = graph.degree(currentNodeId, useTransposed); + LazyLongIterator revisions = graph.neighbors(currentNodeId, useTransposed); + while (degree-- > 0) { + long nextNodeId = revisions.nextLong(); + if (!visited.getBoolean(nextNodeId)) { + visited.set(nextNodeId); + stack.push(nextNodeId); + } + } + } + + return count; + } +} diff --git a/reports/linux_log/LinuxLog.java b/reports/linux_log/LinuxLog.java deleted file mode 100644 index ab8f0b7..0000000 --- a/reports/linux_log/LinuxLog.java +++ /dev/null @@ -1,62 +0,0 @@ -package org.softwareheritage.graph; - -import java.util.Stack; - -import it.unimi.dsi.big.webgraph.LazyLongIterator; -import it.unimi.dsi.bits.LongArrayBitVector; - -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.SwhId; -import org.softwareheritage.graph.algo.Visit; - -public class LinuxLog { - private static final String COMMIT_HASH = "f39d7d78b70e0f39facb1e4fab77ad3df5c52a35"; - private static final long EXPECTED_COUNT = 723640; - private static final boolean TRANSPOSED = true; - - public static void main(String[] args) throws Exception { - String graphPath = args[0]; - Graph graph = new Graph(graphPath); - SwhId commit = new SwhId(COMMIT_HASH); - long startNode = graph.getNode(commit); - - System.out.println("git log " + COMMIT_HASH); - System.out.println("Expecting " + EXPECTED_COUNT + " commits"); - - long count = countCommits(graph, startNode); - if (count != EXPECTED_COUNT) { - throw new Exception("Counted " + count + " commits"); - } - - long startTime = System.nanoTime(); - Visit visit = new Visit(graph, commit, "rev", "dfs", "backward"); - long endTime = System.nanoTime(); - double duration = (double) (endTime - startTime) / 1_000_000_000; - System.out.println("Visit done in: " + duration + " seconds"); - } - - static long countCommits(Graph graph, long startNode) { - Stack stack = new Stack(); - LongArrayBitVector visited = LongArrayBitVector.ofLength(graph.getNbNodes()); - long count = 0; - - stack.push(startNode); - visited.set(startNode); - while (!stack.empty()) { - long currentNode = stack.pop(); - count++; - - long degree = graph.degree(currentNode, TRANSPOSED); - LazyLongIterator revisions = graph.neighbors(currentNode, TRANSPOSED); - while (degree-- > 0) { - long nextNode = revisions.nextLong(); - if (!visited.getBoolean(nextNode)) { - visited.set(nextNode); - stack.push(nextNode); - } - } - } - - return count; - } -} diff --git a/reports/linux_log/linux_log.tex b/reports/linux_log/linux_log.tex index 3ba1852..00d971c 100644 --- a/reports/linux_log/linux_log.tex +++ b/reports/linux_log/linux_log.tex @@ -1,62 +1,62 @@ \documentclass[11pt,a4paper]{article} \usepackage[english]{babel} \usepackage[a4paper,left=2cm,right=2cm,top=2.5cm,bottom=2.5cm]{geometry} \usepackage{minted} \title{Google Summer of Code 2019} \author{Thibault Allançon} \date{May 31, 2019} \begin{document} \maketitle Experiment to test edge access time on the compressed graph. The goal was to do the equivalent of a \mintinline{text}{git log} operation with a Linux commit on the \mintinline{text}{rev_to_rev} compressed dataset. \section{Environment} -\begin{footnotesize} -\inputminted{java}{LinuxLog.java} -\end{footnotesize} +See \mintinline{text}{benchmark/LinuxLog.java} in the server-side code. \section{Results} +Experiments were done on a VM with 1TB of RAM and 40vCPU. + \begin{small} \begin{minted}{text} $ java -cp swh-graph-compression.jar org.softwareheritage.graph.LinuxLog /graph/rev_to_rev git log f39d7d78b70e0f39facb1e4fab77ad3df5c52a35 Expecting 723640 commits Visit done in: 0.422884286 seconds User time (seconds): 123.26 System time (seconds): 13.20 Percent of CPU this job got: 103% Elapsed (wall clock) time (h:mm:ss or m:ss): 2:11.92 Average shared text size (kbytes): 0 Average unshared data size (kbytes): 0 Average stack size (kbytes): 0 Average total size (kbytes): 0 Maximum resident set size (kbytes): 15228424 Average resident set size (kbytes): 0 Major (requiring I/O) page faults: 0 Minor (reclaiming a frame) page faults: 46461 Voluntary context switches: 13557 Involuntary context switches: 574 Swaps: 0 File system inputs: 0 File system outputs: 0 Socket messages sent: 0 Socket messages received: 0 Signals delivered: 0 Page size (bytes): 4096 Exit status: 0 \end{minted} \end{small} Edge access time is around 5 $\mu$s. \end{document}