Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
index ba34c94..18c4100 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java
@@ -1,71 +1,71 @@
package org.softwareheritage.graph;
import java.io.IOException;
import it.unimi.dsi.big.webgraph.BVGraph;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import org.softwareheritage.graph.NodeIdMap;
import org.softwareheritage.graph.SwhId;
public class Graph {
BVGraph graph;
BVGraph graphTransposed;
String path;
NodeIdMap nodeIdMap;
public Graph(String path) throws IOException {
this.graph = BVGraph.load(path);
this.graphTransposed = BVGraph.load(path + "-transposed");
this.path = path;
this.nodeIdMap = new NodeIdMap(path, getNbNodes());
}
public void cleanUp() throws IOException {
nodeIdMap.close();
}
public String getPath() {
return path;
}
public long getNodeId(SwhId swhId) {
return nodeIdMap.getNodeId(swhId);
}
public SwhId getSwhId(long nodeId) {
return nodeIdMap.getSwhId(nodeId);
}
public long getNbNodes() {
return graph.numNodes();
}
public long getNbEdges() {
return graph.numArcs();
}
public LazyLongIterator successors(long nodeId) {
return graph.successors(nodeId);
}
public long outdegree(long nodeId) {
return graph.outdegree(nodeId);
}
public LazyLongIterator predecessors(long nodeId) {
return graphTransposed.successors(nodeId);
}
public long indegree(long nodeId) {
return graphTransposed.outdegree(nodeId);
}
- public long degree(long nodeId, boolean isTransposed) {
- return (isTransposed) ? indegree(nodeId) : outdegree(nodeId);
+ public long degree(long nodeId, boolean useTransposed) {
+ return (useTransposed) ? indegree(nodeId) : outdegree(nodeId);
}
- public LazyLongIterator neighbors(long nodeId, boolean isTransposed) {
- return (isTransposed) ? predecessors(nodeId) : successors(nodeId);
+ public LazyLongIterator neighbors(long nodeId, boolean useTransposed) {
+ return (useTransposed) ? predecessors(nodeId) : successors(nodeId);
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java
index 993bf68..a70de75 100644
--- a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java
+++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java
@@ -1,75 +1,75 @@
package org.softwareheritage.graph.algo;
import java.util.ArrayList;
import java.util.Stack;
import it.unimi.dsi.big.webgraph.LazyLongIterator;
import it.unimi.dsi.bits.LongArrayBitVector;
import org.softwareheritage.graph.Graph;
import org.softwareheritage.graph.SwhId;
import org.softwareheritage.graph.SwhPath;
public class Visit {
Graph graph;
- boolean isTransposed;
+ boolean useTransposed;
String allowedEdges;
Stack<Long> currentPath;
ArrayList<SwhPath> paths;
LongArrayBitVector visited;
public Visit(Graph graph, SwhId swhId, String allowedEdges, String algorithm, String direction) {
if (!algorithm.matches("dfs|bfs")) {
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm);
}
if (!direction.matches("forward|backward")) {
throw new IllegalArgumentException("Unknown traversal direction: " + direction);
}
this.graph = graph;
- this.isTransposed = (direction.equals("backward"));
+ this.useTransposed = (direction.equals("backward"));
this.allowedEdges = allowedEdges;
this.paths = new ArrayList<SwhPath>();
this.currentPath = new Stack<Long>();
this.visited = LongArrayBitVector.ofLength(graph.getNbNodes());
if (algorithm.equals("dfs")) {
dfs(graph.getNodeId(swhId));
}
}
// Allow Jackson JSON to only serialize the 'paths' field
public ArrayList<SwhPath> getPaths() {
return paths;
}
private void dfs(long currentNodeId) {
visited.set(currentNodeId);
currentPath.push(currentNodeId);
- long degree = graph.degree(currentNodeId, isTransposed);
- LazyLongIterator neighbors = graph.neighbors(currentNodeId, isTransposed);
+ long degree = graph.degree(currentNodeId, useTransposed);
+ LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed);
if (degree == 0) {
SwhPath path = new SwhPath();
for (long nodeId : currentPath) {
path.add(graph.getSwhId(nodeId));
}
paths.add(path);
}
while (degree-- > 0) {
long nextNodeId = neighbors.nextLong();
if (isEdgeAllowed(currentNodeId, nextNodeId) && !visited.getBoolean(nextNodeId)) {
dfs(nextNodeId);
}
}
currentPath.pop();
}
private boolean isEdgeAllowed(long currentNodeId, long nextNodeId) {
// TODO
return true;
}
}
diff --git a/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
new file mode 100644
index 0000000..a7f91dd
--- /dev/null
+++ b/api/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java
@@ -0,0 +1,62 @@
+package org.softwareheritage.graph.benchmark;
+
+import java.io.IOException;
+import java.util.Stack;
+
+import it.unimi.dsi.big.webgraph.LazyLongIterator;
+import it.unimi.dsi.bits.LongArrayBitVector;
+
+import org.softwareheritage.graph.Graph;
+import org.softwareheritage.graph.SwhId;
+import org.softwareheritage.graph.algo.Visit;
+
+public class LinuxLog {
+ public static void main(String[] args) throws IOException {
+ String path = args[0];
+ Graph graph = new Graph(path);
+
+ final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35");
+ final long expectedCount = 723640;
+ System.out.println("git log " + commit);
+
+ long count = countCommits(graph, commit);
+ System.out.println("Counted " + count + " commits");
+ if (count != expectedCount) {
+ throw new IllegalArgumentException("Expected " + expectedCount + " commits");
+ }
+
+ long startTime = System.nanoTime();
+ Visit visit = new Visit(graph, commit, "rev", "dfs", "backward");
+ long endTime = System.nanoTime();
+ double duration = (double) (endTime - startTime) / 1_000_000_000;
+ System.out.println("Visit operation done in: " + duration + " seconds");
+ }
+
+ static long countCommits(Graph graph, SwhId commit) {
+ final boolean useTransposed = true;
+
+ Stack<Long> stack = new Stack<Long>();
+ LongArrayBitVector visited = LongArrayBitVector.ofLength(graph.getNbNodes());
+ long startNodeId = graph.getNodeId(commit);
+ long count = 0;
+
+ stack.push(startNodeId);
+ visited.set(startNodeId);
+ while (!stack.empty()) {
+ long currentNodeId = stack.pop();
+ count++;
+
+ long degree = graph.degree(currentNodeId, useTransposed);
+ LazyLongIterator revisions = graph.neighbors(currentNodeId, useTransposed);
+ while (degree-- > 0) {
+ long nextNodeId = revisions.nextLong();
+ if (!visited.getBoolean(nextNodeId)) {
+ visited.set(nextNodeId);
+ stack.push(nextNodeId);
+ }
+ }
+ }
+
+ return count;
+ }
+}
diff --git a/reports/linux_log/LinuxLog.java b/reports/linux_log/LinuxLog.java
deleted file mode 100644
index ab8f0b7..0000000
--- a/reports/linux_log/LinuxLog.java
+++ /dev/null
@@ -1,62 +0,0 @@
-package org.softwareheritage.graph;
-
-import java.util.Stack;
-
-import it.unimi.dsi.big.webgraph.LazyLongIterator;
-import it.unimi.dsi.bits.LongArrayBitVector;
-
-import org.softwareheritage.graph.Graph;
-import org.softwareheritage.graph.SwhId;
-import org.softwareheritage.graph.algo.Visit;
-
-public class LinuxLog {
- private static final String COMMIT_HASH = "f39d7d78b70e0f39facb1e4fab77ad3df5c52a35";
- private static final long EXPECTED_COUNT = 723640;
- private static final boolean TRANSPOSED = true;
-
- public static void main(String[] args) throws Exception {
- String graphPath = args[0];
- Graph graph = new Graph(graphPath);
- SwhId commit = new SwhId(COMMIT_HASH);
- long startNode = graph.getNode(commit);
-
- System.out.println("git log " + COMMIT_HASH);
- System.out.println("Expecting " + EXPECTED_COUNT + " commits");
-
- long count = countCommits(graph, startNode);
- if (count != EXPECTED_COUNT) {
- throw new Exception("Counted " + count + " commits");
- }
-
- long startTime = System.nanoTime();
- Visit visit = new Visit(graph, commit, "rev", "dfs", "backward");
- long endTime = System.nanoTime();
- double duration = (double) (endTime - startTime) / 1_000_000_000;
- System.out.println("Visit done in: " + duration + " seconds");
- }
-
- static long countCommits(Graph graph, long startNode) {
- Stack<Long> stack = new Stack<Long>();
- LongArrayBitVector visited = LongArrayBitVector.ofLength(graph.getNbNodes());
- long count = 0;
-
- stack.push(startNode);
- visited.set(startNode);
- while (!stack.empty()) {
- long currentNode = stack.pop();
- count++;
-
- long degree = graph.degree(currentNode, TRANSPOSED);
- LazyLongIterator revisions = graph.neighbors(currentNode, TRANSPOSED);
- while (degree-- > 0) {
- long nextNode = revisions.nextLong();
- if (!visited.getBoolean(nextNode)) {
- visited.set(nextNode);
- stack.push(nextNode);
- }
- }
- }
-
- return count;
- }
-}
diff --git a/reports/linux_log/linux_log.tex b/reports/linux_log/linux_log.tex
index 3ba1852..00d971c 100644
--- a/reports/linux_log/linux_log.tex
+++ b/reports/linux_log/linux_log.tex
@@ -1,62 +1,62 @@
\documentclass[11pt,a4paper]{article}
\usepackage[english]{babel}
\usepackage[a4paper,left=2cm,right=2cm,top=2.5cm,bottom=2.5cm]{geometry}
\usepackage{minted}
\title{Google Summer of Code 2019}
\author{Thibault Allançon}
\date{May 31, 2019}
\begin{document}
\maketitle
Experiment to test edge access time on the compressed graph. The goal was to do
the equivalent of a \mintinline{text}{git log} operation with a Linux commit on
the \mintinline{text}{rev_to_rev} compressed dataset.
\section{Environment}
-\begin{footnotesize}
-\inputminted{java}{LinuxLog.java}
-\end{footnotesize}
+See \mintinline{text}{benchmark/LinuxLog.java} in the server-side code.
\section{Results}
+Experiments were done on a VM with 1TB of RAM and 40vCPU.
+
\begin{small}
\begin{minted}{text}
$ java -cp swh-graph-compression.jar org.softwareheritage.graph.LinuxLog /graph/rev_to_rev
git log f39d7d78b70e0f39facb1e4fab77ad3df5c52a35
Expecting 723640 commits
Visit done in: 0.422884286 seconds
User time (seconds): 123.26
System time (seconds): 13.20
Percent of CPU this job got: 103%
Elapsed (wall clock) time (h:mm:ss or m:ss): 2:11.92
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 15228424
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 0
Minor (reclaiming a frame) page faults: 46461
Voluntary context switches: 13557
Involuntary context switches: 574
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 4096
Exit status: 0
\end{minted}
\end{small}
Edge access time is around 5 $\mu$s.
\end{document}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Jul 4, 3:50 PM (2 w, 13 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3260860

Event Timeline