diff --git a/api/server/src/main/java/org/softwareheritage/graph/App.java b/api/server/src/main/java/org/softwareheritage/graph/App.java index 660458a..344a7c9 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/App.java +++ b/api/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,62 +1,62 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.Optional; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import io.javalin.Javalin; import io.javalin.json.JavalinJackson; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.algo.Stats; import org.softwareheritage.graph.algo.Visit; public class App { - public static void main(String[] args) throws IOException, Exception { + public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); Stats stats = new Stats(path); + // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { - graph.cleanUp(); + try { + graph.cleanUp(); + } catch (IOException e) { + System.out.println("Could not clean up graph on exit: " + e); + } } }); // Configure Jackson JSON properties ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(5010); app.get("/stats", ctx -> { - try { - ctx.json(stats); - } catch (Exception e) { - ctx.status(400); - ctx.result(e.toString()); - } + ctx.json(stats); }); app.get("/visit/:swh_id", ctx -> { try { SwhId start = new SwhId(ctx.pathParam("swh_id")); // By default, traversal is a forward DFS using all edges String algorithm = Optional.ofNullable(ctx.queryParam("traversal")).orElse("dfs"); String direction = Optional.ofNullable(ctx.queryParam("direction")).orElse("forward"); - String edges = Optional.ofNullable(ctx.queryParam("edges")).orElse("cnt:dir:rel:rev:snp"); + String edges = Optional.ofNullable(ctx.queryParam("edges")).orElse("all"); ctx.json(new Visit(graph, start, edges, algorithm, direction)); } catch (IllegalArgumentException e) { ctx.status(400); - ctx.result(e.toString()); + ctx.result(e.getMessage()); } }); app.error(404, ctx -> { ctx.result("Not found"); }); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/Graph.java b/api/server/src/main/java/org/softwareheritage/graph/Graph.java index 350c5be..f0eeece 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/Graph.java +++ b/api/server/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,77 +1,79 @@ package org.softwareheritage.graph; +import java.io.IOException; + import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.NodeIdMap; import org.softwareheritage.graph.SwhId; public class Graph { BVGraph graph; BVGraph graphTransposed; String path; NodeIdMap nodeIdMap; - public Graph(String graphPath) throws Exception { + public Graph(String graphPath) throws IOException { this.graph = BVGraph.load(graphPath); this.graphTransposed = BVGraph.load(graphPath + "-transposed"); this.path = graphPath; this.nodeIdMap = new NodeIdMap(graphPath, getNbNodes()); } - public void cleanUp() { + public void cleanUp() throws IOException { nodeIdMap.close(); } public String getPath() { return path; } public long getNode(SwhId swhId) { return nodeIdMap.getNode(swhId); } public SwhId getSwhId(long node) { return nodeIdMap.getSwhId(node); } public long getNbNodes() { return graph.numNodes(); } public long getNbEdges() { return graph.numArcs(); } public LazyLongIterator successors(long node) { return graph.successors(node); } public long outdegree(long node) { return graph.outdegree(node); } public LazyLongIterator predecessors(long node) { return graphTransposed.successors(node); } public long indegree(long node) { return graphTransposed.outdegree(node); } public long degree(long node, boolean isTransposed) { if (isTransposed) { return indegree(node); } else { return outdegree(node); } } public LazyLongIterator neighbors(long node, boolean isTransposed) { if (isTransposed) { return predecessors(node); } else { return successors(node); } } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java b/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java index cc65ebb..6291290 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java +++ b/api/server/src/main/java/org/softwareheritage/graph/NodeIdMap.java @@ -1,135 +1,142 @@ package org.softwareheritage.graph; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.zip.GZIPInputStream; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.utils.MMapInputFile; import org.softwareheritage.graph.utils.MMapOutputFile; // TODO: // - Add option to dump or not the mapping -// - Add error handling when node/swh ids not found public class NodeIdMap { private static final int SWH_ID_SIZE = 50; private static final int NODE_ID_SIZE = 20; // +1 are for spaces and end of lines private static final int SWH_TO_NODE_LINE_LENGTH = SWH_ID_SIZE + 1 + NODE_ID_SIZE + 1; private static final int NODE_TO_SWH_LINE_LENGTH = SWH_ID_SIZE + 1; String graphPath; long nbIds; MMapInputFile swhToNodeMap; MMapInputFile nodeToSwhMap; - public NodeIdMap(String graphPath, long nbNodes) { + public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; - try { - dump(); - } catch (Exception e) { - System.out.println("Could not dump mapping: " + e); - } + dump(); this.swhToNodeMap = new MMapInputFile(graphPath + ".swhToNodeMap.csv", SWH_TO_NODE_LINE_LENGTH); this.nodeToSwhMap = new MMapInputFile(graphPath + ".nodeToSwhMap.csv", NODE_TO_SWH_LINE_LENGTH); } // SWH id (string) -> WebGraph node id (long) // Each line in .swhToNode.csv is formatted as: swhId nodeId // The file is sorted by swhId, hence we can binary search on swhId to get corresponding nodeId public long getNode(SwhId swhId) { long start = 0; - long end = nbIds; + long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; - String[] parts = swhToNodeMap.readLine(lineNumber).split(" "); + String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); + if (parts.length != 2) { + break; + } + String currentSwhId = parts[0]; long currentNodeId = Long.parseLong(parts[1]); int cmp = currentSwhId.compareTo(swhId.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } - return -1; + throw new IllegalArgumentException("Unknown SWH id: " + swhId); } // WebGraph node id (long) -> SWH id (string) // Each line in .nodeToSwh.csv is formatted as: swhId // The file is ordered by nodeId, meaning node0's swhId is at line 0, hence we can read the // nodeId-th line to get corresponding swhId public SwhId getSwhId(long node) { - String swhId = nodeToSwhMap.readLine(node); + if (node < 0 || node >= nbIds) { + throw new IllegalArgumentException("Node id " + node + " should be between 0 and " + nbIds); + } + + String swhId = nodeToSwhMap.readAtLine(node); return new SwhId(swhId); } - void dump() throws ClassNotFoundException, IOException { + @SuppressWarnings("unchecked") + void dump() throws IOException { // First internal mapping: SWH id (string) -> WebGraph MPH (long) - @SuppressWarnings("unchecked") - Object2LongFunction mphMap = - (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); + Object2LongFunction mphMap = null; + try { + mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); + } catch (ClassNotFoundException e) { + throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); + } // Second internal mapping: WebGraph MPH (long) -> BFS ordering (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); if (loaded != nbIds) { throw new IllegalArgumentException("Graph contains " + nbIds + " nodes, but read " + loaded); } // Dump complete mapping for all nodes: SWH id (string) <=> WebGraph node id (long) MMapOutputFile swhToNodeMapOut = new MMapOutputFile(graphPath + ".swhToNodeMap.csv", SWH_TO_NODE_LINE_LENGTH, nbIds); MMapOutputFile nodeToSwhMapOut = new MMapOutputFile(graphPath + ".nodeToSwhMap.csv", NODE_TO_SWH_LINE_LENGTH, nbIds); InputStream nodeFile = new GZIPInputStream(new FileInputStream(graphPath + ".nodes.csv.gz")); FastBufferedReader fileBuffer = new FastBufferedReader(new InputStreamReader(nodeFile, "UTF-8")); LineIterator lineIterator = new LineIterator(fileBuffer); for (long iNode = 0; iNode < nbIds && lineIterator.hasNext(); iNode++) { String swhId = lineIterator.next().toString(); long mphId = mphMap.getLong(swhId); long nodeId = LongBigArrays.get(bfsMap, mphId); { String paddedNodeId = String.format("%0" + NODE_ID_SIZE + "d", nodeId); String line = swhId + " " + paddedNodeId + "\n"; long lineIndex = iNode; - swhToNodeMapOut.writeLine(line, lineIndex); + swhToNodeMapOut.writeAtLine(line, lineIndex); } { String line = swhId + "\n"; long lineIndex = nodeId; - nodeToSwhMapOut.writeLine(line, lineIndex); + nodeToSwhMapOut.writeAtLine(line, lineIndex); } } swhToNodeMapOut.close(); nodeToSwhMapOut.close(); } - public void close() { + public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java index 11176ca..bcdc23a 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/SwhId.java +++ b/api/server/src/main/java/org/softwareheritage/graph/SwhId.java @@ -1,48 +1,49 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; public class SwhId { String swhId; String type; String hash; public SwhId(String swhId) { this.swhId = swhId; String[] parts = swhId.split(":"); if (parts.length != 4) { throw new IllegalArgumentException("Incorrect SWH ID format: " + swhId); } // SWH ID format: 'swh:1:type:hash' + // https://docs.softwareheritage.org/devel/swh-model/persistent-identifiers.html this.type = parts[2]; this.hash = parts[3]; } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (! (otherObj instanceof SwhId)) return false; SwhId other = (SwhId) otherObj; return swhId.equals(other.getSwhId()); } @Override public String toString() { return swhId; } @JsonValue public String getSwhId() { return swhId; } public String getType() { return type; } public String getHash() { return hash; } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java index f00d2a2..657bb90 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java +++ b/api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java @@ -1,68 +1,77 @@ package org.softwareheritage.graph.algo; import java.util.ArrayList; import java.util.Stack; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; import org.softwareheritage.graph.SwhPath; public class Visit { Graph graph; boolean isTransposed; String allowedEdges; Stack currentPath; ArrayList paths; LongArrayBitVector visited; public Visit(Graph graph, SwhId start, String allowedEdges, String algorithm, String direction) { + if (!algorithm.matches("dfs|bfs")) { + throw new IllegalArgumentException( + "Unknown traversal algorithm: " + algorithm + " (should be 'dfs' or 'bfs')"); + } + if (!direction.matches("forward|backward")) { + throw new IllegalArgumentException( + "Unknown direction: " + direction + " (should be 'forward' or 'backward')"); + } + this.graph = graph; - this.isTransposed = (direction == "backward"); + this.isTransposed = (direction.equals("backward")); this.allowedEdges = allowedEdges; this.paths = new ArrayList(); this.currentPath = new Stack(); this.visited = LongArrayBitVector.ofLength(graph.getNbNodes()); - if (algorithm == "dfs") { + if (algorithm.equals("dfs")) { dfs(graph.getNode(start)); } } // Allow Jackson JSON to only serialize the 'paths' field public ArrayList getPaths() { return paths; } private void dfs(long currentNode) { visited.set(currentNode); currentPath.push(currentNode); long degree = graph.degree(currentNode, isTransposed); LazyLongIterator neighbors = graph.neighbors(currentNode, isTransposed); if (degree == 0) { SwhPath path = new SwhPath(); for (long node : currentPath) { path.add(graph.getSwhId(node)); } paths.add(path); } while (degree-- > 0) { long nextNode = neighbors.nextLong(); if (isEdgeAllowed(currentNode, nextNode) && !visited.getBoolean(nextNode)) { dfs(nextNode); } } currentPath.pop(); } private boolean isEdgeAllowed(long currentNode, long nextNode) { // TODO return true; } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/utils/MMapInputFile.java b/api/server/src/main/java/org/softwareheritage/graph/utils/MMapInputFile.java index 428f33c..154c69b 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/utils/MMapInputFile.java +++ b/api/server/src/main/java/org/softwareheritage/graph/utils/MMapInputFile.java @@ -1,42 +1,36 @@ package org.softwareheritage.graph.utils; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferInputStream; public class MMapInputFile { ByteBufferInputStream bufferMap; int lineLength; - public MMapInputFile(String path, int lineLength) { + public MMapInputFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); - } catch (IOException e) { - System.out.println("Could not load MMapInputFile " + path + ": " + e); } } - public String readLine(long lineIndex) { + public String readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); String line = new String(buffer); return line.trim(); } - public void close() { - try { - bufferMap.close(); - } catch (IOException e) { - System.out.println("Could not close MMapInputFile: " + e); - } + public void close() throws IOException { + bufferMap.close(); } } diff --git a/api/server/src/main/java/org/softwareheritage/graph/utils/MMapOutputFile.java b/api/server/src/main/java/org/softwareheritage/graph/utils/MMapOutputFile.java index a220aa9..38828aa 100644 --- a/api/server/src/main/java/org/softwareheritage/graph/utils/MMapOutputFile.java +++ b/api/server/src/main/java/org/softwareheritage/graph/utils/MMapOutputFile.java @@ -1,41 +1,35 @@ package org.softwareheritage.graph.utils; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferOutputStream; public class MMapOutputFile { ByteBufferOutputStream bufferMap; int lineLength; - public MMapOutputFile(String path, int lineLength, long nbLines) { + public MMapOutputFile(String path, int lineLength, long nbLines) throws IOException { this.bufferMap = null; this.lineLength = lineLength; long totalSize = nbLines * lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "rw")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferOutputStream.map(fileChannel, totalSize, FileChannel.MapMode.READ_WRITE); - } catch (IOException e) { - System.out.println("Could not load MMapOutputFile " + path + ": " + e); } } - public void writeLine(String line, long lineIndex) { + public void writeAtLine(String line, long lineIndex) { long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.write(line.getBytes(), 0, lineLength); } - public void close() { - try { - bufferMap.close(); - } catch (IOException e) { - System.out.println("Could not close MMapOutputFile: " + e); - } + public void close() throws IOException { + bufferMap.close(); } }