diff --git a/java/server/src/main/java/org/softwareheritage/graph/App.java b/java/server/src/main/java/org/softwareheritage/graph/App.java index 0aeb17c..c6bb4fe 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/App.java +++ b/java/server/src/main/java/org/softwareheritage/graph/App.java @@ -1,199 +1,198 @@ package org.softwareheritage.graph; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.Switch; import com.martiansoftware.jsap.UnflaggedOption; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.algo.Stats; /** * Web framework of the swh-graph server REST API. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { - SimpleJSAP jsap = new SimpleJSAP( - App.class.getName(), + SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[] { - new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", - "Binding port of the server."), - new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - JSAP.NOT_GREEDY, "The basename of the compressed graph."), - new Switch("timings", 't', "timings", "Show timings in API result metadata."), - } - ); + new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", + "Binding port of the server."), + new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the compressed graph."), + new Switch("timings", 't', "timings", "Show timings in API result metadata."), + }); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. * * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ - private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { + private static void startServer(String graphPath, int port, boolean showTimings) + throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(src); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(src); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(src); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(src); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(src, dstFmt, algorithm); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the REST API. * * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the REST API. *

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map outputNoTimings = new HashMap<>(); outputNoTimings.put("result", output.result); return outputNoTimings; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java index a1f124d..0619b42 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Endpoint.java @@ -1,281 +1,282 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.SwhPath; import org.softwareheritage.graph.algo.Traversal; import org.softwareheritage.graph.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.algo.Traversal */ public class Endpoint { /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** - * Endpoint result metadata. - */ + * Endpoint result metadata. + */ public class Meta { /** Operations timings */ public Timings timings; public Meta() { this.timings = new Timings(); } /** - * Wrapper class for JSON output format. - */ + * Wrapper class for JSON output format. + */ public class Timings { /** Time in seconds to do the traversal */ public float traversal; /** Time in seconds to convert input SWH PID to node id */ public float pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ public float node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#leaves(long) */ public Output leaves(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) */ public Output neighbors(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#walk */ public Output walk(SwhPID src, String dstFmt, String algorithm) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, algorithm); output.meta.timings.traversal = Timing.stop(startTime); - } catch (IllegalArgumentException ignored2) { } + } catch (IllegalArgumentException ignored2) { + } } startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) */ public Output visitNodes(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param src source node of endpoint call specified as a {@link SwhPID} * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) */ public Output visitPaths(SwhPID src) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java index 43ba9c8..c3805bc 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java +++ b/java/server/src/main/java/org/softwareheritage/graph/Neighbors.java @@ -1,88 +1,88 @@ package org.softwareheritage.graph; import java.util.Iterator; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.AllowedEdges; import org.softwareheritage.graph.Graph; /** * Iterator class to go over a node neighbors in the graph. *

* Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.AllowedEdges */ public class Neighbors implements Iterable { /** Graph used to explore neighbors */ Graph graph; /** Boolean to specify the use of the transposed graph */ boolean useTransposed; /** Graph edge restriction */ AllowedEdges edges; /** Source node from which neighbors will be listed */ long srcNodeId; /** * Constructor. * * @param graph graph used to explore neighbors * @param useTransposed boolean value to use transposed graph * @param edges edges allowed to be used in the graph * @param srcNodeId source node from where to list neighbors */ public Neighbors(Graph graph, boolean useTransposed, AllowedEdges edges, long srcNodeId) { this.graph = graph; this.useTransposed = useTransposed; this.edges = edges; this.srcNodeId = srcNodeId; } @Override public Iterator iterator() { return new NeighborsIterator(); } /** - * Inner class for {@link Neighbors} iterator. - * - * @author Thibault Allançon - * @version 0.0.1 - * @since 0.0.1 - */ + * Inner class for {@link Neighbors} iterator. + * + * @author Thibault Allançon + * @version 0.0.1 + * @since 0.0.1 + */ public class NeighborsIterator implements Iterator { LazyLongIterator neighbors; long nextNeighborId; public NeighborsIterator() { this.neighbors = graph.neighbors(srcNodeId, useTransposed); this.nextNeighborId = -1; } public boolean hasNext() { // Case 1: no edge restriction, bypass type checks and skip to next neighbor if (edges.restrictedTo == null) { nextNeighborId = neighbors.nextLong(); return (nextNeighborId != -1); } // Case 2: edge restriction, look ahead for next neighbor while ((nextNeighborId = neighbors.nextLong()) != -1) { if (edges.isAllowed(srcNodeId, nextNeighborId)) { return true; } } return false; } public Long next() { return nextNeighborId; } } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java index 3a847c2..39c6604 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPID.java @@ -1,97 +1,100 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.Node; /** * A Software Heritage PID, see persistent * identifier documentation. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class SwhPID { /** Fixed hash length of the PID */ public static final int HASH_LENGTH = 40; /** Full PID as a string */ String swhPID; /** PID node type */ Node.Type type; /** PID hex-encoded SHA1 hash */ String hash; /** * Constructor. * * @param swhPID full PID as a string */ public SwhPID(String swhPID) { this.swhPID = swhPID; // PID format: 'swh:1:type:hash' String[] parts = swhPID.split(":"); if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { - throw new IllegalArgumentException("Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID); + throw new IllegalArgumentException( + "Expected SWH PID format to be 'swh:1:type:hash', got: " + swhPID); } this.type = Node.Type.fromStr(parts[2]); this.hash = parts[3]; if (!hash.matches("[0-9a-f]{" + HASH_LENGTH + "}")) { throw new IllegalArgumentException("Wrong SWH PID hash format in: " + swhPID); } } @Override public boolean equals(Object otherObj) { - if (otherObj == this) return true; - if (!(otherObj instanceof SwhPID)) return false; + if (otherObj == this) + return true; + if (!(otherObj instanceof SwhPID)) + return false; SwhPID other = (SwhPID) otherObj; return swhPID.equals(other.getSwhPID()); } @Override public int hashCode() { return swhPID.hashCode(); } @Override public String toString() { return swhPID; } /** * Returns full PID as a string. * * @return full PID string */ @JsonValue public String getSwhPID() { return swhPID; } /** * Returns PID node type. * * @return PID corresponding {@link Node.Type} * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType() { return type; } /** * Returns PID hex-encoded SHA1 hash. * * @return PID string hash */ public String getHash() { return hash; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java index fd3385d..0ad7518 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/server/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -1,124 +1,126 @@ package org.softwareheritage.graph; import java.util.ArrayList; import com.fasterxml.jackson.annotation.JsonValue; import org.softwareheritage.graph.SwhPID; /** * Wrapper class to store a list of {@link SwhPID}. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.SwhPID */ public class SwhPath { /** Internal list of {@link SwhPID} */ ArrayList path; /** * Constructor. */ public SwhPath() { this.path = new ArrayList(); } /** * Constructor. * * @param swhPIDs variable number of string PIDs to initialize this path with */ - public SwhPath(String ...swhPIDs) { + public SwhPath(String... swhPIDs) { this(); for (String swhPID : swhPIDs) { add(new SwhPID(swhPID)); } } /** * Constructor. * * @param swhPIDs variable number of {@link SwhPID} to initialize this path with * @see org.softwareheritage.graph.SwhPID */ - public SwhPath(SwhPID ...swhPIDs) { + public SwhPath(SwhPID... swhPIDs) { this(); for (SwhPID swhPID : swhPIDs) { add(swhPID); } } /** * Returns this path as a list of {@link SwhPID}. * * @return list of {@link SwhPID} constituting the path * @see org.softwareheritage.graph.SwhPID */ @JsonValue public ArrayList getPath() { return path; } /** * Adds a {@link SwhPID} to this path. * * @param {@link SwhPID} to add to this path * @see org.softwareheritage.graph.SwhPID */ public void add(SwhPID swhPID) { path.add(swhPID); } /** * Returns the {@link SwhPID} at the specified position in this path. * * @param index position of the {@link SwhPID} to return * @return {@link SwhPID} at the specified position * @see org.softwareheritage.graph.SwhPID */ public SwhPID get(int index) { return path.get(index); } /** * Returns the number of elements in this path. * * @return number of elements in this path */ public int size() { return path.size(); } @Override public boolean equals(Object otherObj) { - if (otherObj == this) return true; - if (!(otherObj instanceof SwhPath)) return false; + if (otherObj == this) + return true; + if (!(otherObj instanceof SwhPath)) + return false; SwhPath other = (SwhPath) otherObj; if (size() != other.size()) { return false; } for (int i = 0; i < size(); i++) { SwhPID thisSwhPID = get(i); SwhPID otherSwhPID = other.get(i); if (!thisSwhPID.equals(otherSwhPID)) { return false; } } return true; } @Override public String toString() { String str = new String(); for (SwhPID swhPID : path) { str += swhPID + "/"; } return str; } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java index 9620914..1e8f2ac 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/server/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java @@ -1,116 +1,117 @@ package org.softwareheritage.graph.backend; import java.io.IOException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.backend.MapFile; import org.softwareheritage.graph.backend.Setup; /** * Mapping between internal long node id and external SWH PID. *

* Mappings in both directions are pre-computed and dumped on disk in the {@link Setup} class, then * they are loaded here using mmap(). * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 * @see org.softwareheritage.graph.backend.Setup */ public class NodeIdMap { /** Fixed length of full SWH PID */ public static final int SWH_ID_LENGTH = 50; /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; /** Graph path and basename */ String graphPath; /** Number of ids to map */ long nbIds; /** mmap()-ed PID_TO_NODE file */ MapFile swhToNodeMap; /** mmap()-ed NODE_TO_PID file */ MapFile nodeToSwhMap; /** * Constructor. * * @param graphPath full graph path * @param nbNodes number of nodes in the graph */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; // +1 are for spaces and end of lines int swhToNodeLineLength = SWH_ID_LENGTH + 1 + NODE_ID_LENGTH + 1; int nodeToSwhLineLength = SWH_ID_LENGTH + 1; this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, swhToNodeLineLength); this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, nodeToSwhLineLength); } /** * Converts SWH PID to corresponding long node id. * * @param swhPID node represented as a {@link SwhPID} * @return corresponding node as a long id * @see org.softwareheritage.graph.SwhPID */ public long getNodeId(SwhPID swhPID) { // Each line in PID_TO_NODE is formatted as: swhPID nodeId - // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding nodeId + // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding + // nodeId long start = 0; long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; String[] parts = swhToNodeMap.readAtLine(lineNumber).split(" "); if (parts.length != 2) { break; } String currentSwhPID = parts[0]; long currentNodeId = Long.parseLong(parts[1]); int cmp = currentSwhPID.compareTo(swhPID.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); } /** * Converts a node long id to corresponding SWH PID. * * @param nodeId node as a long id * @return corresponding node as a {@link SwhPID} * @see org.softwareheritage.graph.SwhPID */ public SwhPID getSwhPID(long nodeId) { // Each line in NODE_TO_PID is formatted as: swhPID // The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the // nodeId-th line to get corresponding swhPID if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } String swhPID = nodeToSwhMap.readAtLine(nodeId); return new SwhPID(swhPID); } /** * Closes the mapping files. */ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java index bd45232..4775a13 100644 --- a/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java +++ b/java/server/src/main/java/org/softwareheritage/graph/benchmark/LinuxLog.java @@ -1,47 +1,46 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhId; /** * Linux git log experiment to benchmark graph traversal. *

* The goal is to do the equivalent of a {@code git log} in the Linux kernel by following revisions * nodes. * * @author Thibault Allançon * @version 0.0.1 * @since 0.0.1 */ public class LinuxLog { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { String path = args[0]; Graph graph = new Graph(path); // A linux kernel commit on Sun Dec 31 final SwhId commit = new SwhId("swh:1:rev:f39d7d78b70e0f39facb1e4fab77ad3df5c52a35"); final long expectedCount = 723640; System.out.println("git log " + commit); System.out.println("Expecting " + expectedCount + " commits"); long startTime = System.nanoTime(); Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev"); long count = endpoint.visitNodes(commit).size(); if (count != expectedCount) { throw new IllegalArgumentException("Counted " + count + " commits"); } long endTime = System.nanoTime(); double duration = (double) (endTime - startTime) / 1_000_000_000; System.out.println("Visit operation done in: " + duration + " seconds"); - } }