diff --git a/java/README.md b/java/README.md index 8618fa9..cffc06a 100644 --- a/java/README.md +++ b/java/README.md @@ -1,50 +1,50 @@ Graph service - Java backend ============================ Server side Java REST API. Build ----- ```bash $ mvn compile assembly:single ``` Start REST API -------------- ```bash $ java -cp target/swh-graph-*.jar \ - org.softwareheritage.graph.App \ + org.softwareheritage.graph.server.App \ ``` Default port is 5009 (use the `--port` option to change port number). If you need timings metadata send back to the client in addition to the result, use the `--timings` flag. Tests ----- Unit tests rely on test data that are already available in the Git repository (under `src/swh/graph/tests/dataset/`). You generally only need to run them using Maven: ```bash $ mvn test ``` In case you want to regenerate the test data: ```bash # Graph compression $ cd src/swh/graph/tests/dataset $ ./generate_graph.sh $ cd ../../../.. $ mvn compile assembly:single # Dump mapping files $ java -cp target/swh-graph-*.jar \ - org.softwareheritage.graph.backend.MapBuilder \ + org.softwareheritage.graph.maps.MapBuilder \ src/swh/graph/tests/dataset/example.nodes.csv.gz \ src/swh/graph/tests/dataset/output/example ``` diff --git a/java/pom.xml b/java/pom.xml index 06c4b33..c0ec690 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,225 +1,225 @@ 4.0.0 org.softwareheritage.graph swh-graph ${git.closest.tag.name} swh-graph https://forge.softwareheritage.org/source/swh-graph/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 junit junit 4.11 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.6.0 it.unimi.dsi fastutil 8.4.1 it.unimi.dsi law 2.6.2 org.apache.hadoop hadoop-common org.umlgraph umlgraph org.eclipse.jetty.aggregate jetty-all it.unimi.di mg4j it.unimi.di mg4j-big com.martiansoftware jsap 2.1 net.sf.py4j py4j 0.10.8.1 commons-codec commons-codec 1.11 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 11 11 -verbose -Xlint:all maven-surefire-plugin 2.22.1 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin 3.3.0 - org.softwareheritage.graph.App + org.softwareheritage.graph.server.App jar-with-dependencies false make-assembly package single pl.project13.maven git-commit-id-plugin 3.0.1 get-the-git-infos revision initialize true true true true v* git.closest.tag.name ^v true org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java index dfe2d9b..b496687 100644 --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -1,196 +1,192 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.io.DataOutputStream; import java.io.FileOutputStream; import java.io.IOException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; -import org.softwareheritage.graph.algo.NodeIdConsumer; - -import org.softwareheritage.graph.algo.Stats; -import org.softwareheritage.graph.algo.Traversal; public class Entry { private Graph graph; private final long PATH_SEPARATOR_ID = -1; public void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); } public Graph get_graph() { return graph.copy(); } public String stats() { try { Stats stats = new Stats(graph.getPath()); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); return objectMapper.writeValueAsString(stats); } catch (IOException e) { throw new RuntimeException("Cannot read stats: " + e); } } private interface NodeCountVisitor { - void accept(long nodeId, NodeIdConsumer consumer); + void accept(long nodeId, Traversal.NodeIdConsumer consumer); } private int count_visitor(NodeCountVisitor f, long srcNodeId) { - int count[] = { 0 }; + int[] count = { 0 }; f.accept(srcNodeId, (node) -> { count[0]++; }); return count[0]; } public int count_leaves(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::leavesVisitor, srcNodeId); } public int count_neighbors(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::neighborsVisitor, srcNodeId); } public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::visitNodesVisitor, srcNodeId); } public QueryHandler get_handler(String clientFIFO) { return new QueryHandler(this.graph.copy(), clientFIFO); } public class QueryHandler { Graph graph; DataOutputStream out; String clientFIFO; public QueryHandler(Graph graph, String clientFIFO) { this.graph = graph; this.clientFIFO = clientFIFO; this.out = null; } public void writeNode(long nodeId) { try { out.writeLong(nodeId); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void writeEdge(long srcId, long dstId) { writeNode(srcId); writeNode(dstId); } public void writePath(ArrayList path) { for (Long nodeId : path) { writeNode(nodeId); } writeNode(PATH_SEPARATOR_ID); } public void open() { try { FileOutputStream file = new FileOutputStream(this.clientFIFO); this.out = new DataOutputStream(file); } catch (IOException e) { throw new RuntimeException("Cannot open client FIFO: " + e); } } public void close() { try { out.close(); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void leaves(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.leavesVisitor(srcNodeId, this::writeNode); close(); } public void neighbors(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.neighborsVisitor(srcNodeId, this::writeNode); close(); } public void visit_nodes(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitNodesVisitor(srcNodeId, this::writeNode); close(); } public void visit_edges(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitNodesVisitor(srcNodeId, null, this::writeEdge); close(); } public void visit_paths(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitPathsVisitor(srcNodeId, this::writePath); close(); } public void walk(String direction, String edgesFmt, String algorithm, long srcNodeId, long dstNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) { writeNode(nodeId); } close(); } public void walk_type(String direction, String edgesFmt, String algorithm, long srcNodeId, String dst) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) { writeNode(nodeId); } close(); } public void random_walk(String direction, String edgesFmt, int retries, long srcNodeId, long dstNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) { writeNode(nodeId); } close(); } public void random_walk_type(String direction, String edgesFmt, int retries, long srcNodeId, String dst) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) { writeNode(nodeId); } close(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java index db9a26f..2fce7cb 100644 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,218 +1,253 @@ package org.softwareheritage.graph; import java.io.IOException; -import it.unimi.dsi.big.webgraph.BVGraph; -import it.unimi.dsi.big.webgraph.ImmutableGraph; -import it.unimi.dsi.big.webgraph.Transform; -import it.unimi.dsi.lang.FlyweightPrototype; -import it.unimi.dsi.big.webgraph.LazyLongIterator; +import it.unimi.dsi.big.webgraph.*; -import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.backend.NodeIdMap; -import org.softwareheritage.graph.backend.NodeTypesMap; +import org.softwareheritage.graph.maps.NodeIdMap; +import org.softwareheritage.graph.maps.NodeTypesMap; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent * identifiers (PID) while WebGraph uses integers internally. These two mappings (long id ↔ * PID) are used for the input (users refer to the graph using PID) and the output (convert back to * PID for users results). However, since graph traversal can be restricted depending on the node * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full * PID lookup. * * @author The Software Heritage developers * @see org.softwareheritage.graph.AllowedEdges - * @see org.softwareheritage.graph.backend.NodeIdMap - * @see org.softwareheritage.graph.backend.NodeTypesMap + * @see org.softwareheritage.graph.maps.NodeIdMap + * @see org.softwareheritage.graph.maps.NodeTypesMap */ public class Graph extends ImmutableGraph { /** File extension for the SWH PID to long node id map */ public static final String PID_TO_NODE = ".pid2node.bin"; /** File extension for the long node id to SWH PID map */ public static final String NODE_TO_PID = ".node2pid.bin"; /** File extension for the long node id to node type map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ ImmutableGraph graph; /** Transposed compressed graph (used for backward traversals) */ ImmutableGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWH PIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { + this.path = path; this.graph = BVGraph.loadMapped(path); this.graphTransposed = BVGraph.loadMapped(path + "-transposed"); - this.path = path; this.nodeTypesMap = new NodeTypesMap(path); - - // TODO: we don't need to load the nodeIdMap now that all the - // translation between ints and PIDs is happening on the Python side. - // However, some parts of the code still depend on this, so it's - // commented out while we decide on what to do with it. - this.nodeIdMap = null; // new NodeIdMap(path, numNodes()); + this.nodeIdMap = new NodeIdMap(path, numNodes()); } - // Protected empty constructor to implement copy() - protected Graph() { } - - protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { + protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, + String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { this.graph = graph; this.graphTransposed = graphTransposed; this.path = path; this.nodeIdMap = nodeIdMap; this.nodeTypesMap = nodeTypesMap; } /** * Return a flyweight copy of the graph. */ @Override public Graph copy() { return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); } + @Override + public boolean randomAccess() { + return graph.randomAccess() && graphTransposed.randomAccess(); + } + + /** + * Return a transposed version of the graph. + */ public Graph transpose() { - return new Graph(this.graphTransposed.copy(), this.graph.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); + return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap); } + /** + * Return a symmetric version of the graph. + */ public Graph symmetrize() { ImmutableGraph symmetric = Transform.union(graph, graphTransposed); return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } - - /** - * Returns the graph full path. - * - * @return graph full path - */ - public String getPath() { - return path; - } - - /** - * Converts {@link SwhPID} node to long. - * - * @param swhPID node specified as a {@link SwhPID} - * @return internal long node id - * @see org.softwareheritage.graph.SwhPID - */ - public long getNodeId(SwhPID swhPID) { - return nodeIdMap.getNodeId(swhPID); - } - - /** - * Converts long id node to {@link SwhPID}. - * - * @param nodeId node specified as a long id - * @return external SWH PID - * @see org.softwareheritage.graph.SwhPID - */ - public SwhPID getSwhPID(long nodeId) { - return nodeIdMap.getSwhPID(nodeId); - } - - /** - * Returns node type. - * - * @param nodeId node specified as a long id - * @return corresponding node type - * @see org.softwareheritage.graph.Node.Type - */ - public Node.Type getNodeType(long nodeId) { - return nodeTypesMap.getType(nodeId); - } - - public boolean randomAccess() { return graph.randomAccess() && graphTransposed.randomAccess(); } - /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ + @Override public long numNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ + @Override public long numArcs() { return graph.numArcs(); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator */ + @Override public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } + /** + * Returns lazy iterator of successors of a node while following a specific set of edge types. + * + * @param nodeId node specified as a long id + * @param allowedEdges the specification of which edges can be traversed + * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + */ + public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) { + if (allowedEdges.restrictedTo == null) { + // All edges are allowed, bypass edge check + return this.successors(nodeId); + } else { + LazyLongIterator allSuccessors = this.successors(nodeId); + return new LazyLongIterator() { + @Override + public long nextLong() { + long neighbor; + while ((neighbor = allSuccessors.nextLong()) != -1) { + if (allowedEdges.isAllowed(nodeId, neighbor)) { + return neighbor; + } + } + return -1; + } + + @Override + public long skip(final long n) { + long i; + for (i = 0; i < n && nextLong() != -1; i++); + return i; + } + }; + } + } + /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ + @Override public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { - return graphTransposed.successors(nodeId); + return this.transpose().successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { - return graphTransposed.outdegree(nodeId); + return this.transpose().outdegree(nodeId); } /** * Returns the underlying BVGraph. * * @return WebGraph BVGraph */ public ImmutableGraph getGraph() { return this.graph; } + + /** + * Returns the graph full path. + * + * @return graph full path + */ + public String getPath() { + return path; + } + + /** + * Converts {@link SwhPID} node to long. + * + * @param swhPID node specified as a {@link SwhPID} + * @return internal long node id + * @see org.softwareheritage.graph.SwhPID + */ + public long getNodeId(SwhPID swhPID) { + return nodeIdMap.getNodeId(swhPID); + } + + /** + * Converts long id node to {@link SwhPID}. + * + * @param nodeId node specified as a long id + * @return external SWH PID + * @see org.softwareheritage.graph.SwhPID + */ + public SwhPID getSwhPID(long nodeId) { + return nodeIdMap.getSwhPID(nodeId); + } + + /** + * Returns node type. + * + * @param nodeId node specified as a long id + * @return corresponding node type + * @see org.softwareheritage.graph.Node.Type + */ + public Node.Type getNodeType(long nodeId) { + return nodeTypesMap.getType(nodeId); + } } diff --git a/java/src/main/java/org/softwareheritage/graph/Neighbors.java b/java/src/main/java/org/softwareheritage/graph/Neighbors.java deleted file mode 100644 index d7514a1..0000000 --- a/java/src/main/java/org/softwareheritage/graph/Neighbors.java +++ /dev/null @@ -1,80 +0,0 @@ -package org.softwareheritage.graph; - -import java.util.Iterator; - -import it.unimi.dsi.big.webgraph.LazyLongIterator; - -import org.softwareheritage.graph.AllowedEdges; -import org.softwareheritage.graph.Graph; - -/** - * Iterator class to go over a node neighbors in the graph. - *

- * Wrapper iterator class to easily deal with {@link AllowedEdges} during traversals. - * - * @author The Software Heritage developers - * @see org.softwareheritage.graph.AllowedEdges - */ - -public class Neighbors implements Iterable { - /** Graph used to explore neighbors */ - Graph graph; - /** Graph edge restriction */ - AllowedEdges edges; - /** Source node from which neighbors will be listed */ - long srcNodeId; - - /** - * Constructor. - * - * @param graph graph used to explore neighbors - * @param edges edges allowed to be used in the graph - * @param srcNodeId source node from where to list neighbors - */ - public Neighbors(Graph graph, AllowedEdges edges, long srcNodeId) { - this.graph = graph; - this.edges = edges; - this.srcNodeId = srcNodeId; - } - - @Override - public Iterator iterator() { - return new NeighborsIterator(); - } - - /** - * Inner class for {@link Neighbors} iterator. - * - * @author The Software Heritage developers - */ - - public class NeighborsIterator implements Iterator { - LazyLongIterator neighbors; - long nextNeighborId; - - public NeighborsIterator() { - this.neighbors = graph.successors(srcNodeId); - this.nextNeighborId = -1; - } - - public boolean hasNext() { - // Case 1: no edge restriction, bypass type checks and skip to next neighbor - if (edges.restrictedTo == null) { - nextNeighborId = neighbors.nextLong(); - return (nextNeighborId != -1); - } - - // Case 2: edge restriction, look ahead for next neighbor - while ((nextNeighborId = neighbors.nextLong()) != -1) { - if (edges.isAllowed(srcNodeId, nextNeighborId)) { - return true; - } - } - return false; - } - - public Long next() { - return nextNeighborId; - } - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/algo/Stats.java b/java/src/main/java/org/softwareheritage/graph/Stats.java similarity index 94% rename from java/src/main/java/org/softwareheritage/graph/algo/Stats.java rename to java/src/main/java/org/softwareheritage/graph/Stats.java index 8a0e708..b7200d6 100644 --- a/java/src/main/java/org/softwareheritage/graph/algo/Stats.java +++ b/java/src/main/java/org/softwareheritage/graph/Stats.java @@ -1,68 +1,68 @@ -package org.softwareheritage.graph.algo; +package org.softwareheritage.graph; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; /** * Statistics on the compressed graph. *

* These statistics are not computed but directly read from WebGraph generated .stats and .properties files. * * @author The Software Heritage developers */ public class Stats { - public class Counts { + public static class Counts { public long nodes; public long edges; } - public class Ratios { + public static class Ratios { public double compression; public double bitsPerNode; public double bitsPerEdge; public double avgLocality; } - public class Degree { + public static class Degree { public long min; public long max; public double avg; } public Counts counts; public Ratios ratios; public Degree indegree; public Degree outdegree; /** * Constructor. * * @param graphPath path and basename of compressed graph */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(graphPath + ".properties")); properties.load(new FileInputStream(graphPath + ".stats")); this.counts = new Counts(); this.ratios = new Ratios(); this.indegree = new Degree(); this.outdegree = new Degree(); this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); this.counts.edges = Long.parseLong(properties.getProperty("arcs")); this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); } } diff --git a/java/src/main/java/org/softwareheritage/graph/SwhPath.java b/java/src/main/java/org/softwareheritage/graph/SwhPath.java index 83772a2..e0d1501 100644 --- a/java/src/main/java/org/softwareheritage/graph/SwhPath.java +++ b/java/src/main/java/org/softwareheritage/graph/SwhPath.java @@ -1,124 +1,122 @@ package org.softwareheritage.graph; -import java.util.ArrayList; - import com.fasterxml.jackson.annotation.JsonValue; -import org.softwareheritage.graph.SwhPID; +import java.util.ArrayList; /** * Wrapper class to store a list of {@link SwhPID}. * * @author The Software Heritage developers * @see org.softwareheritage.graph.SwhPID */ public class SwhPath { /** Internal list of {@link SwhPID} */ ArrayList path; /** * Constructor. */ public SwhPath() { - this.path = new ArrayList(); + this.path = new ArrayList<>(); } /** * Constructor. * * @param swhPIDs variable number of string PIDs to initialize this path with */ public SwhPath(String... swhPIDs) { this(); for (String swhPID : swhPIDs) { add(new SwhPID(swhPID)); } } /** * Constructor. * * @param swhPIDs variable number of {@link SwhPID} to initialize this path with * @see org.softwareheritage.graph.SwhPID */ public SwhPath(SwhPID... swhPIDs) { this(); for (SwhPID swhPID : swhPIDs) { add(swhPID); } } /** * Returns this path as a list of {@link SwhPID}. * * @return list of {@link SwhPID} constituting the path * @see org.softwareheritage.graph.SwhPID */ @JsonValue public ArrayList getPath() { return path; } /** * Adds a {@link SwhPID} to this path. * * @param swhPID {@link SwhPID} to add to this path * @see org.softwareheritage.graph.SwhPID */ public void add(SwhPID swhPID) { path.add(swhPID); } /** * Returns the {@link SwhPID} at the specified position in this path. * * @param index position of the {@link SwhPID} to return * @return {@link SwhPID} at the specified position * @see org.softwareheritage.graph.SwhPID */ public SwhPID get(int index) { return path.get(index); } /** * Returns the number of elements in this path. * * @return number of elements in this path */ public int size() { return path.size(); } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SwhPath)) return false; SwhPath other = (SwhPath) otherObj; if (size() != other.size()) { return false; } for (int i = 0; i < size(); i++) { SwhPID thisSwhPID = get(i); SwhPID otherSwhPID = other.get(i); if (!thisSwhPID.equals(otherSwhPID)) { return false; } } return true; } @Override public String toString() { - String str = new String(); + StringBuilder str = new StringBuilder(); for (SwhPID swhPID : path) { - str += swhPID + "/"; + str.append(swhPID).append("/"); } - return str; + return str.toString(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java similarity index 86% rename from java/src/main/java/org/softwareheritage/graph/algo/Traversal.java rename to java/src/main/java/org/softwareheritage/graph/Traversal.java index 0c8a400..421c02e 100644 --- a/java/src/main/java/org/softwareheritage/graph/algo/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java @@ -1,495 +1,523 @@ -package org.softwareheritage.graph.algo; +package org.softwareheritage.graph; import java.util.*; +import java.util.function.Consumer; +import java.util.function.LongConsumer; -import org.softwareheritage.graph.AllowedEdges; -import org.softwareheritage.graph.Endpoint; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.Neighbors; -import org.softwareheritage.graph.Node; +import it.unimi.dsi.big.webgraph.LazyLongIterator; +import org.softwareheritage.graph.server.Endpoint; /** * Traversal algorithms on the compressed graph. *

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to Software Heritage * PID. * * @author The Software Heritage developers - * @see org.softwareheritage.graph.Endpoint + * @see Endpoint */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Graph edge restriction */ AllowedEdges edges; /** Hash set storing if we have visited a node */ HashSet visited; /** Hash map storing parent node id for each nodes during a traversal */ Map parentNode; /** Number of edges accessed during traversal */ long nbEdgesAccessed; /** random number generator, for random walks */ Random rng; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } if (direction.equals("backward")) { this.graph = graph.transpose(); } else { this.graph = graph; } this.edges = new AllowedEdges(graph, edgesFmt); this.visited = new HashSet<>(); this.parentNode = new HashMap<>(); this.nbEdgesAccessed = 0; this.rng = new Random(); } /** * Returns number of accessed edges during traversal. * * @return number of edges accessed in last traversal */ public long getNbEdgesAccessed() { return nbEdgesAccessed; } /** * Returns number of accessed nodes during traversal. * * @return number of nodes accessed in last traversal */ public long getNbNodesAccessed() { return this.visited.size(); } /** * Push version of {@link #leaves} will fire passed callback for each leaf. */ public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, currentNodeId)) { + LazyLongIterator it = graph.successors(currentNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { neighborsCnt++; if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } if (neighborsCnt == 0) { cb.accept(currentNodeId); } } } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); leavesVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** * Push version of {@link #neighbors}: will fire passed callback on each * neighbor. */ public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) { this.nbEdgesAccessed = graph.outdegree(srcNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, srcNodeId)) { + LazyLongIterator it = graph.successors(srcNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { cb.accept(neighborNodeId); } } /** * Returns node direct neighbors (linked with exactly one edge). * * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); neighborsVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** * Push version of {@link #visitNodes}: will fire passed callback on each * visited node. */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (nodeCb != null) { nodeCb.accept(currentNodeId); } nbEdgesAccessed += graph.outdegree(currentNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, currentNodeId)) { + LazyLongIterator it = graph.successors(currentNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (edgeCb != null) { edgeCb.accept(currentNodeId, neighborNodeId); } if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } } /** One-argument version to handle callbacks properly */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { visitNodesVisitor(srcNodeId, cb, null); } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); visitNodesVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** * Push version of {@link #visitPaths}: will fire passed callback on each * discovered (complete) path. */ public void visitPathsVisitor(long srcNodeId, PathConsumer cb) { Stack currentPath = new Stack<>(); this.nbEdgesAccessed = 0; visitPathsInternalVisitor(srcNodeId, currentPath, cb); } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); visitPathsVisitor(srcNodeId, paths::add); return paths; } private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) { currentPath.push(currentNodeId); long visitedNeighbors = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, currentNodeId)) { + LazyLongIterator it = graph.successors(currentNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { visitPathsInternalVisitor(neighborNodeId, currentPath, cb); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList<>(currentPath); cb.accept(path); } currentPath.pop(); } /** * Performs a graph traversal with backtracking, and returns the first * found path from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String visitOrder) { long dstNodeId; if (visitOrder.equals("dfs")) { dstNodeId = walkInternalDFS(srcNodeId, dst); } else if (visitOrder.equals("bfs")) { dstNodeId = walkInternalBFS(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown visit order: " + visitOrder); } if (dstNodeId == -1) { throw new IllegalArgumentException("Cannot find destination: " + dst); } return backtracking(srcNodeId, dstNodeId); } /** * Performs a random walk (picking a random successor at each step) from * source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids or an empty path to indicate * that no suitable path have been found */ public ArrayList randomWalk(long srcNodeId, T dst) { return randomWalk(srcNodeId, dst, 0); } /** * Performs a stubborn random walk (picking a random successor at each * step) from source to destination. The walk is "stubborn" in the sense * that it will not give up the first time if a satisfying target node is * found, but it will retry up to a limited amount of times. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @param retries number of times to retry; 0 means no retries (single walk) * @return found path as a list of node ids or an empty path to indicate * that no suitable path have been found */ public ArrayList randomWalk(long srcNodeId, T dst, int retries) { long curNodeId = srcNodeId; ArrayList path = new ArrayList<>(); this.nbEdgesAccessed = 0; boolean found; if (retries < 0) { throw new IllegalArgumentException("Negative number of retries given: " + retries); } while (true) { path.add(curNodeId); - Neighbors neighbors = new Neighbors(graph, edges, curNodeId); - curNodeId = randomPick(neighbors.iterator()); + LazyLongIterator successors = graph.successors(curNodeId, edges); + curNodeId = randomPick(successors); if (curNodeId < 0) { found = false; break; } if (isDstNode(curNodeId, dst)) { path.add(curNodeId); found = true; break; } } if (found) { return path; } else if (retries > 0) { // try again return randomWalk(srcNodeId, dst, retries - 1); } else { // not found and no retries left path.clear(); return path; } } /** * Randomly choose an element from an iterator over Longs using reservoir * sampling * * @param elements iterator over selection domain * @return randomly chosen element or -1 if no suitable element was found */ - private long randomPick(Iterator elements) { + private long randomPick(LazyLongIterator elements) { long curPick = -1; long seenCandidates = 0; - while (elements.hasNext()) { + for (long element; (element = elements.nextLong()) != -1;) { seenCandidates++; if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) { - curPick = elements.next(); + curPick = element; } } return curPick; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDFS(long srcNodeId, T dst) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, currentNodeId)) { + LazyLongIterator it = graph.successors(currentNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBFS(long srcNodeId, T dst) { Queue queue = new LinkedList<>(); this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.add(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); - for (long neighborNodeId : new Neighbors(graph, edges, currentNodeId)) { + LazyLongIterator it = graph.successors(currentNodeId, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { queue.add(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList<>(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = parentNode.get(currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } /** * Find a common descendant between two given nodes using two parallel BFS * * @param lhsNode the first node * @param rhsNode the second node * @return the found path, as a list of node ids */ public Long findCommonDescendant(long lhsNode, long rhsNode) { Queue lhsStack = new ArrayDeque<>(); Queue rhsStack = new ArrayDeque<>(); HashSet lhsVisited = new HashSet<>(); HashSet rhsVisited = new HashSet<>(); lhsStack.add(lhsNode); rhsStack.add(rhsNode); lhsVisited.add(lhsNode); rhsVisited.add(rhsNode); this.nbEdgesAccessed = 0; Long curNode; while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) { if (!lhsStack.isEmpty()) { curNode = lhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); - for (long neighborNodeId : new Neighbors(graph, edges, curNode)) { + LazyLongIterator it = graph.successors(curNode, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!lhsVisited.contains(neighborNodeId)) { if (rhsVisited.contains(neighborNodeId)) return neighborNodeId; lhsStack.add(neighborNodeId); lhsVisited.add(neighborNodeId); } } } if (!rhsStack.isEmpty()) { curNode = rhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); - for (long neighborNodeId : new Neighbors(graph, edges, curNode)) { + LazyLongIterator it = graph.successors(curNode, edges); + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!rhsVisited.contains(neighborNodeId)) { if (lhsVisited.contains(neighborNodeId)) return neighborNodeId; rhsStack.add(neighborNodeId); rhsVisited.add(neighborNodeId); } } } } return null; } + + public interface NodeIdConsumer extends LongConsumer { + /** Callback for incrementally receiving node identifiers during a graph + * visit. + */ + void accept(long nodeId); + } + + public interface EdgeIdConsumer { + /** Callback for incrementally receiving edge identifiers during a graph + * visit. + */ + void accept(long srcId, long dstId); + } + + public interface PathConsumer extends Consumer> { + /** Callback for incrementally receiving node paths (made of node + * identifiers) during a graph visit. + */ + void accept(ArrayList path); + } } diff --git a/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java b/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java deleted file mode 100644 index 7c82f0b..0000000 --- a/java/src/main/java/org/softwareheritage/graph/algo/EdgeIdConsumer.java +++ /dev/null @@ -1,9 +0,0 @@ -package org.softwareheritage.graph.algo; - -public interface EdgeIdConsumer { - - /** Callback for incrementally receiving edge identifiers during a graph - * visit. - */ - void accept(long srcId, long dstId); -} diff --git a/java/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java b/java/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java deleted file mode 100644 index fdcd6f1..0000000 --- a/java/src/main/java/org/softwareheritage/graph/algo/NodeIdConsumer.java +++ /dev/null @@ -1,11 +0,0 @@ -package org.softwareheritage.graph.algo; - -import java.util.function.LongConsumer; - -public interface NodeIdConsumer extends LongConsumer { - - /** Callback for incrementally receiving node identifiers during a graph - * visit. - */ - void accept(long nodeId); -} diff --git a/java/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java b/java/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java deleted file mode 100644 index 50d5b15..0000000 --- a/java/src/main/java/org/softwareheritage/graph/algo/PathConsumer.java +++ /dev/null @@ -1,12 +0,0 @@ -package org.softwareheritage.graph.algo; - -import java.util.ArrayList; -import java.util.function.Consumer; - -public interface PathConsumer extends Consumer> { - - /** Callback for incrementally receiving node paths (made of node - * identifiers) during a graph visit. - */ - void accept(ArrayList path); -} diff --git a/java/src/main/java/org/softwareheritage/graph/backend/Pp.java b/java/src/main/java/org/softwareheritage/graph/backend/Pp.java deleted file mode 100644 index af495a7..0000000 --- a/java/src/main/java/org/softwareheritage/graph/backend/Pp.java +++ /dev/null @@ -1,23 +0,0 @@ -package org.softwareheritage.graph.backend; - -import java.io.IOException; -import it.unimi.dsi.fastutil.Size64; -import it.unimi.dsi.fastutil.io.BinIO; -import it.unimi.dsi.fastutil.objects.Object2LongFunction; - -public class Pp { - @SuppressWarnings("unchecked") - public static void main(String[] args) throws IOException { - - Object2LongFunction mphMap = null; - try { - mphMap = (Object2LongFunction) BinIO.loadObject("all.mph"); - } catch (ClassNotFoundException e) { - throw new IllegalArgumentException("The .mph file contains unknown class object: " + e); - } - - long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); - - System.out.println("mph size: " + nbIds); - } -} diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java index bdf51b6..005d3f1 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java @@ -1,170 +1,170 @@ package org.softwareheritage.graph.benchmark; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.StringJoiner; import java.util.function.Function; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; -import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.server.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; /** * Benchmark common utility functions. * * @author The Software Heritage developers */ public class Benchmark { /** * Input arguments. */ public class Args { /** Basename of the compressed graph */ public String graphPath; /** Number of random nodes to use for the benchmark */ public int nbNodes; /** File name for CSV format benchmark log */ public String logFile; /** Random generator */ public Random random; } /** Command line arguments */ public Args args; /** CSV separator for log file */ final String CSV_SEPARATOR = ";"; /** * Constructor. */ public Benchmark() { this.args = new Args(); } /** * Parses benchmark command line arguments. * * @param args command line arguments */ public void parseCommandLineArgs(String[] args) throws JSAPException { SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), "Benchmark tool for Software Heritage use-cases scenarios.", new Parameter[] { new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', "nb-nodes", "Number of random nodes used to do the benchmark."), new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log-file", "File name to output CSV format benchmark log."), new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed", "Random generator seed."), }); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } this.args.graphPath = config.getString("graphPath"); this.args.nbNodes = config.getInt("nbNodes"); this.args.logFile = config.getString("logFile"); this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); } /** * Creates CSV file for log output. */ public void createCSVLogFile() throws IOException { try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); csvHeader.add("use case name") .add("SWH PID") .add("number of edges accessed") .add("traversal timing") .add("pid2node timing") .add("node2pid timing"); csvLog.write(csvHeader.toString() + "\n"); } } /** * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. * * @param useCaseName benchmark use-case name * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark * @param dstFmt destination formatted string as described in the API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, Function operation, String dstFmt, String algorithm) throws IOException { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); ArrayList nbEdgesAccessed = new ArrayList<>(); final boolean append = true; try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { for (long nodeId : nodeIds) { SwhPID swhPID = graph.getSwhPID(nodeId); Endpoint.Output output = (dstFmt == null) ? operation.apply(new Endpoint.Input(swhPID)) : operation.apply(new Endpoint.Input(swhPID, dstFmt, algorithm)); StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); csvLine.add(useCaseName) .add(swhPID.toString()) .add(Long.toString(output.meta.nbEdgesAccessed)) .add(Double.toString(output.meta.timings.traversal)) .add(Double.toString(output.meta.timings.pid2node)) .add(Double.toString(output.meta.timings.node2pid)); csvLog.write(csvLine.toString() + "\n"); timings.add(output.meta.timings.traversal); nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } } System.out.println("\n" + useCaseName + " use-case:"); System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); System.out.println("nb edges accessed:"); Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); statsNbEdgesAccessed.printAll(); } /** * Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call. */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, Function operation) throws IOException { timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 188685f..b060d55 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,46 +1,45 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.server.Endpoint; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage browsing * use-cases scenarios. * * @author The Software Heritage developers */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java index f676607..d4ccdff 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/GenDistribution.java @@ -1,134 +1,134 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.algo.Traversal; +import org.softwareheritage.graph.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.Scanner; import java.util.concurrent.*; public class GenDistribution { private Graph graph; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP( GenDistribution.class.getName(), "", new Parameter[] { new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', "srctype", "Source node type"), new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd', "dsttype", "Destination node type"), new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', "edges", "Edges constraints"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't', "numthreads", "Number of threads"), } ); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); Node.Type srcType = Node.Type.fromStr(config.getString("srcType")); Node.Type dstType = Node.Type.fromStr(config.getString("dstType")); String edgesFmt = config.getString("edgesFmt"); int numThreads = config.getInt("numThreads"); GenDistribution tp = new GenDistribution(); try { tp.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long node = input.nextLong(); if (tp.graph.getNodeType(node) == srcType) { queue.put(node); } } } catch (InterruptedException e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { Graph thread_graph = tp.graph.copy(); long startTime; double totalTime; while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } Traversal t = new Traversal(thread_graph, "backward", edgesFmt); int[] count = { 0 }; startTime = Timing.start(); t.visitNodesVisitor(node, (curnode) -> { if (tp.graph.getNodeType(curnode) == dstType) { count[0]++; } }); totalTime = Timing.stop(startTime); System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(), t.getNbEdgesAccessed(), totalTime ); } }); } service.shutdown(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index 613c2b5..f8aec35 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,53 +1,52 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.server.Endpoint; import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage provenance * use-cases scenarios. * * @author The Software Heritage developers */ public class Provenance { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint( "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); bench.timeEndpoint( "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); bench.timeEndpoint( "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); bench.timeEndpoint( "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); bench.timeEndpoint( "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); bench.timeEndpoint( "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index 3e43a2a..c42b96a 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,39 +1,38 @@ package org.softwareheritage.graph.benchmark; import java.io.IOException; import com.martiansoftware.jsap.JSAPException; -import org.softwareheritage.graph.Endpoint; +import org.softwareheritage.graph.server.Endpoint; import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.benchmark.Benchmark; /** * Benchmark Software Heritage vault * use-case scenario. * * @author The Software Heritage developers */ public class Vault { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java similarity index 98% rename from java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java rename to java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java index 722dd74..2eebc4c 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/ForkCC.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java @@ -1,216 +1,217 @@ -package org.softwareheritage.graph.benchmark; +package org.softwareheritage.graph.experiments.forks; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; +import org.softwareheritage.graph.benchmark.BFS; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; public class ForkCC { public Boolean includeRootDir; private Graph graph; private Long emptySnapshot; private LongArrayBitVector visited; private LongArrayBitVector whitelist; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; this.whitelist = null; this.visited = null; this.includeRootDir = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { SimpleJSAP jsap = new SimpleJSAP( ForkCC.class.getName(), "", new Parameter[] { new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', "graph", "Basename of the compressed graph"), new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', "whitelist", "Whitelist of origins"), new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 'w', "numthreads", "Number of threads"), new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R', "includerootdir", "Include root directory (default: false)"), } ); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private boolean nodeIsEmptySnapshot(Long node) { if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node){ Node.Type nt = this.graph.getNodeType(node); if (nt == Node.Type.CNT) { return false; } if (nt == Node.Type.DIR && !includeRootDir) return false; if (this.nodeIsEmptySnapshot(node)) return false; if (visited.getBoolean(node)) return false; return true; } private ArrayList> compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 visited = LongArrayBitVector.ofLength(n); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); ArrayList> components = new ArrayList<>(); for (long i = 0; i < n; i++) { if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue; ArrayList component = new ArrayList<>(); queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); Node.Type cur_nt = this.graph.getNodeType(currentNode); if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) { // TODO: add a check that the origin has >=1 non-empty snapshot component.add(currentNode); } final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!shouldVisit(succ)) continue; if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } if (component.size() > 0) { components.add(component); } } pl.done(); queue.close(); return components; } private static void printDistribution(ArrayList> components) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { System.out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { System.out.println(node); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner = null; try { scanner = new Scanner(new File(path)); while(scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); int numThreads = config.getInt("numThreads"); String whitelistPath = config.getString("whitelistPath"); boolean includeRootDir = config.getBoolean("includeRootDir"); ForkCC forkCc = new ForkCC(); try { forkCc.load_graph(graphPath); forkCc.includeRootDir = includeRootDir; } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCc.parseWhitelist(whitelistPath); } ProgressLogger logger = new ProgressLogger(); try { ArrayList> components = forkCc.compute(logger); printDistribution(components); // printLargestComponent(components); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/backend/MapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java similarity index 98% rename from java/src/main/java/org/softwareheritage/graph/backend/MapBuilder.java rename to java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java index 1f57665..4bb6506 100644 --- a/java/src/main/java/org/softwareheritage/graph/backend/MapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/MapBuilder.java @@ -1,217 +1,215 @@ -package org.softwareheritage.graph.backend; +package org.softwareheritage.graph.maps; import java.io.*; import java.nio.charset.StandardCharsets; import java.util.Scanner; import java.util.concurrent.*; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; -import it.unimi.dsi.fastutil.objects.ObjectBigArrays; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.backend.NodeTypesMap; /** * Create maps needed at runtime by the graph service, in particular: * * - SWH PID → WebGraph long node id * - WebGraph long node id → SWH PID (converse of the former) * - WebGraph long node id → SWH node type (enum) * * @author The Software Heritage developers */ public class MapBuilder { final static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(MapBuilder.class); /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { if (args.length != 2) { logger.error("Usage: COMPRESSED_GRAPH_BASE_NAME TEMP_DIR < NODES_CSV"); System.exit(1); } String graphPath = args[0]; String tmpDir = args[1]; logger.info("starting maps generation..."); precomputeNodeIdMap(graphPath, tmpDir); logger.info("maps generation completed"); } /** * Computes and dumps on disk mapping files. * * @param graphPath path of the compressed graph */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") static void precomputeNodeIdMap(String graphPath, String tmpDir) throws IOException { ProgressLogger plPid2Node = new ProgressLogger(logger, 10, TimeUnit.SECONDS); ProgressLogger plNode2Pid = new ProgressLogger(logger, 10, TimeUnit.SECONDS); plPid2Node.itemsName = "pid→node"; plNode2Pid.itemsName = "node→pid"; // avg speed for pid→node is sometime skewed due to write to the sort // pipe hanging when sort is sorting; hence also desplay local speed plPid2Node.displayLocalSpeed = true; // first half of PID->node mapping: PID -> WebGraph MPH (long) Object2LongFunction mphMap = null; try { logger.info("loading MPH function..."); mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); logger.info("MPH function loaded"); } catch (ClassNotFoundException e) { logger.error("unknown class object in .mph file: " + e); System.exit(2); } long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); plPid2Node.expectedUpdates = nbIds; plNode2Pid.expectedUpdates = nbIds; // second half of PID->node mapping: WebGraph MPH (long) -> BFS order (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); logger.info("loading BFS order file..."); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); logger.info("BFS order file loaded"); if (loaded != nbIds) { logger.error("graph contains " + nbIds + " nodes, but read " + loaded); System.exit(2); } // Create mapping SWH PID -> WebGraph node id, by sequentially reading // nodes, hashing them with MPH, and permuting according to BFS order FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, StandardCharsets.US_ASCII)); LineIterator swhPIDIterator = new LineIterator(buffer); // The WebGraph node id -> SWH PID mapping can be obtained from the // PID->node one by numerically sorting on node id and sequentially // writing obtained PIDs to a binary map. Delegates the sorting job to // /usr/bin/sort via pipes ProcessBuilder processBuilder = new ProcessBuilder(); processBuilder.command("sort", "--numeric-sort", "--key", "2", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir); Process sort = processBuilder.start(); BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream()); BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream()); // for the binary format of pidToNodeMap, see Python module swh.graph.pid:PidToIntMap // for the binary format of nodeToPidMap, see Python module swh.graph.pid:IntToPidMap try (DataOutputStream pidToNodeMap = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(graphPath + Graph.PID_TO_NODE))); BufferedOutputStream nodeToPidMap = new BufferedOutputStream(new FileOutputStream(graphPath + Graph.NODE_TO_PID))) { // background handler for sort output, it will be fed PID/node // pairs while pidToNodeMap is being filled, and will itself fill // nodeToPidMap as soon as data from sort is ready SortOutputHandler outputHandler = new SortOutputHandler(sort_stdout, nodeToPidMap, plNode2Pid); outputHandler.start(); // Type map from WebGraph node ID to SWH type. Used at runtime by // pure Java graph traversals to efficiently check edge // restrictions. final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); final int nbBitsPerNodeType = log2NbTypes; LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); plPid2Node.start("filling pid2node map"); for (long iNode = 0; iNode < nbIds && swhPIDIterator.hasNext(); iNode++) { String strSwhPID = swhPIDIterator.next().toString(); SwhPID swhPID = new SwhPID(strSwhPID); byte[] swhPIDBin = swhPID.toBytes(); long mphId = mphMap.getLong(strSwhPID); long nodeId = BigArrays.get(bfsMap, mphId); pidToNodeMap.write(swhPIDBin, 0, swhPIDBin.length); pidToNodeMap.writeLong(nodeId); sort_stdin.write((strSwhPID + "\t" + nodeId + "\n") .getBytes(StandardCharsets.US_ASCII)); nodeTypesMap.set(nodeId, swhPID.getType().ordinal()); plPid2Node.lightUpdate(); } plPid2Node.done(); sort_stdin.close(); // write type map logger.info("storing type map"); BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); logger.info("type map stored"); // wait for nodeToPidMap filling try { logger.info("waiting for node2pid map..."); int sortExitCode = sort.waitFor(); if (sortExitCode != 0) { logger.error("sort returned non-zero exit code: " + sortExitCode); System.exit(2); } outputHandler.join(); } catch (InterruptedException e) { logger.error("processing of sort output failed with: " + e); System.exit(2); } } } private static class SortOutputHandler extends Thread { private Scanner input; private OutputStream output; private ProgressLogger pl; SortOutputHandler(InputStream input, OutputStream output, ProgressLogger pl) { this.input = new Scanner(input, StandardCharsets.US_ASCII); this.output = output; this.pl = pl; } public void run() { boolean sortDone = false; logger.info("node2pid: waiting for sort output..."); while (input.hasNextLine()) { if (! sortDone) { sortDone = true; this.pl.start("filling node2pid map"); } String line = input.nextLine(); // format: SWH_PID NODE_ID SwhPID swhPID = new SwhPID(line.split("\\t")[0]); // get PID try { output.write((byte[]) swhPID.toBytes()); } catch (IOException e) { logger.error("writing to node->PID map failed with: " + e); } this.pl.lightUpdate(); } this.pl.done(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/backend/MapFile.java b/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java similarity index 97% rename from java/src/main/java/org/softwareheritage/graph/backend/MapFile.java rename to java/src/main/java/org/softwareheritage/graph/maps/MapFile.java index a9a8b67..4c16130 100644 --- a/java/src/main/java/org/softwareheritage/graph/backend/MapFile.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/MapFile.java @@ -1,62 +1,62 @@ -package org.softwareheritage.graph.backend; +package org.softwareheritage.graph.maps; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import it.unimi.dsi.io.ByteBufferInputStream; /** * Wrapper class around very big mmap()-ed file. *

* Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this * Java limit. * * @author The Software Heritage developers */ public class MapFile { /** Memory-mapped file buffer */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; /** * Constructor. * * @param path file path to mmap() * @param lineLength fixed length of a line in the file */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); } } /** * Returns a specific line in the file. * * @param lineIndex line number in the file * @return the line at the specified position */ public byte[] readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); return buffer; } /** * Closes the mmap()-ed file. */ public void close() throws IOException { bufferMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java similarity index 96% rename from java/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java rename to java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java index e86b5ab..fd21f50 100644 --- a/java/src/main/java/org/softwareheritage/graph/backend/NodeIdMap.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java @@ -1,115 +1,114 @@ -package org.softwareheritage.graph.backend; +package org.softwareheritage.graph.maps; import java.io.IOException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.backend.MapFile; /** * Mapping between internal long node id and external SWH PID. * * Mappings in both directions are pre-computed and dumped on disk in the * {@link MapBuilder} class, then they are loaded here using mmap(). * * @author The Software Heritage developers - * @see org.softwareheritage.graph.backend.MapBuilder + * @see org.softwareheritage.graph.maps.MapBuilder */ public class NodeIdMap { /** Fixed length of full SWH PID */ public static final int SWH_ID_LENGTH = 50; /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; /** Fixed length of binary SWH PID buffer */ public static final int SWH_ID_BIN_SIZE = 22; /** Fixed length of binary node id buffer */ public static final int NODE_ID_BIN_SIZE = 8; /** Graph path and basename */ String graphPath; /** Number of ids to map */ long nbIds; /** mmap()-ed PID_TO_NODE file */ MapFile swhToNodeMap; /** mmap()-ed NODE_TO_PID file */ MapFile nodeToSwhMap; /** * Constructor. * * @param graphPath full graph path * @param nbNodes number of nodes in the graph */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, SWH_ID_BIN_SIZE + NODE_ID_BIN_SIZE); this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, SWH_ID_BIN_SIZE); } /** * Converts SWH PID to corresponding long node id. * * @param swhPID node represented as a {@link SwhPID} * @return corresponding node as a long id * @see org.softwareheritage.graph.SwhPID */ public long getNodeId(SwhPID swhPID) { // The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding // nodeId long start = 0; long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; byte[] buffer = swhToNodeMap.readAtLine(lineNumber); byte[] pidBuffer = new byte[SWH_ID_BIN_SIZE]; byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE]; System.arraycopy(buffer, 0, pidBuffer, 0, SWH_ID_BIN_SIZE); System.arraycopy(buffer, SWH_ID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE); String currentSwhPID = SwhPID.fromBytes(pidBuffer).getSwhPID(); long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong(); int cmp = currentSwhPID.compareTo(swhPID.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); } /** * Converts a node long id to corresponding SWH PID. * * @param nodeId node as a long id * @return corresponding node as a {@link SwhPID} * @see org.softwareheritage.graph.SwhPID */ public SwhPID getSwhPID(long nodeId) { // Each line in NODE_TO_PID is formatted as: swhPID // The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the // nodeId-th line to get corresponding swhPID if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } return SwhPID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); } /** * Closes the mapping files. */ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java similarity index 97% rename from java/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java rename to java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java index 0b1bf09..fccfa18 100644 --- a/java/src/main/java/org/softwareheritage/graph/backend/NodeTypesMap.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java @@ -1,55 +1,55 @@ -package org.softwareheritage.graph.backend; +package org.softwareheritage.graph.maps; import java.io.IOException; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; /** * Mapping between long node id and SWH node type as described in the data * model. * * The type mapping is pre-computed and dumped on disk in the {@link MapBuilder} * class, then it is loaded in-memory here using * fastutil LongBigList. To be * space-efficient, the mapping is stored as a bitmap using minimum number of * bits per {@link Node.Type}. * * @author The Software Heritage developers */ public class NodeTypesMap { /** * Array storing for each node its type */ public LongBigList nodeTypesMap; /** * Constructor. * * @param graphPath path and basename of the compressed graph */ public NodeTypesMap(String graphPath) throws IOException { try { nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Unknown class object: " + e); } } /** * Returns node type from a node long id. * * @param nodeId node as a long id * @return corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType(long nodeId) { long type = nodeTypesMap.getLong(nodeId); return Node.Type.fromInt((int) type); } } diff --git a/java/src/main/java/org/softwareheritage/graph/App.java b/java/src/main/java/org/softwareheritage/graph/server/App.java similarity index 98% rename from java/src/main/java/org/softwareheritage/graph/App.java rename to java/src/main/java/org/softwareheritage/graph/server/App.java index dabca46..c9edc14 100644 --- a/java/src/main/java/org/softwareheritage/graph/App.java +++ b/java/src/main/java/org/softwareheritage/graph/server/App.java @@ -1,195 +1,193 @@ -package org.softwareheritage.graph; +package org.softwareheritage.graph.server; import java.io.IOException; import java.util.List; import java.util.Map; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.FlaggedOption; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.Switch; import com.martiansoftware.jsap.UnflaggedOption; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; - -import org.softwareheritage.graph.Endpoint; import org.softwareheritage.graph.Graph; +import org.softwareheritage.graph.Stats; import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.algo.Stats; /** * Web framework of the swh-graph server REST API. * * @author The Software Heritage developers */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[] { new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new Switch("timings", 't', "timings", "Show timings in API result metadata."), }); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. * * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SwhPID src = new SwhPID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the REST API. * * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the REST API. *

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); return outputNoTimings; } } } diff --git a/java/src/main/java/org/softwareheritage/graph/Endpoint.java b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java similarity index 94% rename from java/src/main/java/org/softwareheritage/graph/Endpoint.java rename to java/src/main/java/org/softwareheritage/graph/server/Endpoint.java index 7e66d62..c1e3268 100644 --- a/java/src/main/java/org/softwareheritage/graph/Endpoint.java +++ b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java @@ -1,311 +1,308 @@ -package org.softwareheritage.graph; +package org.softwareheritage.graph.server; import java.util.ArrayList; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.SwhPath; -import org.softwareheritage.graph.algo.Traversal; +import org.softwareheritage.graph.*; import org.softwareheritage.graph.benchmark.utils.Timing; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author The Software Heritage developers - * @see org.softwareheritage.graph.algo.Traversal + * @see Traversal */ public class Endpoint { /** * Wrapper class to unify traversal methods input signatures. */ public static class Input { /** Source node of endpoint call specified as a {@link SwhPID} */ public SwhPID src; /** * Destination formatted string as described in the API */ public String dstFmt; /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public String algorithm; public Input(SwhPID src) { this.src = src; } public Input(SwhPID src, String dstFmt, String algorithm) { this.src = src; this.dstFmt = dstFmt; this.algorithm = algorithm; } } /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public static class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; /** Number of edges accessed during traversal */ public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. */ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWH PID to node id */ public double pid2node; /** Time in seconds to convert output node ids to SWH PIDs */ public double node2pid; } } } /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation * @param edgesFmt a formatted string describing allowed edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWH PIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWH PIDs */ private ArrayList convertNodesToSwhPIDs(ArrayList nodeIds) { ArrayList swhPIDs = new ArrayList<>(); for (long nodeId : nodeIds) { swhPIDs.add(graph.getSwhPID(nodeId)); } return swhPIDs; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSwhPID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSwhPIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#leaves(long) + * @see Traversal#leaves(long) */ public Output leaves(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#neighbors(long) + * @see Traversal#neighbors(long) */ public Output neighbors(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath - * @see org.softwareheritage.graph.algo.Traversal#walk + * @see Traversal#walk */ public Output walk(Input input) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWH PID or a node type try { SwhPID dstSwhPID = new SwhPID(input.dstFmt); long dstNodeId = graph.getNodeId(dstSwhPID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(input.dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPID} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID - * @see org.softwareheritage.graph.algo.Traversal#visitNodes(long) + * @see Traversal#visitNodes(long) */ public Output visitNodes(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPIDs(nodeIds); output.meta.timings.node2pid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see org.softwareheritage.graph.SwhPID * @see org.softwareheritage.graph.SwhPath - * @see org.softwareheritage.graph.algo.Traversal#visitPaths(long) + * @see Traversal#visitPaths(long) */ public Output visitPaths(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.pid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSwhPIDs(paths); output.meta.timings.node2pid = Timing.stop(startTime); return output; } } diff --git a/java/src/test/java/org/softwareheritage/graph/GraphTest.java b/java/src/test/java/org/softwareheritage/graph/GraphTest.java index 7962f61..43f207d 100644 --- a/java/src/test/java/org/softwareheritage/graph/GraphTest.java +++ b/java/src/test/java/org/softwareheritage/graph/GraphTest.java @@ -1,30 +1,30 @@ package org.softwareheritage.graph; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; import org.junit.Assert; import org.junit.BeforeClass; import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder; import org.softwareheritage.graph.Graph; public class GraphTest { static Graph graph; public static void assertEqualsAnyOrder(Collection expecteds, Collection actuals) { Assert.assertThat(expecteds, containsInAnyOrder(actuals.toArray())); } @BeforeClass public static void setUp() throws IOException { - Path graphPath = Paths.get("..", "..", "swh", "graph", "tests", "dataset", "output", "example"); + Path graphPath = Paths.get("..", "swh", "graph", "tests", "dataset", "output", "example"); graph = new Graph(graphPath.toString()); } public Graph getGraph() { return graph; } } diff --git a/java/src/test/java/org/softwareheritage/graph/LeavesTest.java b/java/src/test/java/org/softwareheritage/graph/LeavesTest.java index 44792e1..e74774e 100644 --- a/java/src/test/java/org/softwareheritage/graph/LeavesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/LeavesTest.java @@ -1,111 +1,108 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.junit.Test; -import org.softwareheritage.graph.Endpoint; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.GraphTest; -import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class LeavesTest extends GraphTest { @Test public void forwardFromSnp() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void forwardFromRel() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000015")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000014")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000011")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardFromLeaf() { Graph graph = getGraph(); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); SwhPID src1 = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000015"); ArrayList expectedLeaves1 = new ArrayList<>(); expectedLeaves1.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000019")); ArrayList actualLeaves1 = (ArrayList) endpoint1.leaves(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves1, actualLeaves1); Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); SwhPID src2 = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004"); ArrayList expectedLeaves2 = new ArrayList<>(); expectedLeaves2.add(new SwhPID("swh:1:ori:0000000000000000000000000000000000000021")); expectedLeaves2.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000019")); ArrayList actualLeaves2 = (ArrayList) endpoint2.leaves(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves2, actualLeaves2); } @Test public void forwardRevToRevOnly() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:rev:0000000000000000000000000000000000000018"); Endpoint endpoint = new Endpoint(graph, "forward", "rev:rev"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000003")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void forwardDirToAll() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint = new Endpoint(graph, "forward", "dir:*"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedLeaves.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } @Test public void backwardCntToDirDirToDir() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000005"); Endpoint endpoint = new Endpoint(graph, "backward", "cnt:dir,dir:dir"); ArrayList expectedLeaves = new ArrayList<>(); expectedLeaves.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000012")); ArrayList actualLeaves = (ArrayList) endpoint.leaves(new Endpoint.Input(src)).result; GraphTest.assertEqualsAnyOrder(expectedLeaves, actualLeaves); } } diff --git a/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java b/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java index 98336a9..861c43c 100644 --- a/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java +++ b/java/src/test/java/org/softwareheritage/graph/NeighborsTest.java @@ -1,145 +1,142 @@ package org.softwareheritage.graph; import java.util.ArrayList; import org.junit.Test; -import org.softwareheritage.graph.Endpoint; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.GraphTest; -import org.softwareheritage.graph.SwhPID; +import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class NeighborsTest extends GraphTest { @Test public void zeroNeighbor() { Graph graph = getGraph(); ArrayList expectedNodes = new ArrayList<>(); SwhPID src1 = new SwhPID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals1); SwhPID src2 = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals2); SwhPID src3 = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000015"); Endpoint endpoint3 = new Endpoint(graph, "forward", "*"); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals3); SwhPID src4 = new SwhPID("swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint4 = new Endpoint(graph, "backward", "*"); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals4); SwhPID src5 = new SwhPID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint5 = new Endpoint(graph, "forward", "snp:*,rev:*,rel:*"); ArrayList actuals5 = (ArrayList) endpoint5.neighbors(new Endpoint.Input(src5)).result; GraphTest.assertEqualsAnyOrder(expectedNodes, actuals5); } @Test public void oneNeighbor() { Graph graph = getGraph(); SwhPID src1 = new SwhPID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000002")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SwhPID src2 = new SwhPID("swh:1:dir:0000000000000000000000000000000000000017"); Endpoint endpoint2 = new Endpoint(graph, "forward", "dir:cnt"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000014")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); SwhPID src3 = new SwhPID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint3 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes3 = new ArrayList<>(); expectedNodes3.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes3, actuals3); SwhPID src4 = new SwhPID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint4 = new Endpoint(graph, "backward", "rev:rev"); ArrayList expectedNodes4 = new ArrayList<>(); expectedNodes4.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes4, actuals4); SwhPID src5 = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint5 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes5 = new ArrayList<>(); expectedNodes5.add(new SwhPID("swh:1:ori:0000000000000000000000000000000000000021")); ArrayList actuals5 = (ArrayList) endpoint5.neighbors(new Endpoint.Input(src5)).result; GraphTest.assertEqualsAnyOrder(expectedNodes5, actuals5); } @Test public void twoNeighbors() { Graph graph = getGraph(); SwhPID src1 = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes1.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000009")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SwhPID src2 = new SwhPID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint2 = new Endpoint(graph, "forward", "dir:cnt"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes2.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); SwhPID src3 = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001"); Endpoint endpoint3 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes3 = new ArrayList<>(); expectedNodes3.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000008")); expectedNodes3.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000002")); ArrayList actuals3 = (ArrayList) endpoint3.neighbors(new Endpoint.Input(src3)).result; GraphTest.assertEqualsAnyOrder(expectedNodes3, actuals3); SwhPID src4 = new SwhPID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint4 = new Endpoint(graph, "backward", "rev:snp,rev:rel"); ArrayList expectedNodes4 = new ArrayList<>(); expectedNodes4.add(new SwhPID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes4.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000010")); ArrayList actuals4 = (ArrayList) endpoint4.neighbors(new Endpoint.Input(src4)).result; GraphTest.assertEqualsAnyOrder(expectedNodes4, actuals4); } @Test public void threeNeighbors() { Graph graph = getGraph(); SwhPID src1 = new SwhPID("swh:1:dir:0000000000000000000000000000000000000008"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList expectedNodes1 = new ArrayList<>(); expectedNodes1.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000006")); expectedNodes1.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes1.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); ArrayList actuals1 = (ArrayList) endpoint1.neighbors(new Endpoint.Input(src1)).result; GraphTest.assertEqualsAnyOrder(expectedNodes1, actuals1); SwhPID src2 = new SwhPID("swh:1:rev:0000000000000000000000000000000000000009"); Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList expectedNodes2 = new ArrayList<>(); expectedNodes2.add(new SwhPID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes2.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes2.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000013")); ArrayList actuals2 = (ArrayList) endpoint2.neighbors(new Endpoint.Input(src2)).result; GraphTest.assertEqualsAnyOrder(expectedNodes2, actuals2); } } diff --git a/java/src/test/java/org/softwareheritage/graph/VisitTest.java b/java/src/test/java/org/softwareheritage/graph/VisitTest.java index 3fde78c..9ac4008 100644 --- a/java/src/test/java/org/softwareheritage/graph/VisitTest.java +++ b/java/src/test/java/org/softwareheritage/graph/VisitTest.java @@ -1,547 +1,543 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Set; import java.util.HashSet; import org.junit.Test; -import org.softwareheritage.graph.Endpoint; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.GraphTest; -import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.SwhPath; +import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class VisitTest extends GraphTest { private void assertSameNodesFromPaths(ArrayList paths, ArrayList nodes) { Set expectedNodes = new HashSet(); for (SwhPath path : paths) { for (SwhPID node : path.getPath()) { expectedNodes.add(node); } } GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void forwardFromRoot() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" )); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromMiddle() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" )); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" )); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" )); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:cnt:0000000000000000000000000000000000000011" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromLeaf() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000004" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromRoot() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:ori:0000000000000000000000000000000000000021" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromMiddle() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromLeaf() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019" )); expectedPaths.add( new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019" )); expectedPaths.add( new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:ori:0000000000000000000000000000000000000021" )); expectedPaths.add( new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:ori:0000000000000000000000000000000000000021" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToRev() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRelToRevRevToRev() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:rel:0000000000000000000000000000000000000010"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRevToAllDirToAll() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:rev:0000000000000000000000000000000000000013"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:cnt:0000000000000000000000000000000000000011" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToAllRevToAll() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002" )); expectedPaths.add( new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008" )); expectedPaths.add( new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardNoEdges() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", ""); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", ""); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardRevToRevRevToRel() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhPID)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedPaths = new ArrayList(); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019" )); expectedPaths.add( new SwhPath( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010" )); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromRootNodesOnly() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SwhPID("swh:1:ori:0000000000000000000000000000000000000021")); expectedNodes.add(new SwhPID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000002")); expectedNodes.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000008")); expectedNodes.add(new SwhPID("swh:1:dir:0000000000000000000000000000000000000006")); expectedNodes.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedNodes.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedNodes.add(new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void backwardRevToAllNodesOnly() { Graph graph = getGraph(); SwhPID swhPID = new SwhPID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint = new Endpoint(graph, "backward", "rev:*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhPID)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SwhPID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000013")); expectedNodes.add(new SwhPID("swh:1:rev:0000000000000000000000000000000000000018")); expectedNodes.add(new SwhPID("swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } } diff --git a/java/src/test/java/org/softwareheritage/graph/WalkTest.java b/java/src/test/java/org/softwareheritage/graph/WalkTest.java index 1daab45..54a3b3b 100644 --- a/java/src/test/java/org/softwareheritage/graph/WalkTest.java +++ b/java/src/test/java/org/softwareheritage/graph/WalkTest.java @@ -1,239 +1,234 @@ package org.softwareheritage.graph; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.junit.Assert; import org.junit.Test; -import org.softwareheritage.graph.Endpoint; -import org.softwareheritage.graph.Graph; -import org.softwareheritage.graph.GraphTest; -import org.softwareheritage.graph.SwhPID; -import org.softwareheritage.graph.SwhPath; +import org.softwareheritage.graph.server.Endpoint; public class WalkTest extends GraphTest { @Test public void forwardRootToLeaf() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:snp:0000000000000000000000000000000000000020"); String dstFmt = "swh:1:cnt:0000000000000000000000000000000000000005"; SwhPath solution1 = new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" ); SwhPath solution2 = new SwhPath( "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" ); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2); Assert.assertTrue(possibleSolutions.contains(dfsPath)); Assert.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardLeafToLeaf() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000007"); String dstFmt = "cnt"; SwhPath expectedPath = new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000007" ); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assert.assertEquals(dfsPath, expectedPath); Assert.assertEquals(bfsPath, expectedPath); } @Test public void forwardRevToRev() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:rev:0000000000000000000000000000000000000018"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000003"; SwhPath expectedPath = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003" ); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assert.assertEquals(dfsPath, expectedPath); Assert.assertEquals(bfsPath, expectedPath); } @Test public void backwardRevToRev() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:rev:0000000000000000000000000000000000000003"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000018"; SwhPath expectedPath = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018" ); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assert.assertEquals(dfsPath, expectedPath); Assert.assertEquals(bfsPath, expectedPath); } @Test public void backwardCntToFirstSnp() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:cnt:0000000000000000000000000000000000000001"); String dstFmt = "snp"; SwhPath solution1 = new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020" ); SwhPath solution2 = new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020" ); SwhPath solution3 = new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020" ); SwhPath solution4 = new SwhPath( "swh:1:cnt:0000000000000000000000000000000000000001", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020" ); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4); Assert.assertTrue(possibleSolutions.contains(dfsPath)); Assert.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardRevToFirstCnt() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:rev:0000000000000000000000000000000000000009"); String dstFmt = "cnt"; SwhPath solution1 = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000007" ); SwhPath solution2 = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000005" ); SwhPath solution3 = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:cnt:0000000000000000000000000000000000000004" ); SwhPath solution4 = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:cnt:0000000000000000000000000000000000000001" ); SwhPath solution5 = new SwhPath( "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", "swh:1:cnt:0000000000000000000000000000000000000001" ); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4, solution5); Assert.assertTrue(possibleSolutions.contains(dfsPath)); Assert.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void backwardDirToFirstRel() { Graph graph = getGraph(); SwhPID src = new SwhPID("swh:1:dir:0000000000000000000000000000000000000016"); String dstFmt = "rel"; SwhPath expectedPath = new SwhPath( "swh:1:dir:0000000000000000000000000000000000000016", "swh:1:dir:0000000000000000000000000000000000000017", "swh:1:rev:0000000000000000000000000000000000000018", "swh:1:rel:0000000000000000000000000000000000000019" ); Endpoint endpoint1 = new Endpoint(graph, "backward", "dir:dir,dir:rev,rev:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "dir:dir,dir:rev,rev:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assert.assertEquals(dfsPath, expectedPath); Assert.assertEquals(bfsPath, expectedPath); } } diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py index 254803a..946829b 100644 --- a/swh/graph/webgraph.py +++ b/swh/graph/webgraph.py @@ -1,271 +1,271 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """WebGraph driver """ import logging import os import subprocess from enum import Enum from datetime import datetime from pathlib import Path from typing import Dict, List, Set from click import ParamType from swh.graph.config import check_config_compress class CompressionStep(Enum): MPH = 1 BV = 2 BV_OBL = 3 BFS = 4 PERMUTE = 5 PERMUTE_OBL = 6 STATS = 7 TRANSPOSE = 8 TRANSPOSE_OBL = 9 MAPS = 10 CLEAN_TMP = 11 def __str__(self): return self.name # full compression pipeline COMP_SEQ = list(CompressionStep) # Mapping from compression steps to shell commands implementing them. Commands # will be executed by the shell, so be careful with meta characters. They are # specified here as lists of tokens that will be joined together only for ease # of line splitting. In commands, {tokens} will be interpolated with # configuration values, see :func:`compress`. STEP_ARGV: Dict[CompressionStep, List[str]] = { CompressionStep.MPH: [ "{java}", "it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction", "--temp-dir", "{tmp_dir}", "{out_dir}/{graph_name}.mph", "<( zstdcat {in_dir}/{graph_name}.nodes.csv.zst )", ], # use process substitution (and hence FIFO) above as MPH class load the # entire file in memory when reading from stdin CompressionStep.BV: [ "zstdcat", "{in_dir}/{graph_name}.edges.csv.zst", "|", "{java}", "it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph", "--temp-dir", "{tmp_dir}", "--function", "{out_dir}/{graph_name}.mph", "{out_dir}/{graph_name}-bv", ], CompressionStep.BV_OBL: [ "{java}", "it.unimi.dsi.big.webgraph.BVGraph", "--list", "{out_dir}/{graph_name}-bv", ], CompressionStep.BFS: [ "{java}", "it.unimi.dsi.law.big.graph.BFS", "{out_dir}/{graph_name}-bv", "{out_dir}/{graph_name}.order", ], CompressionStep.PERMUTE: [ "{java}", "it.unimi.dsi.big.webgraph.Transform", "mapOffline", "{out_dir}/{graph_name}-bv", "{out_dir}/{graph_name}", "{out_dir}/{graph_name}.order", "{batch_size}", "{tmp_dir}", ], CompressionStep.PERMUTE_OBL: [ "{java}", "it.unimi.dsi.big.webgraph.BVGraph", "--list", "{out_dir}/{graph_name}", ], CompressionStep.STATS: [ "{java}", "it.unimi.dsi.big.webgraph.Stats", "{out_dir}/{graph_name}", ], CompressionStep.TRANSPOSE: [ "{java}", "it.unimi.dsi.big.webgraph.Transform", "transposeOffline", "{out_dir}/{graph_name}", "{out_dir}/{graph_name}-transposed", "{batch_size}", "{tmp_dir}", ], CompressionStep.TRANSPOSE_OBL: [ "{java}", "it.unimi.dsi.big.webgraph.BVGraph", "--list", "{out_dir}/{graph_name}-transposed", ], CompressionStep.MAPS: [ "zstdcat", "{in_dir}/{graph_name}.nodes.csv.zst", "|", "{java}", - "org.softwareheritage.graph.backend.MapBuilder", + "org.softwareheritage.graph.maps.MapBuilder", "{out_dir}/{graph_name}", "{tmp_dir}", ], CompressionStep.CLEAN_TMP: [ "rm", "-rf", "{out_dir}/{graph_name}-bv.graph", "{out_dir}/{graph_name}-bv.obl", "{out_dir}/{graph_name}-bv.offsets", "{tmp_dir}", ], } class StepOption(ParamType): """click type for specifying a compression step on the CLI parse either individual steps, specified as step names or integers, or step ranges """ name = "compression step" def convert(self, value, param, ctx) -> Set[CompressionStep]: steps: Set[CompressionStep] = set() specs = value.split(",") for spec in specs: if "-" in spec: # step range (raw_l, raw_r) = spec.split("-", maxsplit=1) if raw_l == "": # no left endpoint raw_l = COMP_SEQ[0].name if raw_r == "": # no right endpoint raw_r = COMP_SEQ[-1].name l_step = self.convert(raw_l, param, ctx) r_step = self.convert(raw_r, param, ctx) if len(l_step) != 1 or len(r_step) != 1: self.fail(f"invalid step specification: {value}, " f"see --help") l_idx = l_step.pop() r_idx = r_step.pop() steps = steps.union( set(map(CompressionStep, range(l_idx.value, r_idx.value + 1))) ) else: # singleton step try: steps.add(CompressionStep(int(spec))) # integer step except ValueError: try: steps.add(CompressionStep[spec.upper()]) # step name except KeyError: self.fail( f"invalid step specification: {value}, " f"see --help" ) return steps def do_step(step, conf): cmd = " ".join(STEP_ARGV[step]).format(**conf) cmd_env = os.environ.copy() cmd_env["JAVA_TOOL_OPTIONS"] = conf["java_tool_options"] cmd_env["CLASSPATH"] = conf["classpath"] logging.info(f"running: {cmd}") process = subprocess.Popen( ["/bin/bash", "-c", cmd], env=cmd_env, encoding="utf8", stdout=subprocess.PIPE, stderr=subprocess.STDOUT, ) with process.stdout as stdout: for line in stdout: logging.info(line.rstrip()) rc = process.wait() if rc != 0: raise RuntimeError( f"compression step {step} returned non-zero " f"exit code {rc}" ) else: return rc def compress( graph_name: str, in_dir: Path, out_dir: Path, steps: Set[CompressionStep] = set(COMP_SEQ), conf: Dict[str, str] = {}, ): """graph compression pipeline driver from nodes/edges files to compressed on-disk representation Args: graph_name: graph base name, relative to in_dir in_dir: input directory, where the uncompressed graph can be found out_dir: output directory, where the compressed graph will be stored steps: compression steps to run (default: all steps) conf: compression configuration, supporting the following keys (all are optional, so an empty configuration is fine and is the default) - batch_size: batch size for `WebGraph transformations `_; defaults to 1 billion - classpath: java classpath, defaults to swh-graph JAR only - java: command to run java VM, defaults to "java" - java_tool_options: value for JAVA_TOOL_OPTIONS environment variable; defaults to various settings for high memory machines - logback: path to a logback.xml configuration file; if not provided a temporary one will be created and used - max_ram: maximum RAM to use for compression; defaults to available virtual memory - tmp_dir: temporary directory, defaults to the "tmp" subdir of out_dir """ if not steps: steps = set(COMP_SEQ) conf = check_config_compress(conf, graph_name, in_dir, out_dir) compression_start_time = datetime.now() logging.info(f"starting compression at {compression_start_time}") seq_no = 0 for step in COMP_SEQ: if step not in steps: logging.debug(f"skipping compression step {step}") continue seq_no += 1 step_start_time = datetime.now() logging.info( f"starting compression step {step} " f"({seq_no}/{len(steps)}) at {step_start_time}" ) do_step(step, conf) step_end_time = datetime.now() step_duration = step_end_time - step_start_time logging.info( f"completed compression step {step} " f"({seq_no}/{len(steps)}) " f"at {step_end_time} in {step_duration}" ) compression_end_time = datetime.now() compression_duration = compression_end_time - compression_start_time logging.info(f"completed compression in {compression_duration}")