diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 508c1cb..7a64a7f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,50 +1,59 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v2.4.0 hooks: - id: trailing-whitespace - id: check-json - id: check-yaml - repo: https://gitlab.com/pycqa/flake8 rev: 3.8.3 hooks: - id: flake8 - repo: https://github.com/codespell-project/codespell rev: v1.16.0 hooks: - id: codespell args: ["-L te,wth,alledges"] - repo: local hooks: - id: mypy name: mypy entry: mypy args: [swh] pass_filenames: false language: system types: [python] - repo: https://github.com/PyCQA/isort rev: 5.5.2 hooks: - id: isort - repo: https://github.com/python/black rev: 19.10b0 hooks: - id: black +- repo: local + hooks: + - id: java-coding-style + name: java style + entry: mvn + args: ["-f", "java/pom.xml", "spotless:apply"] + pass_filenames: false + language: system + # unfortunately, we are far from being able to enable this... # - repo: https://github.com/PyCQA/pydocstyle.git # rev: 4.0.0 # hooks: # - id: pydocstyle # name: pydocstyle # description: pydocstyle is a static analysis tool for checking compliance with Python docstring conventions. # entry: pydocstyle --convention=google # language: python # types: [python] diff --git a/java/.clang-format b/java/.clang-format deleted file mode 100644 index 8985e96..0000000 --- a/java/.clang-format +++ /dev/null @@ -1,4 +0,0 @@ ---- -BasedOnStyle: Google ---- -Language: Java diff --git a/java/.coding-style.xml b/java/.coding-style.xml new file mode 100644 index 0000000..70c0422 --- /dev/null +++ b/java/.coding-style.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/java/pom.xml b/java/pom.xml index bed537c..ad741e5 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -1,235 +1,264 @@ 4.0.0 org.softwareheritage.graph swh-graph ${git.closest.tag.name} swh-graph https://forge.softwareheritage.org/source/swh-graph/ UTF-8 11 ch.qos.logback logback-classic 1.2.3 org.junit.jupiter junit-jupiter-api 5.7.0 test org.junit.jupiter junit-jupiter-engine 5.7.0 test org.hamcrest hamcrest 2.1 test io.javalin javalin 3.0.0 org.slf4j slf4j-simple 1.7.26 com.fasterxml.jackson.core jackson-databind 2.9.8 it.unimi.dsi webgraph-big 3.6.0 it.unimi.dsi fastutil 8.4.1 it.unimi.dsi law 2.6.2 org.apache.hadoop hadoop-common org.umlgraph umlgraph org.eclipse.jetty.aggregate jetty-all it.unimi.di mg4j it.unimi.di mg4j-big com.martiansoftware jsap 2.1 net.sf.py4j py4j 0.10.8.1 commons-codec commons-codec 1.11 maven-clean-plugin 3.1.0 maven-resources-plugin 3.0.2 maven-compiler-plugin 3.8.0 11 11 -verbose -Xlint:all maven-surefire-plugin 2.22.2 maven-failsafe-plugin 2.22.2 maven-jar-plugin 3.0.2 maven-install-plugin 2.5.2 maven-deploy-plugin 2.8.2 maven-site-plugin 3.7.1 maven-project-info-reports-plugin 3.0.0 maven-assembly-plugin 3.3.0 org.softwareheritage.graph.server.App jar-with-dependencies false make-assembly package single - + + + + com.diffplug.spotless + spotless-maven-plugin + 2.4.1 + + + + + *.md + .gitignore + + + + + true + 4 + + + + + + + 4.16.0 + .coding-style.xml + + + + pl.project13.maven git-commit-id-plugin 3.0.1 get-the-git-infos revision initialize true true true true v* git.closest.tag.name ^v true org.apache.maven.plugins maven-javadoc-plugin 3.1.1 diff --git a/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java b/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java index 71f1196..737148a 100644 --- 
a/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java +++ b/java/src/main/java/org/softwareheritage/graph/AllowedEdges.java @@ -1,73 +1,74 @@ package org.softwareheritage.graph; import java.util.ArrayList; /** * Edge restriction based on node types, used when visiting the graph. *

* Software Heritage * graph contains multiple node types (contents, directories, revisions, ...) and restricting - * the traversal to specific node types is necessary for many querying operations: use cases. + * the traversal to specific node types is necessary for many querying operations: + * use cases. * * @author The Software Heritage developers */ public class AllowedEdges { /** * 2D boolean matrix storing access rights for all combination of src/dst node types (first - * dimension is source, second dimension is destination), when edge restriction is not enforced - * this array is set to null for early bypass. + * dimension is source, second dimension is destination), when edge restriction is not enforced this + * array is set to null for early bypass. */ public boolean[][] restrictedTo; /** * Constructor. * - * @param edgesFmt a formatted string describing allowed edges + * @param edgesFmt a formatted string describing allowed + * edges */ public AllowedEdges(String edgesFmt) { int nbNodeTypes = Node.Type.values().length; this.restrictedTo = new boolean[nbNodeTypes][nbNodeTypes]; // Special values (null, empty, "*") if (edgesFmt == null || edgesFmt.isEmpty()) { return; } if (edgesFmt.equals("*")) { // Allows for quick bypass (with simple null check) when no edge restriction restrictedTo = null; return; } // Format: "src1:dst1,src2:dst2,[...]" String[] edgeTypes = edgesFmt.split(","); for (String edgeType : edgeTypes) { String[] nodeTypes = edgeType.split(":"); if (nodeTypes.length != 2) { throw new IllegalArgumentException("Cannot parse edge type: " + edgeType); } ArrayList srcTypes = Node.Type.parse(nodeTypes[0]); ArrayList dstTypes = Node.Type.parse(nodeTypes[1]); for (Node.Type srcType : srcTypes) { for (Node.Type dstType : dstTypes) { restrictedTo[srcType.ordinal()][dstType.ordinal()] = true; } } } } /** * Checks if a given edge can be followed during graph traversal. 
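To make the "src:dst" restriction format concrete, here is a minimal usage sketch (illustrative only, not taken from the patched sources; it assumes the AllowedEdges and Node classes shown in this diff):

    // Comma-separated "src:dst" pairs; "*" on either side expands to every node type
    AllowedEdges edges = new AllowedEdges("rev:rev,rev:dir");
    edges.isAllowed(Node.Type.REV, Node.Type.DIR);   // true
    edges.isAllowed(Node.Type.DIR, Node.Type.CNT);   // false
    // A lone "*" disables filtering: restrictedTo stays null and isAllowed short-circuits
    AllowedEdges any = new AllowedEdges("*");
    any.isAllowed(Node.Type.SNP, Node.Type.ORI);     // true (early bypass)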
* * @param srcType edge source type * @param dstType edge destination type * @return true if allowed and false otherwise */ public boolean isAllowed(Node.Type srcType, Node.Type dstType) { if (restrictedTo == null) return true; return restrictedTo[srcType.ordinal()][dstType.ordinal()]; } } diff --git a/java/src/main/java/org/softwareheritage/graph/Entry.java b/java/src/main/java/org/softwareheritage/graph/Entry.java index efd4915..75bb4f2 100644 --- a/java/src/main/java/org/softwareheritage/graph/Entry.java +++ b/java/src/main/java/org/softwareheritage/graph/Entry.java @@ -1,193 +1,188 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import java.io.DataOutputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; public class Entry { private final long PATH_SEPARATOR_ID = -1; private Graph graph; public void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); } public Graph get_graph() { return graph.copy(); } public String stats() { try { Stats stats = new Stats(graph.getPath()); ObjectMapper objectMapper = new ObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); return objectMapper.writeValueAsString(stats); } catch (IOException e) { throw new RuntimeException("Cannot read stats: " + e); } } private int count_visitor(NodeCountVisitor f, long srcNodeId) { int[] count = {0}; f.accept(srcNodeId, (node) -> { count[0]++; }); return count[0]; } public int count_leaves(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::leavesVisitor, srcNodeId); } public int count_neighbors(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::neighborsVisitor, srcNodeId); } public int count_visit_nodes(String direction, String edgesFmt, long srcNodeId) { Traversal t = new Traversal(this.graph.copy(), direction, edgesFmt); return count_visitor(t::visitNodesVisitor, srcNodeId); } public QueryHandler get_handler(String clientFIFO) { return new QueryHandler(this.graph.copy(), clientFIFO); } private interface NodeCountVisitor { void accept(long nodeId, Traversal.NodeIdConsumer consumer); } public class QueryHandler { Graph graph; DataOutputStream out; String clientFIFO; public QueryHandler(Graph graph, String clientFIFO) { this.graph = graph; this.clientFIFO = clientFIFO; this.out = null; } public void writeNode(long nodeId) { try { out.writeLong(nodeId); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void writeEdge(long srcId, long dstId) { writeNode(srcId); writeNode(dstId); } public void writePath(ArrayList path) { for (Long nodeId : path) { writeNode(nodeId); } writeNode(PATH_SEPARATOR_ID); } public void open() { try { FileOutputStream file = new FileOutputStream(this.clientFIFO); this.out = new DataOutputStream(file); } catch (IOException e) { throw new RuntimeException("Cannot open client FIFO: " + e); } } public void close() { try { out.close(); } catch (IOException e) { throw new RuntimeException("Cannot write response to client: " + e); } } public void leaves(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new 
Traversal(this.graph, direction, edgesFmt); t.leavesVisitor(srcNodeId, this::writeNode); close(); } public void neighbors(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.neighborsVisitor(srcNodeId, this::writeNode); close(); } public void visit_nodes(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitNodesVisitor(srcNodeId, this::writeNode); close(); } public void visit_edges(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitNodesVisitor(srcNodeId, null, this::writeEdge); close(); } - public void visit_paths(String direction, String edgesFmt, - long srcNodeId) { + public void visit_paths(String direction, String edgesFmt, long srcNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); t.visitPathsVisitor(srcNodeId, this::writePath); close(); } - public void walk(String direction, String edgesFmt, String algorithm, - long srcNodeId, long dstNodeId) { + public void walk(String direction, String edgesFmt, String algorithm, long srcNodeId, long dstNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstNodeId, algorithm)) { writeNode(nodeId); } close(); } - public void walk_type(String direction, String edgesFmt, String algorithm, - long srcNodeId, String dst) { + public void walk_type(String direction, String edgesFmt, String algorithm, long srcNodeId, String dst) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.walk(srcNodeId, dstType, algorithm)) { writeNode(nodeId); } close(); } - public void random_walk(String direction, String edgesFmt, int retries, - long srcNodeId, long dstNodeId) { + public void random_walk(String direction, String edgesFmt, int retries, long srcNodeId, long dstNodeId) { open(); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.randomWalk(srcNodeId, dstNodeId, retries)) { writeNode(nodeId); } close(); } - public void random_walk_type(String direction, String edgesFmt, int retries, - long srcNodeId, String dst) { + public void random_walk_type(String direction, String edgesFmt, int retries, long srcNodeId, String dst) { open(); Node.Type dstType = Node.Type.fromStr(dst); Traversal t = new Traversal(this.graph, direction, edgesFmt); for (Long nodeId : t.randomWalk(srcNodeId, dstType, retries)) { writeNode(nodeId); } close(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/Graph.java b/java/src/main/java/org/softwareheritage/graph/Graph.java index 3378c0b..ad44a54 100644 --- a/java/src/main/java/org/softwareheritage/graph/Graph.java +++ b/java/src/main/java/org/softwareheritage/graph/Graph.java @@ -1,256 +1,257 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.Transform; import org.softwareheritage.graph.maps.NodeIdMap; import org.softwareheritage.graph.maps.NodeTypesMap; import java.io.IOException; /** * Main class storing the compressed graph and node id mappings. *

* The compressed graph is stored using the WebGraph - * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent - * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id ↔ - * SWHID) are used for the input (users refer to the graph using SWHID) and the output (convert back to - * SWHID for users results). However, since graph traversal can be restricted depending on the node - * type (see {@link AllowedEdges}), a long id → node type map is stored as well to avoid a full - * SWHID lookup. + * ecosystem. Additional mappings are necessary because Software Heritage uses string based persistent + * identifiers (SWHID) while WebGraph uses integers internally. These two mappings (long id + * ↔ SWHID) are used for the input (users refer to the graph using SWHID) and the output + * (convert back to SWHID for users results). However, since graph traversal can be restricted + * depending on the node type (see {@link AllowedEdges}), a long id → node type map is stored + * as well to avoid a full SWHID lookup. * * @author The Software Heritage developers * @see org.softwareheritage.graph.AllowedEdges * @see org.softwareheritage.graph.maps.NodeIdMap * @see org.softwareheritage.graph.maps.NodeTypesMap */ public class Graph extends ImmutableGraph { /** File extension for the SWHID to long node id map */ public static final String SWHID_TO_NODE = ".swhid2node.bin"; /** File extension for the long node id to SWHID map */ public static final String NODE_TO_SWHID = ".node2swhid.bin"; /** File extension for the long node id to node type map */ public static final String NODE_TO_TYPE = ".node2type.map"; /** Compressed graph stored as a {@link it.unimi.dsi.big.webgraph.BVGraph} */ ImmutableGraph graph; /** Transposed compressed graph (used for backward traversals) */ ImmutableGraph graphTransposed; /** Path and basename of the compressed graph */ String path; /** Mapping long id ↔ SWHIDs */ NodeIdMap nodeIdMap; /** Mapping long id → node types */ NodeTypesMap nodeTypesMap; /** * Constructor. * * @param path path and basename of the compressed graph to load */ public Graph(String path) throws IOException { this.path = path; this.graph = ImmutableGraph.loadMapped(path); this.graphTransposed = ImmutableGraph.loadMapped(path + "-transposed"); this.nodeTypesMap = new NodeTypesMap(path); this.nodeIdMap = new NodeIdMap(path, numNodes()); } - protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, - String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) { + protected Graph(ImmutableGraph graph, ImmutableGraph graphTransposed, String path, NodeIdMap nodeIdMap, + NodeTypesMap nodeTypesMap) { this.graph = graph; this.graphTransposed = graphTransposed; this.path = path; this.nodeIdMap = nodeIdMap; this.nodeTypesMap = nodeTypesMap; } /** * Return a flyweight copy of the graph. */ @Override public Graph copy() { return new Graph(this.graph.copy(), this.graphTransposed.copy(), this.path, this.nodeIdMap, this.nodeTypesMap); } @Override public boolean randomAccess() { return graph.randomAccess() && graphTransposed.randomAccess(); } /** * Return a transposed version of the graph. */ public Graph transpose() { return new Graph(this.graphTransposed, this.graph, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Return a symmetric version of the graph. 
*/ public Graph symmetrize() { ImmutableGraph symmetric = Transform.union(graph, graphTransposed); return new Graph(symmetric, symmetric, this.path, this.nodeIdMap, this.nodeTypesMap); } /** * Cleans up graph resources after use. */ public void cleanUp() throws IOException { nodeIdMap.close(); } /** * Returns number of nodes in the graph. * * @return number of nodes in the graph */ @Override public long numNodes() { return graph.numNodes(); } /** * Returns number of edges in the graph. * * @return number of edges in the graph */ @Override public long numArcs() { return graph.numArcs(); } /** * Returns lazy iterator of successors of a node. * * @param nodeId node specified as a long id - * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + * @return lazy iterator of successors of the node, specified as a + * WebGraph LazyLongIterator */ @Override public LazyLongIterator successors(long nodeId) { return graph.successors(nodeId); } /** * Returns lazy iterator of successors of a node while following a specific set of edge types. * * @param nodeId node specified as a long id * @param allowedEdges the specification of which edges can be traversed - * @return lazy iterator of successors of the node, specified as a WebGraph LazyLongIterator + * @return lazy iterator of successors of the node, specified as a + * WebGraph LazyLongIterator */ public LazyLongIterator successors(long nodeId, AllowedEdges allowedEdges) { if (allowedEdges.restrictedTo == null) { // All edges are allowed, bypass edge check return this.successors(nodeId); } else { LazyLongIterator allSuccessors = this.successors(nodeId); Graph thisGraph = this; return new LazyLongIterator() { @Override public long nextLong() { long neighbor; while ((neighbor = allSuccessors.nextLong()) != -1) { if (allowedEdges.isAllowed(thisGraph.getNodeType(nodeId), thisGraph.getNodeType(neighbor))) { return neighbor; } } return -1; } @Override public long skip(final long n) { long i; - for (i = 0; i < n && nextLong() != -1; i++) ; + for (i = 0; i < n && nextLong() != -1; i++) + ; return i; } }; } } /** * Returns the outdegree of a node. * * @param nodeId node specified as a long id * @return outdegree of a node */ @Override public long outdegree(long nodeId) { return graph.outdegree(nodeId); } /** * Returns lazy iterator of predecessors of a node. * * @param nodeId node specified as a long id - * @return lazy iterator of predecessors of the node, specified as a WebGraph LazyLongIterator + * @return lazy iterator of predecessors of the node, specified as a + * WebGraph LazyLongIterator */ public LazyLongIterator predecessors(long nodeId) { return this.transpose().successors(nodeId); } /** * Returns the indegree of a node. * * @param nodeId node specified as a long id * @return indegree of a node */ public long indegree(long nodeId) { return this.transpose().outdegree(nodeId); } /** * Returns the underlying BVGraph. * * @return WebGraph BVGraph */ public ImmutableGraph getGraph() { return this.graph; } /** * Returns the graph full path. * * @return graph full path */ public String getPath() { return path; } /** * Converts {@link SWHID} node to long. * * @param swhid node specified as a {@link SWHID} * @return internal long node id * @see SWHID */ public long getNodeId(SWHID swhid) { return nodeIdMap.getNodeId(swhid); } /** * Converts long id node to {@link SWHID}. 
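Putting the public surface of this class together, a self-contained usage sketch (the class name, the graph basename argument and the all-zero hash are placeholders, not real data):

    import it.unimi.dsi.big.webgraph.LazyLongIterator;
    import org.softwareheritage.graph.AllowedEdges;
    import org.softwareheritage.graph.Graph;
    import org.softwareheritage.graph.SWHID;

    public class GraphUsageSketch {
        public static void main(String[] args) throws java.io.IOException {
            Graph g = new Graph(args[0]);                            // compressed graph basename
            SWHID swhid = new SWHID("swh:1:rev:" + "0".repeat(40));  // placeholder hash
            long nodeId = g.getNodeId(swhid);                        // SWHID -> long id
            System.out.println(g.getSWHID(nodeId));                  // long id -> SWHID
            // Successors restricted to revision-to-revision edges only
            LazyLongIterator it = g.successors(nodeId, new AllowedEdges("rev:rev"));
            for (long succ; (succ = it.nextLong()) != -1;) {
                System.out.println(g.getSWHID(succ));
            }
            // Backward information comes from the transposed graph loaded alongside the forward one
            System.out.println(g.indegree(nodeId) + " predecessors");
        }
    }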
* * @param nodeId node specified as a long id * @return external SWHID * @see SWHID */ public SWHID getSWHID(long nodeId) { return nodeIdMap.getSWHID(nodeId); } /** * Returns node type. * * @param nodeId node specified as a long id * @return corresponding node type * @see org.softwareheritage.graph.Node.Type */ public Node.Type getNodeType(long nodeId) { return nodeTypesMap.getType(nodeId); } } diff --git a/java/src/main/java/org/softwareheritage/graph/Node.java b/java/src/main/java/org/softwareheritage/graph/Node.java index 6f10d4f..e4a61d3 100644 --- a/java/src/main/java/org/softwareheritage/graph/Node.java +++ b/java/src/main/java/org/softwareheritage/graph/Node.java @@ -1,117 +1,116 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Arrays; import java.util.List; /** * A node in the Software Heritage graph. * * @author The Software Heritage developers */ public class Node { /** * Software Heritage graph node types, as described in the * data model. */ public enum Type { /** Content node */ CNT, /** Directory node */ DIR, /** Origin node */ ORI, /** Release node */ REL, /** Revision node */ REV, /** Snapshot node */ SNP; /** * Converts integer to corresponding SWH node type. * * @param intType node type represented as an integer * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromInt(int intType) { switch (intType) { case 0: return CNT; case 1: return DIR; case 2: return ORI; case 3: return REL; case 4: return REV; case 5: return SNP; } return null; } /** * Converts node types to the corresponding int value * * @param type node type as an enum * @return the corresponding int value */ public static int toInt(Node.Type type) { switch (type) { case CNT: return 0; case DIR: return 1; case ORI: return 2; case REL: return 3; case REV: return 4; case SNP: return 5; } throw new IllegalArgumentException("Unknown node type: " + type); } /** * Converts string to corresponding SWH node type. * * @param strType node type represented as a string * @return the corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public static Node.Type fromStr(String strType) { if (!strType.matches("cnt|dir|ori|rel|rev|snp")) { throw new IllegalArgumentException("Unknown node type: " + strType); } return Node.Type.valueOf(strType.toUpperCase()); } /** - * Parses SWH node type possible values from formatted string (see the API - * syntax). + * Parses SWH node type possible values from formatted string (see the + * API syntax). 
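A quick illustration of the node-type strings accepted by fromStr, fromInt and parse (a sketch assuming the Node class shown here):

    Node.Type.fromStr("rev");                            // Type.REV
    Node.Type.fromInt(0);                                // Type.CNT
    ArrayList<Node.Type> one = Node.Type.parse("dir");   // [DIR]
    ArrayList<Node.Type> all = Node.Type.parse("*");     // [CNT, DIR, ORI, REL, REV, SNP]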
* * @param strFmtType node types represented as a formatted string * @return a list containing the {@link Node.Type} values * @see org.softwareheritage.graph.Node.Type */ public static ArrayList parse(String strFmtType) { ArrayList types = new ArrayList<>(); if (strFmtType.equals("*")) { List nodeTypes = Arrays.asList(Node.Type.values()); types.addAll(nodeTypes); } else { types.add(Node.Type.fromStr(strFmtType)); } return types; } } } diff --git a/java/src/main/java/org/softwareheritage/graph/SWHID.java b/java/src/main/java/org/softwareheritage/graph/SWHID.java index 6b810cc..16aff83 100644 --- a/java/src/main/java/org/softwareheritage/graph/SWHID.java +++ b/java/src/main/java/org/softwareheritage/graph/SWHID.java @@ -1,124 +1,118 @@ package org.softwareheritage.graph; import com.fasterxml.jackson.annotation.JsonValue; import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Hex; /** - * A Software Heritage persistent identifier (SWHID), see persistent + * A Software Heritage persistent identifier (SWHID), see persistent * identifier documentation. * * @author The Software Heritage developers */ public class SWHID { /** Fixed hash length of the SWHID */ public static final int HASH_LENGTH = 40; /** Full SWHID as a string */ String swhid; /** SWHID node type */ Node.Type type; /** * Constructor. * * @param swhid full SWHID as a string */ public SWHID(String swhid) { this.swhid = swhid; // SWHID format: 'swh:1:type:hash' String[] parts = swhid.split(":"); if (parts.length != 4 || !parts[0].equals("swh") || !parts[1].equals("1")) { throw new IllegalArgumentException("malformed SWHID: " + swhid); } this.type = Node.Type.fromStr(parts[2]); if (!parts[3].matches("[0-9a-f]{" + HASH_LENGTH + "}")) { throw new IllegalArgumentException("malformed SWHID: " + swhid); } } /** * Creates a SWHID from a compact binary representation. *

- * The binary format is specified in the Python module - * swh.graph.swhid:str_to_bytes . + * The binary format is specified in the Python module swh.graph.swhid:str_to_bytes . */ public static SWHID fromBytes(byte[] input) { byte[] digest = new byte[20]; System.arraycopy(input, 2, digest, 0, digest.length); - String swhidStr = String.format( - "swh:%d:%s:%s", - input[0], - Node.Type.fromInt(input[1]).toString().toLowerCase(), - Hex.encodeHexString(digest) - ); + String swhidStr = String.format("swh:%d:%s:%s", input[0], Node.Type.fromInt(input[1]).toString().toLowerCase(), + Hex.encodeHexString(digest)); return new SWHID(swhidStr); } @Override public boolean equals(Object otherObj) { if (otherObj == this) return true; if (!(otherObj instanceof SWHID)) return false; SWHID other = (SWHID) otherObj; return swhid.equals(other.getSWHID()); } @Override public int hashCode() { return swhid.hashCode(); } @Override public String toString() { return swhid; } /** * Converts SWHID to a compact binary representation. *
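Reading fromBytes and toBytes together, the 22-byte compact layout they implement can be sketched as follows (placeholder hash, illustrative only):

    SWHID swhid = new SWHID("swh:1:cnt:" + "0".repeat(40));  // placeholder hash
    byte[] b = swhid.toBytes();   // 22 bytes
    // b[0]      namespace version, always 1
    // b[1]      node type as an int (Node.Type.toInt, e.g. 0 for cnt)
    // b[2..21]  the 20-byte SHA1 digest decoded from the 40 hex characters
    SWHID roundTrip = SWHID.fromBytes(b);
    assert roundTrip.equals(swhid);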

- * The binary format is specified in the Python module - * swh.graph.swhid:str_to_bytes . + * The binary format is specified in the Python module swh.graph.swhid:str_to_bytes . */ public byte[] toBytes() { byte[] bytes = new byte[22]; byte[] digest; - bytes[0] = (byte) 1; // namespace version - bytes[1] = (byte) Node.Type.toInt(this.type); // SWHID type + bytes[0] = (byte) 1; // namespace version + bytes[1] = (byte) Node.Type.toInt(this.type); // SWHID type try { - digest = Hex.decodeHex(this.swhid.substring(10)); // SHA1 hash + digest = Hex.decodeHex(this.swhid.substring(10)); // SHA1 hash System.arraycopy(digest, 0, bytes, 2, digest.length); } catch (DecoderException e) { throw new IllegalArgumentException("invalid hex sequence in SWHID: " + this.swhid); } return bytes; } /** * Returns full SWHID as a string. * * @return full SWHID string */ @JsonValue public String getSWHID() { return swhid; } /** * Returns SWHID node type. * * @return SWHID corresponding {@link Node.Type} * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType() { return type; } } diff --git a/java/src/main/java/org/softwareheritage/graph/Stats.java b/java/src/main/java/org/softwareheritage/graph/Stats.java index 92d4fd8..1c1cb0f 100644 --- a/java/src/main/java/org/softwareheritage/graph/Stats.java +++ b/java/src/main/java/org/softwareheritage/graph/Stats.java @@ -1,67 +1,67 @@ package org.softwareheritage.graph; import java.io.FileInputStream; import java.io.IOException; import java.util.Properties; /** * Statistics on the compressed graph. *

- * These statistics are not computed but directly read from WebGraph generated .stats and .properties files. + * These statistics are not computed but directly read from + * WebGraph generated .stats and .properties files. * * @author The Software Heritage developers */ public class Stats { public Counts counts; public Ratios ratios; public Degree indegree; public Degree outdegree; /** * Constructor. * * @param graphPath path and basename of compressed graph */ public Stats(String graphPath) throws IOException { Properties properties = new Properties(); properties.load(new FileInputStream(graphPath + ".properties")); properties.load(new FileInputStream(graphPath + ".stats")); this.counts = new Counts(); this.ratios = new Ratios(); this.indegree = new Degree(); this.outdegree = new Degree(); this.counts.nodes = Long.parseLong(properties.getProperty("nodes")); this.counts.edges = Long.parseLong(properties.getProperty("arcs")); this.ratios.compression = Double.parseDouble(properties.getProperty("compratio")); this.ratios.bitsPerNode = Double.parseDouble(properties.getProperty("bitspernode")); this.ratios.bitsPerEdge = Double.parseDouble(properties.getProperty("bitsperlink")); this.ratios.avgLocality = Double.parseDouble(properties.getProperty("avglocality")); this.indegree.min = Long.parseLong(properties.getProperty("minindegree")); this.indegree.max = Long.parseLong(properties.getProperty("maxindegree")); this.indegree.avg = Double.parseDouble(properties.getProperty("avgindegree")); this.outdegree.min = Long.parseLong(properties.getProperty("minoutdegree")); this.outdegree.max = Long.parseLong(properties.getProperty("maxoutdegree")); this.outdegree.avg = Double.parseDouble(properties.getProperty("avgoutdegree")); } public static class Counts { public long nodes; public long edges; } public static class Ratios { public double compression; public double bitsPerNode; public double bitsPerEdge; public double avgLocality; } public static class Degree { public long min; public long max; public double avg; } } diff --git a/java/src/main/java/org/softwareheritage/graph/Traversal.java b/java/src/main/java/org/softwareheritage/graph/Traversal.java index f83d1fb..ac50aef 100644 --- a/java/src/main/java/org/softwareheritage/graph/Traversal.java +++ b/java/src/main/java/org/softwareheritage/graph/Traversal.java @@ -1,525 +1,516 @@ package org.softwareheritage.graph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.server.Endpoint; import java.util.*; import java.util.function.Consumer; import java.util.function.LongConsumer; /** * Traversal algorithms on the compressed graph. *
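For the Stats class just above, which only parses the .properties and .stats files produced by WebGraph, a usage sketch (the basename is a placeholder and the constructor throws IOException):

    Stats stats = new Stats("/srv/softwareheritage/graph/graph");  // hypothetical basename
    System.out.println(stats.counts.nodes + " nodes, " + stats.counts.edges + " edges, "
            + stats.ratios.bitsPerEdge + " bits per edge");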

* Internal implementation of the traversal API endpoints. These methods only input/output internal * long ids, which are converted in the {@link Endpoint} higher-level class to {@link SWHID}. * * @author The Software Heritage developers * @see Endpoint */ public class Traversal { /** Graph used in the traversal */ Graph graph; /** Graph edge restriction */ AllowedEdges edges; /** Hash set storing if we have visited a node */ HashSet visited; /** Hash map storing parent node id for each nodes during a traversal */ Map parentNode; /** Number of edges accessed during traversal */ long nbEdgesAccessed; /** random number generator, for random walks */ Random rng; /** * Constructor. * * @param graph graph used in the traversal * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges + * @param edgesFmt a formatted string describing allowed + * edges */ public Traversal(Graph graph, String direction, String edgesFmt) { if (!direction.matches("forward|backward")) { throw new IllegalArgumentException("Unknown traversal direction: " + direction); } if (direction.equals("backward")) { this.graph = graph.transpose(); } else { this.graph = graph; } this.edges = new AllowedEdges(edgesFmt); this.visited = new HashSet<>(); this.parentNode = new HashMap<>(); this.nbEdgesAccessed = 0; this.rng = new Random(); } /** * Returns number of accessed edges during traversal. * * @return number of edges accessed in last traversal */ public long getNbEdgesAccessed() { return nbEdgesAccessed; } /** * Returns number of accessed nodes during traversal. * * @return number of nodes accessed in last traversal */ public long getNbNodesAccessed() { return this.visited.size(); } /** * Push version of {@link #leaves} will fire passed callback for each leaf. */ public void leavesVisitor(long srcNodeId, NodeIdConsumer cb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); long neighborsCnt = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); LazyLongIterator it = graph.successors(currentNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { neighborsCnt++; if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } if (neighborsCnt == 0) { cb.accept(currentNodeId); } } } /** * Returns the leaves of a subgraph rooted at the specified source node. * * @param srcNodeId source node * @return list of node ids corresponding to the leaves */ public ArrayList leaves(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); leavesVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** - * Push version of {@link #neighbors}: will fire passed callback on each - * neighbor. + * Push version of {@link #neighbors}: will fire passed callback on each neighbor. */ public void neighborsVisitor(long srcNodeId, NodeIdConsumer cb) { this.nbEdgesAccessed = graph.outdegree(srcNodeId); LazyLongIterator it = graph.successors(srcNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { cb.accept(neighborNodeId); } } /** * Returns node direct neighbors (linked with exactly one edge). 
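For orientation, a sketch of how these visitor-based helpers are typically driven (g and dirNodeId are placeholders; a fresh Traversal is built per visit because the visited set is per-instance state, exactly as the Entry class above does):

    // Forward traversal restricted to directory and content edges
    Traversal t = new Traversal(g, "forward", "dir:dir,dir:cnt");
    ArrayList<Long> ls = t.neighbors(dirNodeId);     // direct entries of a directory
    // Recursive visit, pull style: collect every leaf (the files below dirNodeId)
    ArrayList<Long> files = new Traversal(g, "forward", "dir:dir,dir:cnt").leaves(dirNodeId);
    // Push style: stream node ids through a callback instead of materializing a list
    new Traversal(g, "forward", "dir:dir,dir:cnt")
            .visitNodesVisitor(dirNodeId, nodeId -> System.out.println(g.getSWHID(nodeId)));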
* * @param srcNodeId source node * @return list of node ids corresponding to the neighbors */ public ArrayList neighbors(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); neighborsVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** - * Push version of {@link #visitNodes}: will fire passed callback on each - * visited node. + * Push version of {@link #visitNodes}: will fire passed callback on each visited node. */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer nodeCb, EdgeIdConsumer edgeCb) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (nodeCb != null) { nodeCb.accept(currentNodeId); } nbEdgesAccessed += graph.outdegree(currentNodeId); LazyLongIterator it = graph.successors(currentNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (edgeCb != null) { edgeCb.accept(currentNodeId, neighborNodeId); } if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); } } } } /** One-argument version to handle callbacks properly */ public void visitNodesVisitor(long srcNodeId, NodeIdConsumer cb) { visitNodesVisitor(srcNodeId, cb, null); } /** * Performs a graph traversal and returns explored nodes. * * @param srcNodeId source node * @return list of explored node ids */ public ArrayList visitNodes(long srcNodeId) { ArrayList nodeIds = new ArrayList<>(); visitNodesVisitor(srcNodeId, nodeIds::add); return nodeIds; } /** - * Push version of {@link #visitPaths}: will fire passed callback on each - * discovered (complete) path. + * Push version of {@link #visitPaths}: will fire passed callback on each discovered (complete) + * path. */ public void visitPathsVisitor(long srcNodeId, PathConsumer cb) { Stack currentPath = new Stack<>(); this.nbEdgesAccessed = 0; visitPathsInternalVisitor(srcNodeId, currentPath, cb); } /** * Performs a graph traversal and returns explored paths. * * @param srcNodeId source node * @return list of explored paths (represented as a list of node ids) */ public ArrayList> visitPaths(long srcNodeId) { ArrayList> paths = new ArrayList<>(); visitPathsVisitor(srcNodeId, paths::add); return paths; } - private void visitPathsInternalVisitor(long currentNodeId, - Stack currentPath, - PathConsumer cb) { + private void visitPathsInternalVisitor(long currentNodeId, Stack currentPath, PathConsumer cb) { currentPath.push(currentNodeId); long visitedNeighbors = 0; nbEdgesAccessed += graph.outdegree(currentNodeId); LazyLongIterator it = graph.successors(currentNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { visitPathsInternalVisitor(neighborNodeId, currentPath, cb); visitedNeighbors++; } if (visitedNeighbors == 0) { ArrayList path = new ArrayList<>(currentPath); cb.accept(path); } currentPath.pop(); } /** - * Performs a graph traversal with backtracking, and returns the first - * found path from source to destination. + * Performs a graph traversal with backtracking, and returns the first found path from source to + * destination. 
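A sketch of calling walk, whose destination can be either a concrete node id or a node type (g, srcNodeId and dstNodeId are placeholders; the backward edge restriction mirrors the one used by the provenance benchmark further down):

    // Walk backward until the first revision is reached, visiting depth-first
    Traversal t = new Traversal(g, "backward", "dir:dir,cnt:dir,dir:rev");
    ArrayList<Long> toRev = t.walk(srcNodeId, Node.Type.REV, "dfs");
    // Or walk toward one specific node, breadth-first; an unreachable destination
    // raises IllegalArgumentException
    ArrayList<Long> toNode = new Traversal(g, "backward", "*").walk(srcNodeId, dstNodeId, "bfs");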
* * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return found path as a list of node ids */ public ArrayList walk(long srcNodeId, T dst, String visitOrder) { long dstNodeId; if (visitOrder.equals("dfs")) { dstNodeId = walkInternalDFS(srcNodeId, dst); } else if (visitOrder.equals("bfs")) { dstNodeId = walkInternalBFS(srcNodeId, dst); } else { throw new IllegalArgumentException("Unknown visit order: " + visitOrder); } if (dstNodeId == -1) { throw new IllegalArgumentException("Cannot find destination: " + dst); } return backtracking(srcNodeId, dstNodeId); } /** - * Performs a random walk (picking a random successor at each step) from - * source to destination. + * Performs a random walk (picking a random successor at each step) from source to destination. * * @param srcNodeId source node * @param dst destination (either a node or a node type) - * @return found path as a list of node ids or an empty path to indicate - * that no suitable path have been found + * @return found path as a list of node ids or an empty path to indicate that no suitable path have + * been found */ public ArrayList randomWalk(long srcNodeId, T dst) { return randomWalk(srcNodeId, dst, 0); } /** - * Performs a stubborn random walk (picking a random successor at each - * step) from source to destination. The walk is "stubborn" in the sense - * that it will not give up the first time if a satisfying target node is - * found, but it will retry up to a limited amount of times. + * Performs a stubborn random walk (picking a random successor at each step) from source to + * destination. The walk is "stubborn" in the sense that it will not give up the first time if a + * satisfying target node is found, but it will retry up to a limited amount of times. 
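And the random-walk variants described here, in a sketch with the same placeholders:

    Traversal t = new Traversal(g, "backward", "*");
    // Single random walk toward any origin; an empty list means the walk dead-ended
    ArrayList<Long> path = t.randomWalk(srcNodeId, Node.Type.ORI);
    // "Stubborn" variant: retry up to 5 additional times before giving up
    ArrayList<Long> stubborn = t.randomWalk(srcNodeId, Node.Type.ORI, 5);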
* * @param srcNodeId source node * @param dst destination (either a node or a node type) * @param retries number of times to retry; 0 means no retries (single walk) - * @return found path as a list of node ids or an empty path to indicate - * that no suitable path have been found + * @return found path as a list of node ids or an empty path to indicate that no suitable path have + * been found */ public ArrayList randomWalk(long srcNodeId, T dst, int retries) { long curNodeId = srcNodeId; ArrayList path = new ArrayList<>(); this.nbEdgesAccessed = 0; boolean found; if (retries < 0) { throw new IllegalArgumentException("Negative number of retries given: " + retries); } while (true) { path.add(curNodeId); LazyLongIterator successors = graph.successors(curNodeId, edges); curNodeId = randomPick(successors); if (curNodeId < 0) { found = false; break; } if (isDstNode(curNodeId, dst)) { path.add(curNodeId); found = true; break; } } if (found) { return path; - } else if (retries > 0) { // try again + } else if (retries > 0) { // try again return randomWalk(srcNodeId, dst, retries - 1); - } else { // not found and no retries left + } else { // not found and no retries left path.clear(); return path; } } /** - * Randomly choose an element from an iterator over Longs using reservoir - * sampling + * Randomly choose an element from an iterator over Longs using reservoir sampling * * @param elements iterator over selection domain * @return randomly chosen element or -1 if no suitable element was found */ private long randomPick(LazyLongIterator elements) { long curPick = -1; long seenCandidates = 0; - for (long element; (element = elements.nextLong()) != -1; ) { + for (long element; (element = elements.nextLong()) != -1;) { seenCandidates++; if (Math.round(rng.nextFloat() * (seenCandidates - 1)) == 0) { curPick = element; } } return curPick; } /** * Internal DFS function of {@link #walk}. * * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalDFS(long srcNodeId, T dst) { Stack stack = new Stack<>(); this.nbEdgesAccessed = 0; stack.push(srcNodeId); visited.add(srcNodeId); while (!stack.isEmpty()) { long currentNodeId = stack.pop(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); LazyLongIterator it = graph.successors(currentNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { stack.push(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal BFS function of {@link #walk}. 
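The randomPick helper above implements single-element reservoir sampling; as a reference point, a minimal textbook formulation of the same idea (a sketch assuming an iterator elements and a Random rng as in the class, not the project's exact code):

    long pick = -1;
    long seen = 0;
    for (long element; (element = elements.nextLong()) != -1;) {
        seen++;
        // Replace the current pick with probability 1/seen, which leaves every
        // element selected with probability 1/n once the iterator is exhausted
        if (rng.nextDouble() * seen < 1.0) {
            pick = element;
        }
    }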
* * @param srcNodeId source node * @param dst destination (either a node or a node type) * @return final destination node or -1 if no path found */ private long walkInternalBFS(long srcNodeId, T dst) { Queue queue = new LinkedList<>(); this.nbEdgesAccessed = 0; queue.add(srcNodeId); visited.add(srcNodeId); while (!queue.isEmpty()) { long currentNodeId = queue.poll(); if (isDstNode(currentNodeId, dst)) { return currentNodeId; } nbEdgesAccessed += graph.outdegree(currentNodeId); LazyLongIterator it = graph.successors(currentNodeId, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!visited.contains(neighborNodeId)) { queue.add(neighborNodeId); visited.add(neighborNodeId); parentNode.put(neighborNodeId, currentNodeId); } } } return -1; } /** * Internal function of {@link #walk} to check if a node corresponds to the destination. * * @param nodeId current node * @param dst destination (either a node or a node type) * @return true if the node is a destination, or false otherwise */ private boolean isDstNode(long nodeId, T dst) { if (dst instanceof Long) { long dstNodeId = (Long) dst; return nodeId == dstNodeId; } else if (dst instanceof Node.Type) { Node.Type dstType = (Node.Type) dst; return graph.getNodeType(nodeId) == dstType; } else { return false; } } /** * Internal backtracking function of {@link #walk}. * * @param srcNodeId source node * @param dstNodeId destination node * @return the found path, as a list of node ids */ private ArrayList backtracking(long srcNodeId, long dstNodeId) { ArrayList path = new ArrayList<>(); long currentNodeId = dstNodeId; while (currentNodeId != srcNodeId) { path.add(currentNodeId); currentNodeId = parentNode.get(currentNodeId); } path.add(srcNodeId); Collections.reverse(path); return path; } /** * Find a common descendant between two given nodes using two parallel BFS * * @param lhsNode the first node * @param rhsNode the second node * @return the found path, as a list of node ids */ public Long findCommonDescendant(long lhsNode, long rhsNode) { Queue lhsStack = new ArrayDeque<>(); Queue rhsStack = new ArrayDeque<>(); HashSet lhsVisited = new HashSet<>(); HashSet rhsVisited = new HashSet<>(); lhsStack.add(lhsNode); rhsStack.add(rhsNode); lhsVisited.add(lhsNode); rhsVisited.add(rhsNode); this.nbEdgesAccessed = 0; Long curNode; while (!lhsStack.isEmpty() || !rhsStack.isEmpty()) { if (!lhsStack.isEmpty()) { curNode = lhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); LazyLongIterator it = graph.successors(curNode, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!lhsVisited.contains(neighborNodeId)) { if (rhsVisited.contains(neighborNodeId)) return neighborNodeId; lhsStack.add(neighborNodeId); lhsVisited.add(neighborNodeId); } } } if (!rhsStack.isEmpty()) { curNode = rhsStack.poll(); nbEdgesAccessed += graph.outdegree(curNode); LazyLongIterator it = graph.successors(curNode, edges); - for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1; ) { + for (long neighborNodeId; (neighborNodeId = it.nextLong()) != -1;) { if (!rhsVisited.contains(neighborNodeId)) { if (lhsVisited.contains(neighborNodeId)) return neighborNodeId; rhsStack.add(neighborNodeId); rhsVisited.add(neighborNodeId); } } } } return null; } public interface NodeIdConsumer extends LongConsumer { /** - * Callback for incrementally receiving node identifiers during a graph - 
* visit. + * Callback for incrementally receiving node identifiers during a graph visit. */ void accept(long nodeId); } public interface EdgeIdConsumer { /** - * Callback for incrementally receiving edge identifiers during a graph - * visit. + * Callback for incrementally receiving edge identifiers during a graph visit. */ void accept(long srcId, long dstId); } public interface PathConsumer extends Consumer> { /** - * Callback for incrementally receiving node paths (made of node - * identifiers) during a graph visit. + * Callback for incrementally receiving node paths (made of node identifiers) during a graph visit. */ void accept(ArrayList path); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java index 44f9b92..883264d 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/BFS.java @@ -1,111 +1,107 @@ package org.softwareheritage.graph.benchmark; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import java.io.File; import java.io.IOException; - public class BFS { private final static Logger LOGGER = LoggerFactory.getLogger(BFS.class); private final ImmutableGraph graph; public BFS(ImmutableGraph graph) { this.graph = graph; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - BFS.class.getName(), - "", + SimpleJSAP jsap = new SimpleJSAP(BFS.class.getName(), "", new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), - new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, - 'T', "transposed", "Use transposed graph (default: false)"), - } - ); + new FlaggedOption("useTransposed", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'T', + "transposed", "Use transposed graph (default: false)"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); boolean useTransposed = config.getBoolean("useTransposed"); System.err.println("Loading graph " + graphPath + " ..."); Graph graph = new Graph(graphPath); System.err.println("Graph loaded."); if (useTransposed) graph = graph.transpose(); BFS bfs = new BFS(graph); bfs.bfsperm(); } // Partly inlined from it.unimi.dsi.law.big.graph.BFS private void bfsperm() throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(BFS.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, 
bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 final LongArrayBitVector visited = LongArrayBitVector.ofLength(n); final ProgressLogger pl = new ProgressLogger(LOGGER); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting breadth-first visit..."); for (long i = 0; i < n; i++) { - if (visited.getBoolean(i)) continue; + if (visited.getBoolean(i)) + continue; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!visited.getBoolean(succ)) { visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } } pl.update(); } } pl.done(); queue.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java index 7c3cef7..98dd854 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Benchmark.java @@ -1,162 +1,154 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SWHID; import org.softwareheritage.graph.benchmark.utils.Random; import org.softwareheritage.graph.benchmark.utils.Statistics; import org.softwareheritage.graph.server.Endpoint; import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.StringJoiner; import java.util.function.Function; /** * Benchmark common utility functions. * * @author The Software Heritage developers */ public class Benchmark { /** CSV separator for log file */ final String CSV_SEPARATOR = ";"; /** Command line arguments */ public Args args; /** * Constructor. */ public Benchmark() { this.args = new Args(); } /** * Parses benchmark command line arguments. * * @param args command line arguments */ public void parseCommandLineArgs(String[] args) throws JSAPException { SimpleJSAP jsap = new SimpleJSAP(Benchmark.class.getName(), "Benchmark tool for Software Heritage use-cases scenarios.", new Parameter[]{ new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), new FlaggedOption("nbNodes", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'n', "nb-nodes", "Number of random nodes used to do the benchmark."), new FlaggedOption("logFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'l', "log-file", "File name to output CSV format benchmark log."), - new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', - "seed", "Random generator seed."), - }); + new FlaggedOption("seed", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 's', "seed", + "Random generator seed."),}); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } this.args.graphPath = config.getString("graphPath"); this.args.nbNodes = config.getInt("nbNodes"); this.args.logFile = config.getString("logFile"); this.args.random = config.contains("seed") ? new Random(config.getLong("seed")) : new Random(); } /** * Creates CSV file for log output. 
*/ public void createCSVLogFile() throws IOException { try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile))) { StringJoiner csvHeader = new StringJoiner(CSV_SEPARATOR); - csvHeader.add("use case name") - .add("SWHID") - .add("number of edges accessed") - .add("traversal timing") - .add("swhid2node timing") - .add("node2swhid timing"); + csvHeader.add("use case name").add("SWHID").add("number of edges accessed").add("traversal timing") + .add("swhid2node timing").add("node2swhid timing"); csvLog.write(csvHeader.toString() + "\n"); } } /** * Times a specific endpoint and outputs individual datapoints along with aggregated statistics. * * @param useCaseName benchmark use-case name * @param graph compressed graph used in the benchmark * @param nodeIds node ids to use as starting point for the endpoint traversal * @param operation endpoint function to benchmark - * @param dstFmt destination formatted string as described in the API + * @param dstFmt destination formatted string as described in the + * API * @param algorithm traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, - Function operation, String dstFmt, String algorithm) - throws IOException { + Function operation, String dstFmt, String algorithm) throws IOException { ArrayList timings = new ArrayList<>(); ArrayList timingsNormalized = new ArrayList<>(); ArrayList nbEdgesAccessed = new ArrayList<>(); final boolean append = true; try (Writer csvLog = new BufferedWriter(new FileWriter(args.logFile, append))) { for (long nodeId : nodeIds) { SWHID swhid = graph.getSWHID(nodeId); Endpoint.Output output = (dstFmt == null) ? operation.apply(new Endpoint.Input(swhid)) : operation.apply(new Endpoint.Input(swhid, dstFmt, algorithm)); StringJoiner csvLine = new StringJoiner(CSV_SEPARATOR); - csvLine.add(useCaseName) - .add(swhid.toString()) - .add(Long.toString(output.meta.nbEdgesAccessed)) + csvLine.add(useCaseName).add(swhid.toString()).add(Long.toString(output.meta.nbEdgesAccessed)) .add(Double.toString(output.meta.timings.traversal)) .add(Double.toString(output.meta.timings.swhid2node)) .add(Double.toString(output.meta.timings.node2swhid)); csvLog.write(csvLine.toString() + "\n"); timings.add(output.meta.timings.traversal); nbEdgesAccessed.add((double) output.meta.nbEdgesAccessed); if (output.meta.nbEdgesAccessed != 0) { timingsNormalized.add(output.meta.timings.traversal / output.meta.nbEdgesAccessed); } } } System.out.println("\n" + useCaseName + " use-case:"); System.out.println("timings:"); Statistics stats = new Statistics(timings); stats.printAll(); System.out.println("timings normalized:"); Statistics statsNormalized = new Statistics(timingsNormalized); statsNormalized.printAll(); System.out.println("nb edges accessed:"); Statistics statsNbEdgesAccessed = new Statistics(nbEdgesAccessed); statsNbEdgesAccessed.printAll(); } /** * Same as {@link #timeEndpoint} but without destination or algorithm specified to endpoint call. */ public void timeEndpoint(String useCaseName, Graph graph, long[] nodeIds, - Function operation) throws IOException { + Function operation) throws IOException { timeEndpoint(useCaseName, graph, nodeIds, operation, null, null); } /** * Input arguments. 
*/ public class Args { /** Basename of the compressed graph */ public String graphPath; /** Number of random nodes to use for the benchmark */ public int nbNodes; /** File name for CSV format benchmark log */ public String logFile; /** Random generator */ public Random random; } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java index 910ca76..1bfc2a7 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Browsing.java @@ -1,44 +1,42 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** - * Benchmark Software Heritage browsing + * Benchmark Software Heritage + * browsing * use-cases scenarios. * * @author The Software Heritage developers */ public class Browsing { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); - long[] dirNodeIds = - bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); - long[] revNodeIds = - bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); + long[] dirNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.DIR); + long[] revNodeIds = bench.args.random.generateNodeIdsOfType(graph, bench.args.nbNodes, Node.Type.REV); Endpoint dirEndpoint = new Endpoint(graph, "forward", "dir:cnt,dir:dir"); Endpoint revEndpoint = new Endpoint(graph, "forward", "rev:rev"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("ls", graph, dirNodeIds, dirEndpoint::neighbors); bench.timeEndpoint("ls -R", graph, dirNodeIds, dirEndpoint::visitPaths); bench.timeEndpoint("git log", graph, revNodeIds, revEndpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java index 324f38e..9ab04c3 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Provenance.java @@ -1,51 +1,45 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** - * Benchmark Software Heritage provenance + * Benchmark Software Heritage + * provenance * use-cases scenarios. * * @author The Software Heritage developers */ public class Provenance { /** * Main entrypoint. 
* * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint commitProvenanceEndpoint = new Endpoint(graph, "backward", "dir:dir,cnt:dir,dir:rev"); Endpoint originProvenanceEndpoint = new Endpoint(graph, "backward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); - bench.timeEndpoint( - "commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); - bench.timeEndpoint( - "commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); - bench.timeEndpoint( - "complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); - - bench.timeEndpoint( - "origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); - bench.timeEndpoint( - "origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); - bench.timeEndpoint( - "complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); + bench.timeEndpoint("commit provenance (dfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "dfs"); + bench.timeEndpoint("commit provenance (bfs)", graph, nodeIds, commitProvenanceEndpoint::walk, "rev", "bfs"); + bench.timeEndpoint("complete commit provenance", graph, nodeIds, commitProvenanceEndpoint::leaves); + + bench.timeEndpoint("origin provenance (dfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "dfs"); + bench.timeEndpoint("origin provenance (bfs)", graph, nodeIds, originProvenanceEndpoint::walk, "ori", "bfs"); + bench.timeEndpoint("complete origin provenance", graph, nodeIds, originProvenanceEndpoint::leaves); } } diff --git a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java index 3a6c6dc..c0cd7ec 100644 --- a/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java +++ b/java/src/main/java/org/softwareheritage/graph/benchmark/Vault.java @@ -1,37 +1,37 @@ package org.softwareheritage.graph.benchmark; import com.martiansoftware.jsap.JSAPException; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.server.Endpoint; import java.io.IOException; /** - * Benchmark Software Heritage vault - * use-case scenario. + * Benchmark Software Heritage + * vault use-case + * scenario. * * @author The Software Heritage developers */ public class Vault { /** * Main entrypoint. 
* * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { Benchmark bench = new Benchmark(); bench.parseCommandLineArgs(args); Graph graph = new Graph(bench.args.graphPath); long[] nodeIds = bench.args.random.generateNodeIds(graph, bench.args.nbNodes); Endpoint endpoint = new Endpoint(graph, "forward", "*"); System.out.println("Used " + bench.args.nbNodes + " random nodes (results are in seconds):"); bench.createCSVLogFile(); bench.timeEndpoint("git bundle", graph, nodeIds, endpoint::visitNodes); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java index e2e68ff..5dab92b 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindCommonAncestor.java @@ -1,66 +1,62 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Traversal; import java.io.IOException; import java.util.Scanner; public class FindCommonAncestor { private Graph graph; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - FindCommonAncestor.class.getName(), - "", - new Parameter[] { - new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'e', "edges", "Edges constraints"), - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(FindCommonAncestor.class.getName(), "", + new Parameter[]{ + new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', + "edges", "Edges constraints"), + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String edgesFmt = config.getString("edgesFmt"); FindCommonAncestor fca = new FindCommonAncestor(); try { fca.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); Traversal t = new Traversal(fca.graph.symmetrize(), "forward", edgesFmt); System.out.println(t.findCommonDescendant(lhsNode, rhsNode)); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java index 6839a8f..f916a8e 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/FindPath.java @@ -1,130 +1,123 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; 
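// A note on the path search implemented further down in this file: FindPath.findPath runs a
// plain breadth-first search and records, for every newly discovered node, the node that
// enqueued it (parentNode), then follows those parent links back from dst to src to rebuild
// the path. The following stand-alone sketch shows the same pattern on a hypothetical
// Map-based adjacency list; it does not use the swh-graph Graph API, and the class and
// method names are illustrative only.
import java.util.*;

class BfsPathSketch {
    /** Returns the node sequence src..dst, or null if dst is unreachable. */
    static List<Long> findPath(Map<Long, List<Long>> adj, long src, long dst) {
        Map<Long, Long> parent = new HashMap<>();    // discovered node -> node that reached it
        Deque<Long> queue = new ArrayDeque<>();
        Set<Long> visited = new HashSet<>();
        queue.add(src);
        visited.add(src);
        while (!queue.isEmpty()) {
            long current = queue.poll();
            for (long succ : adj.getOrDefault(current, List.of())) {
                if (!visited.add(succ))
                    continue;                        // already discovered
                parent.put(succ, current);
                if (succ == dst) {                   // rebuild the path by following parents
                    List<Long> path = new ArrayList<>();
                    for (long n = dst; n != src; n = parent.get(n))
                        path.add(n);
                    path.add(src);
                    Collections.reverse(path);
                    return path;
                }
                queue.add(succ);
            }
        }
        return null;
    }
}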
import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.*; public class FindPath { private Graph graph; private Long emptySnapshot; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - FindPath.class.getName(), - "", - new Parameter[] { - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(FindPath.class.getName(), "", + new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private boolean nodeIsEmptySnapshot(Long node) { - if (this.emptySnapshot == null - && this.graph.getNodeType(node) == Node.Type.SNP + if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } - private Boolean shouldVisit(Long node){ + private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); - if (nt != Node.Type.REV && nt != Node.Type.REL - && nt != Node.Type.SNP && nt != Node.Type.ORI) { + if (nt != Node.Type.REV && nt != Node.Type.REL && nt != Node.Type.SNP && nt != Node.Type.ORI) { return false; } if (this.nodeIsEmptySnapshot(node)) return false; return true; } private ArrayList findPath(Long src, Long dst) { HashSet visited = new HashSet<>(); Queue queue = new ArrayDeque<>(); Map parentNode = new HashMap<>(); queue.add(src); visited.add(src); while (!queue.isEmpty()) { long currentNode = queue.poll(); final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { - if (!shouldVisit(succ) || visited.contains(succ)) continue; + if (!shouldVisit(succ) || visited.contains(succ)) + continue; visited.add(succ); queue.add(succ); parentNode.put(succ, currentNode); if (succ == dst) { ArrayList path = new ArrayList<>(); long n = dst; while (n != src) { path.add(n); n = parentNode.get(n); } path.add(src); Collections.reverse(path); return path; } } } return null; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); FindPath fpath = new FindPath(); try { fpath.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long lhsNode = input.nextLong(); long rhsNode = input.nextLong(); ArrayList path = fpath.findPath(lhsNode, rhsNode); if (path != null) { for (Long n : path) { System.out.format("%d ", n); } System.out.println(); - } - else { + } else { System.out.println("null"); } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java index 
0b239d1..8dddf1d 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCC.java @@ -1,253 +1,249 @@ package org.softwareheritage.graph.experiments.forks; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; -import org.softwareheritage.graph.benchmark.BFS; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; public class ForkCC { public Boolean includeRootDir; private Graph graph; private Long emptySnapshot; private LongArrayBitVector visited; private LongArrayBitVector whitelist; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - ForkCC.class.getName(), - "", - new Parameter[] { - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, - 't', "whitelist", "Whitelist of origins"), - new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, - 'R', "includerootdir", "Include root directory (default: false)"), - new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'o', "outdir", "Directory where to put the results"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(ForkCC.class.getName(), "", + new Parameter[]{ + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', + "whitelist", "Whitelist of origins"), + new FlaggedOption("includeRootDir", JSAP.BOOLEAN_PARSER, "false", JSAP.NOT_REQUIRED, 'R', + "includerootdir", "Include root directory (default: false)"), + new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', + "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private static void printDistribution(ArrayList> components) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { System.out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { System.out.println(node); } } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename).symmetrize(); System.err.println("Graph loaded."); this.emptySnapshot = null; this.whitelist = null; this.visited = null; this.includeRootDir = null; } private boolean nodeIsEmptySnapshot(Long node) { - if (this.emptySnapshot == 
null - && this.graph.getNodeType(node) == Node.Type.SNP + if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private Boolean shouldVisit(Long node) { Node.Type nt = this.graph.getNodeType(node); if (nt == Node.Type.CNT) { return false; } if (nt == Node.Type.DIR && !includeRootDir) return false; if (this.nodeIsEmptySnapshot(node)) return false; if (visited.getBoolean(node)) return false; return true; } private ArrayList> compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 visited = LongArrayBitVector.ofLength(n); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); ArrayList> components = new ArrayList<>(); for (long i = 0; i < n; i++) { - if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) continue; + if (!shouldVisit(i) || this.graph.getNodeType(i) == Node.Type.DIR) + continue; ArrayList component = new ArrayList<>(); queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); Node.Type cur_nt = this.graph.getNodeType(currentNode); - if (cur_nt == Node.Type.ORI - && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) { + if (cur_nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(currentNode))) { // TODO: add a check that the origin has >=1 non-empty snapshot component.add(currentNode); } final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { - if (!shouldVisit(succ)) continue; - if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) continue; + if (!shouldVisit(succ)) + continue; + if (this.graph.getNodeType(succ) == Node.Type.DIR && cur_nt != Node.Type.REV) + continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } if (component.size() > 0) { components.add(component); } } pl.done(); queue.close(); return components; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { 
out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " ..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); boolean includeRootDir = config.getBoolean("includeRootDir"); String outdirPath = config.getString("outdir"); ForkCC forkCc = new ForkCC(); try { forkCc.load_graph(graphPath); forkCc.includeRootDir = includeRootDir; } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCc.parseWhitelist(whitelistPath); } ProgressLogger logger = new ProgressLogger(); - //noinspection ResultOfMethodCallIgnored + // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { ArrayList> components = forkCc.compute(logger); printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java index 1ebc7fb..8e6da02 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ForkCliques.java @@ -1,228 +1,223 @@ package org.softwareheritage.graph.experiments.forks; import ch.qos.logback.classic.Level; import ch.qos.logback.classic.Logger; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.*; public class ForkCliques { private Graph graph; private LongArrayBitVector whitelist; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); this.whitelist = null; } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - ForkCliques.class.getName(), - "", + SimpleJSAP jsap = new SimpleJSAP(ForkCliques.class.getName(), "", new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, - 't', "whitelist", "Whitelist of origins"), - new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'o', "outdir", "Directory where to put the 
results"), - } - ); + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("whitelistPath", JSAP.STRING_PARSER, null, JSAP.NOT_REQUIRED, 't', + "whitelist", "Whitelist of origins"), + new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', + "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private ArrayList dfsAt(Long baseNode) { ArrayList res = new ArrayList<>(); final Deque stack = new ArrayDeque<>(); HashSet seen = new HashSet<>(); stack.push(baseNode); while (!stack.isEmpty()) { final Long currentNode = stack.pop(); final LazyLongIterator iterator = this.graph.predecessors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (!seen.contains(succ)) { Node.Type nt = this.graph.getNodeType(succ); if (nt == Node.Type.DIR || nt == Node.Type.CNT) continue; - if (nt == Node.Type.ORI - && (this.whitelist == null || this.whitelist.getBoolean(succ))) { + if (nt == Node.Type.ORI && (this.whitelist == null || this.whitelist.getBoolean(succ))) { res.add(succ); } else { stack.push(succ); seen.add(succ); } } } } Collections.sort(res); return res; } private boolean isBaseRevision(Long node) { if (this.graph.getNodeType(node) != Node.Type.REV) return false; final LazyLongIterator iterator = this.graph.successors(node); long succ; while ((succ = iterator.nextLong()) != -1) { if (this.graph.getNodeType(succ) == Node.Type.REV) return false; } - return true; + return true; } static private String fingerprint(ArrayList cluster) { MessageDigest digest; try { digest = MessageDigest.getInstance("SHA-256"); } catch (NoSuchAlgorithmException e) { e.printStackTrace(); return null; } for (Long n : cluster) digest.update(Longs.toByteArray(n)); return new String(digest.digest()); } private ArrayList> compute(ProgressLogger pl) { final long n = this.graph.numNodes(); HashSet fingerprints = new HashSet<>(); ArrayList> clusters = new ArrayList<>(); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting topological sort..."); for (long i = 0; i < n; i++) { if (isBaseRevision(i)) { ArrayList currentCluster = dfsAt(i); String clusterFp = fingerprint(currentCluster); if (!fingerprints.contains(clusterFp)) { fingerprints.add(clusterFp); clusters.add(currentCluster); } } pl.update(); } pl.done(); return clusters; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } } private void parseWhitelist(String path) { System.err.println("Loading whitelist " + path + " 
..."); this.whitelist = LongArrayBitVector.ofLength(this.graph.numNodes()); Scanner scanner; try { scanner = new Scanner(new File(path)); - while(scanner.hasNextLong()) { + while (scanner.hasNextLong()) { whitelist.set(scanner.nextLong()); } System.err.println("Whitelist loaded."); } catch (FileNotFoundException e) { e.printStackTrace(); } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String whitelistPath = config.getString("whitelistPath"); String outdirPath = config.getString("outdir"); ForkCliques forkCliques = new ForkCliques(); try { forkCliques.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } if (whitelistPath != null) { forkCliques.parseWhitelist(whitelistPath); } Logger rootLogger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); rootLogger.setLevel(Level.DEBUG); ProgressLogger logger = new ProgressLogger(rootLogger); ArrayList> components = forkCliques.compute(logger); - //noinspection ResultOfMethodCallIgnored + // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); printLargestComponent(components, new Formatter(outdirPath + "/largest_clique.txt")); printAllComponents(components, new Formatter(outdirPath + "/all_cliques.txt")); } catch (FileNotFoundException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java index 81a0c50..d99dc2c 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/forks/ListEmptyOrigins.java @@ -1,93 +1,88 @@ package org.softwareheritage.graph.experiments.forks; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.IOException; import java.util.ArrayList; public class ListEmptyOrigins { private Graph graph; private Long emptySnapshot; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - ListEmptyOrigins.class.getName(), - "", - new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(ListEmptyOrigins.class.getName(), "", + new Parameter[]{new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + 'g', "graph", "Basename of the compressed graph"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); ListEmptyOrigins leo = new ListEmptyOrigins(); try { leo.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ArrayList badlist = leo.compute(leo.graph); for (Long bad : badlist) { System.out.println(bad); } } private void load_graph(String graphBasename) throws IOException { 
System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); this.emptySnapshot = null; } private boolean nodeIsEmptySnapshot(Long node) { System.err.println(this.graph.getNodeType(node) + " " + this.graph.outdegree(node) + " " + node); - if (this.emptySnapshot == null - && this.graph.getNodeType(node) == Node.Type.SNP + if (this.emptySnapshot == null && this.graph.getNodeType(node) == Node.Type.SNP && this.graph.outdegree(node) == 0) { System.err.println("Found empty snapshot: " + node); this.emptySnapshot = node; } return node.equals(this.emptySnapshot); } private ArrayList compute(ImmutableGraph graph) { final long n = graph.numNodes(); ArrayList bad = new ArrayList<>(); for (long i = 0; i < n; i++) { Node.Type nt = this.graph.getNodeType(i); - if (nt != Node.Type.ORI) continue; + if (nt != Node.Type.ORI) + continue; final LazyLongIterator iterator = graph.successors(i); long succ; boolean found = false; while ((succ = iterator.nextLong()) != -1) { if (this.graph.outdegree(succ) > 0) { found = true; } } if (!found) bad.add(i); } return bad; } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java index 9fd8e6b..1681bd5 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/multiplicationfactor/GenDistribution.java @@ -1,136 +1,130 @@ package org.softwareheritage.graph.experiments.multiplicationfactor; import com.martiansoftware.jsap.*; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.Traversal; import org.softwareheritage.graph.benchmark.utils.Timing; import java.io.IOException; import java.util.Scanner; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class GenDistribution { private Graph graph; private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - GenDistribution.class.getName(), - "", + SimpleJSAP jsap = new SimpleJSAP(GenDistribution.class.getName(), "", new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 's', "srctype", "Source node type"), - new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'd', "dsttype", "Destination node type"), - new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'e', "edges", "Edges constraints"), - - new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, - 't', "numthreads", "Number of threads"), - } - ); + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("srcType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 's', + "srctype", "Source node type"), + new FlaggedOption("dstType", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'd', + "dsttype", "Destination node type"), + new FlaggedOption("edgesFmt", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'e', + "edges", "Edges constraints"), + + 
new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "128", JSAP.NOT_REQUIRED, 't', + "numthreads", "Number of threads"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); Node.Type srcType = Node.Type.fromStr(config.getString("srcType")); Node.Type dstType = Node.Type.fromStr(config.getString("dstType")); String edgesFmt = config.getString("edgesFmt"); int numThreads = config.getInt("numThreads"); GenDistribution tp = new GenDistribution(); try { tp.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } final long END_OF_QUEUE = -1L; ArrayBlockingQueue queue = new ArrayBlockingQueue<>(numThreads); ExecutorService service = Executors.newFixedThreadPool(numThreads + 1); service.submit(() -> { try { Scanner input = new Scanner(System.in); while (input.hasNextLong()) { long node = input.nextLong(); if (tp.graph.getNodeType(node) == srcType) { queue.put(node); } } } catch (InterruptedException e) { e.printStackTrace(); } finally { for (int i = 0; i < numThreads; ++i) { try { queue.put(END_OF_QUEUE); } catch (InterruptedException e) { e.printStackTrace(); } } } }); for (int i = 0; i < numThreads; ++i) { service.submit(() -> { Graph thread_graph = tp.graph.copy(); long startTime; double totalTime; while (true) { Long node = null; try { node = queue.take(); } catch (InterruptedException e) { e.printStackTrace(); } if (node == null || node == END_OF_QUEUE) { return; } Traversal t = new Traversal(thread_graph, "backward", edgesFmt); int[] count = {0}; startTime = Timing.start(); t.visitNodesVisitor(node, (curnode) -> { if (tp.graph.getNodeType(curnode) == dstType) { count[0]++; } }); totalTime = Timing.stop(startTime); - System.out.format("%d %d %d %d %f\n", - node, count[0], t.getNbNodesAccessed(), - t.getNbEdgesAccessed(), totalTime - ); + System.out.format("%d %d %d %d %f\n", node, count[0], t.getNbNodesAccessed(), + t.getNbEdgesAccessed(), totalTime); } }); } service.shutdown(); } private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename); System.err.println("Graph loaded."); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java index d1c8610..9064ef2 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ClusteringCoefficient.java @@ -1,200 +1,189 @@ package org.softwareheritage.graph.experiments.topology; import ch.qos.logback.classic.Level; import ch.qos.logback.classic.Logger; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; -import it.unimi.dsi.big.webgraph.Transform; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.*; import java.util.concurrent.ThreadLocalRandom; public class ClusteringCoefficient { private Graph graph; private void load_graph(String graphBasename) throws 
IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename).symmetrize(); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - ClusteringCoefficient.class.getName(), - "", + SimpleJSAP jsap = new SimpleJSAP(ClusteringCoefficient.class.getName(), "", new Parameter[]{ - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'o', "outdir", "Directory where to put the results"), - } - ); + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', + "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } private long oppositeEdges(ImmutableGraph graph, long node) { System.out.format("%d\n", node); HashSet neighborhood = new HashSet<>(); long succ; final LazyLongIterator iterator = graph.successors(node); while ((succ = iterator.nextLong()) != -1) { System.out.format("%d neighbor add %d\n", node, succ); neighborhood.add(succ); } long closed_triplets = 0; for (Long neighbor : neighborhood) { System.out.format("%d neighbor visit %d\n", node, neighbor); final LazyLongIterator it = graph.successors(neighbor); while ((succ = it.nextLong()) != -1) { System.out.format("%d neighbor visit %d succ %d\n", node, neighbor, succ); if (neighborhood.contains(succ)) { closed_triplets++; } } } return closed_triplets; } public void compute(ProgressLogger pl, Formatter out_local, Formatter out_global) { final long n = this.graph.numNodes(); pl.expectedUpdates = n; pl.itemsName = "nodes"; long nodes_d2 = 0; double cum_lcc = 0; double cum_lcc_c0 = 0; double cum_lcc_c1 = 0; HashMap distribution = new HashMap<>(); for (long node = 0; node < n; node++) { long d = graph.outdegree(node); if (d >= 2) { double t = (d * (d - 1)); double m = oppositeEdges(graph, node); double lcc = m / t; distribution.merge(lcc, 1L, Long::sum); cum_lcc += lcc; nodes_d2++; } else { cum_lcc_c1++; } pl.update(); } pl.done(); for (Map.Entry entry : distribution.entrySet()) { out_local.format("%f %d\n", entry.getKey(), entry.getValue()); } double gC = cum_lcc / nodes_d2; double gC0 = cum_lcc_c0 / n; double gC1 = cum_lcc_c1 / n; out_global.format("C: %f\n", gC); out_global.format("C0: %f\n", gC0); out_global.format("C1: %f\n", gC1); } public void compute_approx(Formatter out_global) { final long n = this.graph.numNodes(); long trials = 0; long triangles = 0; while (true) { long node = ThreadLocalRandom.current().nextLong(0, n); long d = graph.outdegree(node); if (d >= 2) { Long u = null; Long v = null; long u_idx = ThreadLocalRandom.current().nextLong(0, d); long v_idx = ThreadLocalRandom.current().nextLong(0, d - 1); if (v_idx >= u_idx) { v_idx++; } long succ; final LazyLongIterator node_iterator = graph.successors(node); for (long succ_idx = 0; (succ = node_iterator.nextLong()) != -1; succ_idx++) { if (succ_idx == u_idx) { u = succ; } if (succ_idx == v_idx) { v = succ; } } final LazyLongIterator u_iterator = graph.successors(u); while ((succ = u_iterator.nextLong()) != -1) { if (succ == v) triangles++; } } trials++; if 
(trials % 100 == 0 || true) { - double gC = (double)triangles / (double)trials; + double gC = (double) triangles / (double) trials; out_global.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); System.out.format("C: %f (triangles: %d, trials: %d)\n", gC, triangles, trials); } } } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdirPath = config.getString("outdir"); ClusteringCoefficient ccoef = new ClusteringCoefficient(); try { ccoef.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } Logger rootLogger = (Logger) LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME); rootLogger.setLevel(Level.DEBUG); new File(outdirPath).mkdirs(); try { - ccoef.compute_approx( - new Formatter(outdirPath + "/local.txt") - ); + ccoef.compute_approx(new Formatter(outdirPath + "/local.txt")); /* - ccoef.compute( - symmetric, - new ProgressLogger(rootLogger), - new Formatter(outdirPath + "/local.txt"), - new Formatter(outdirPath + "/global.txt") - ); + * ccoef.compute( symmetric, new ProgressLogger(rootLogger), new Formatter(outdirPath + + * "/local.txt"), new Formatter(outdirPath + "/global.txt") ); */ } catch (FileNotFoundException e) { e.printStackTrace(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java index f2e27d3..ae573fd 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/ConnectedComponents.java @@ -1,187 +1,179 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; -import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.LazyLongIterator; -import it.unimi.dsi.big.webgraph.Transform; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.*; public class ConnectedComponents { private Graph graph; private void load_graph(String graphBasename) throws IOException { System.err.println("Loading graph " + graphBasename + " ..."); this.graph = new Graph(graphBasename).symmetrize(); System.err.println("Graph loaded."); } private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - ConnectedComponents.class.getName(), - "", - new Parameter[] { - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'o', "outdir", "Directory where to put the results"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(ConnectedComponents.class.getName(), "", + new Parameter[]{ + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("outdir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o', + "outdir", "Directory where to put the results"),}); config = jsap.parse(args); if 
(jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } - private HashMap /*ArrayList>*/ compute(ProgressLogger pl) throws IOException { + private HashMap /* ArrayList> */ compute(ProgressLogger pl) throws IOException { final long n = graph.numNodes(); // Allow enough memory to behave like in-memory queue - int bufferSize = (int)Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); + int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); // Use a disk based queue to store BFS frontier final File queueFile = File.createTempFile(ConnectedComponents.class.getSimpleName(), "queue"); final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); final byte[] byteBuf = new byte[Long.BYTES]; // WARNING: no 64-bit version of this data-structure, but it can support // indices up to 2^37 LongArrayBitVector visited = LongArrayBitVector.ofLength(n); pl.expectedUpdates = n; pl.itemsName = "nodes"; pl.start("Starting connected components visit..."); // ArrayList> components = new ArrayList<>(); HashMap componentDistribution = new HashMap<>(); for (long i = 0; i < n; i++) { // ArrayList component = new ArrayList<>(); long componentNodes = 0; queue.enqueue(Longs.toByteArray(i)); visited.set(i); while (!queue.isEmpty()) { queue.dequeue(byteBuf); final long currentNode = Longs.fromByteArray(byteBuf); // component.add(currentNode); componentNodes += 1; final LazyLongIterator iterator = graph.successors(currentNode); long succ; while ((succ = iterator.nextLong()) != -1) { if (visited.getBoolean(succ)) continue; visited.set(succ); queue.enqueue(Longs.toByteArray(succ)); } pl.update(); } /* - if (component.size() > 0) { - components.add(component); - } - */ + * if (component.size() > 0) { components.add(component); } + */ if (componentNodes > 0) componentDistribution.merge(componentNodes, 1L, Long::sum); } pl.done(); // return components; return componentDistribution; } private static void printDistribution(ArrayList> components, Formatter out) { TreeMap distribution = new TreeMap<>(); for (ArrayList component : components) { distribution.merge((long) component.size(), 1L, Long::sum); } for (Map.Entry entry : distribution.entrySet()) { out.format("%d %d\n", entry.getKey(), entry.getValue()); } out.close(); } private static void printLargestComponent(ArrayList> components, Formatter out) { int indexLargest = 0; for (int i = 1; i < components.size(); ++i) { if (components.get(i).size() > components.get(indexLargest).size()) indexLargest = i; } ArrayList component = components.get(indexLargest); for (Long node : component) { out.format("%d\n", node); } out.close(); } private static void printAllComponents(ArrayList> components, Formatter out) { for (int i = 1; i < components.size(); ++i) { ArrayList component = components.get(i); for (Long node : component) { out.format("%d ", node); } out.format("\n"); } out.close(); } public static void main(String[] args) { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String outdirPath = config.getString("outdir"); ConnectedComponents connectedComponents = new ConnectedComponents(); try { connectedComponents.load_graph(graphPath); } catch (IOException e) { System.out.println("Could not load graph: " + e); System.exit(2); } ProgressLogger logger = new ProgressLogger(); - //noinspection ResultOfMethodCallIgnored + // noinspection ResultOfMethodCallIgnored new File(outdirPath).mkdirs(); try { // ArrayList> components = connectedComponents.compute(logger); 
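// The distribution written just below (outdirPath + "/distribution.txt") consists of plain
// "size count" lines, sorted by component size. A minimal, illustrative sketch of how such
// a file could be post-processed afterwards, e.g. to get the total node count and the share
// of nodes in the largest component; the file path is passed as the first program argument
// and the class name is an assumption, not part of the patch:
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

class DistributionSummary {
    public static void main(String[] args) throws IOException {
        long totalNodes = 0;
        long largestSize = 0;
        try (BufferedReader in = new BufferedReader(new FileReader(args[0]))) {
            String line;
            while ((line = in.readLine()) != null) {
                String[] parts = line.trim().split("\\s+");   // "componentSize count"
                long size = Long.parseLong(parts[0]);
                long count = Long.parseLong(parts[1]);
                totalNodes += size * count;
                largestSize = Math.max(largestSize, size);
            }
        }
        System.out.format("nodes: %d, largest component: %d (%.2f%% of nodes)%n",
                totalNodes, largestSize, 100.0 * largestSize / totalNodes);
    }
}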
// components.sort(Comparator.comparing(ArrayList::size).reversed()); // printDistribution(components, new Formatter(outdirPath + "/distribution.txt")); // printLargestComponent(components, new Formatter(outdirPath + "/largest_component.txt")); // printAllComponents(components, new Formatter(outdirPath + "/all_components.txt")); HashMap componentDistribution = connectedComponents.compute(logger); PrintWriter f = new PrintWriter(new FileWriter(outdirPath + "/distribution.txt")); TreeMap sortedDistribution = new TreeMap<>(componentDistribution); for (Map.Entry entry : sortedDistribution.entrySet()) { f.println(entry.getKey() + " " + entry.getValue()); } f.close(); } catch (IOException e) { e.printStackTrace(); } logger.done(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java index 57830c6..c82d4ea 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/InOutDegree.java @@ -1,167 +1,174 @@ package org.softwareheritage.graph.experiments.topology; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.lang.reflect.InvocationTargetException; import java.util.*; import com.martiansoftware.jsap.JSAP; import com.martiansoftware.jsap.JSAPException; import com.martiansoftware.jsap.JSAPResult; import com.martiansoftware.jsap.Parameter; import com.martiansoftware.jsap.SimpleJSAP; import com.martiansoftware.jsap.UnflaggedOption; import it.unimi.dsi.logging.ProgressLogger; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; public class InOutDegree { - private InOutDegree() {} - - private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1; - private static final int TYPE_ALL = Node.Type.values().length; - private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT); - private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR); - private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV); - private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL); - private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP); - private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI); - - public static long[] outdegreeTypes(final Graph graph, long node) { - long[] out = new long[NODE_ARRAY_SIZE]; - var successors = graph.successors(node); - long neighbor; - while ((neighbor = successors.nextLong()) != -1) { - out[Node.Type.toInt(graph.getNodeType(neighbor))]++; - out[TYPE_ALL]++; - } - return out; - } - - public static long[] indegreeTypes(final Graph graph, long node) { - return outdegreeTypes(graph.transpose(), node); - } - - public static void writeDistribution(HashMap distribution, String filename) throws IOException { - PrintWriter f = new PrintWriter(new FileWriter(filename)); - TreeMap sortedDistribution = new TreeMap<>(distribution); - for (Map.Entry entry : sortedDistribution.entrySet()) { - f.println(entry.getKey() + " " + entry.getValue()); - } - f.close(); - } - - public static void run(final Graph graph, String resultsDir) throws IOException { - var cnt_in_dir = new HashMap(); - var dir_in_dir = new HashMap(); - var dir_in_rev = new HashMap(); - var dir_in_all = new HashMap(); - var dir_out_all = new HashMap(); - var dir_out_dir = new HashMap(); - var dir_out_cnt = new HashMap(); - var dir_out_rev = new HashMap(); - var rev_in_dir = 
new HashMap(); - var rev_in_rel = new HashMap(); - var rev_in_rev = new HashMap(); - var rev_in_snp = new HashMap(); - var rev_in_all = new HashMap(); - var rev_out_rev = new HashMap(); - var rel_in_snp = new HashMap(); - var snp_in_ori = new HashMap(); - var snp_out_all = new HashMap(); - var snp_out_rel = new HashMap(); - var snp_out_rev = new HashMap(); - var ori_out_snp = new HashMap(); - - final ProgressLogger pl = new ProgressLogger(); - pl.itemsName = "nodes"; - pl.expectedUpdates = graph.numNodes(); - pl.start("Scanning..."); - - long[] in; - long[] out; - - for(long i = graph.numNodes(); i-- != 0;) { - switch (graph.getNodeType(i)) { - case CNT: - cnt_in_dir.merge(graph.indegree(i), 1L, Long::sum); - case DIR: - in = indegreeTypes(graph, i); - out = outdegreeTypes(graph, i); - dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum); - dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum); - dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); - dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum); - dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum); - dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum); - dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum); - case REV: - in = indegreeTypes(graph, i); - out = outdegreeTypes(graph, i); - rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum); - rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); - rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum); - rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum); - rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum); - rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum); - case REL: - rel_in_snp.merge(graph.indegree(i), 1L, Long::sum); - case SNP: - out = outdegreeTypes(graph, i); - snp_in_ori.merge(graph.indegree(i), 1L, Long::sum); - snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum); - snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum); - snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum); - case ORI: - ori_out_snp.merge(graph.outdegree(i), 1L, Long::sum); - } - - pl.update(); - } - - pl.done(); - - writeDistribution(cnt_in_dir, resultsDir + "/cnt_in_dir.txt"); - writeDistribution(dir_in_dir, resultsDir + "/dir_in_dir.txt"); - writeDistribution(dir_in_rev, resultsDir + "/dir_in_rev.txt"); - writeDistribution(dir_in_all, resultsDir + "/dir_in_all.txt"); - writeDistribution(dir_out_all, resultsDir + "/dir_out_all.txt"); - writeDistribution(dir_out_dir, resultsDir + "/dir_out_dir.txt"); - writeDistribution(dir_out_cnt, resultsDir + "/dir_out_cnt.txt"); - writeDistribution(dir_out_rev, resultsDir + "/dir_out_rev.txt"); - writeDistribution(rev_in_dir, resultsDir + "/rev_in_dir.txt"); - writeDistribution(rev_in_rel, resultsDir + "/rev_in_rel.txt"); - writeDistribution(rev_in_rev, resultsDir + "/rev_in_rev.txt"); - writeDistribution(rev_in_snp, resultsDir + "/rev_in_snp.txt"); - writeDistribution(rev_in_all, resultsDir + "/rev_in_all.txt"); - writeDistribution(rev_out_rev, resultsDir + "/rev_out_rev.txt"); - writeDistribution(rel_in_snp, resultsDir + "/rel_in_snp.txt"); - writeDistribution(snp_in_ori, resultsDir + "/snp_in_ori.txt"); - writeDistribution(snp_out_all, resultsDir + "/snp_out_all.txt"); - writeDistribution(snp_out_rel, resultsDir + "/snp_out_rel.txt"); - writeDistribution(snp_out_rev, resultsDir + "/snp_out_rev.txt"); - writeDistribution(ori_out_snp, resultsDir + "/ori_out_snp.txt"); - } - - static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, NoSuchMethodException, JSAPException, IOException, ClassNotFoundException { - final SimpleJSAP jsap = new 
SimpleJSAP(InOutDegree.class.getName(), "Computes in and out degrees of the given SWHGraph", - new Parameter[] { - new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), - new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The directory of the resulting files."), - } - ); - - final JSAPResult jsapResult = jsap.parse(arg); - if (jsap.messagePrinted()) System.exit(1); - - final String basename = jsapResult.getString("basename"); - final String resultsDir = jsapResult.userSpecified("resultsDir") ? jsapResult.getString("resultsDir") : basename; - - final ProgressLogger pl = new ProgressLogger(); - - Graph graph = new Graph(basename); - run(graph, resultsDir); - } + private InOutDegree() { + } + + private static final int NODE_ARRAY_SIZE = Node.Type.values().length + 1; + private static final int TYPE_ALL = Node.Type.values().length; + private static final int TYPE_CNT = Node.Type.toInt(Node.Type.CNT); + private static final int TYPE_DIR = Node.Type.toInt(Node.Type.DIR); + private static final int TYPE_REV = Node.Type.toInt(Node.Type.REV); + private static final int TYPE_REL = Node.Type.toInt(Node.Type.REL); + private static final int TYPE_SNP = Node.Type.toInt(Node.Type.SNP); + private static final int TYPE_ORI = Node.Type.toInt(Node.Type.ORI); + + public static long[] outdegreeTypes(final Graph graph, long node) { + long[] out = new long[NODE_ARRAY_SIZE]; + var successors = graph.successors(node); + long neighbor; + while ((neighbor = successors.nextLong()) != -1) { + out[Node.Type.toInt(graph.getNodeType(neighbor))]++; + out[TYPE_ALL]++; + } + return out; + } + + public static long[] indegreeTypes(final Graph graph, long node) { + return outdegreeTypes(graph.transpose(), node); + } + + public static void writeDistribution(HashMap distribution, String filename) throws IOException { + PrintWriter f = new PrintWriter(new FileWriter(filename)); + TreeMap sortedDistribution = new TreeMap<>(distribution); + for (Map.Entry entry : sortedDistribution.entrySet()) { + f.println(entry.getKey() + " " + entry.getValue()); + } + f.close(); + } + + public static void run(final Graph graph, String resultsDir) throws IOException { + var cnt_in_dir = new HashMap(); + var dir_in_dir = new HashMap(); + var dir_in_rev = new HashMap(); + var dir_in_all = new HashMap(); + var dir_out_all = new HashMap(); + var dir_out_dir = new HashMap(); + var dir_out_cnt = new HashMap(); + var dir_out_rev = new HashMap(); + var rev_in_dir = new HashMap(); + var rev_in_rel = new HashMap(); + var rev_in_rev = new HashMap(); + var rev_in_snp = new HashMap(); + var rev_in_all = new HashMap(); + var rev_out_rev = new HashMap(); + var rel_in_snp = new HashMap(); + var snp_in_ori = new HashMap(); + var snp_out_all = new HashMap(); + var snp_out_rel = new HashMap(); + var snp_out_rev = new HashMap(); + var ori_out_snp = new HashMap(); + + final ProgressLogger pl = new ProgressLogger(); + pl.itemsName = "nodes"; + pl.expectedUpdates = graph.numNodes(); + pl.start("Scanning..."); + + long[] in; + long[] out; + + for (long i = graph.numNodes(); i-- != 0;) { + switch (graph.getNodeType(i)) { + case CNT: + cnt_in_dir.merge(graph.indegree(i), 1L, Long::sum); + case DIR: + in = indegreeTypes(graph, i); + out = outdegreeTypes(graph, i); + dir_in_all.merge(in[TYPE_ALL], 1L, Long::sum); + dir_out_all.merge(out[TYPE_ALL], 1L, Long::sum); + dir_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); + 
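// Note: this switch has no break statements, so control falls through from each matched
// case into the later ones (a CNT node, for example, also updates the DIR, REV, REL, SNP
// and ORI tallies). The reformatting does not change that behaviour; it is inherited from
// the original code.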
dir_in_rev.merge(in[TYPE_REV], 1L, Long::sum); + dir_out_cnt.merge(out[TYPE_CNT], 1L, Long::sum); + dir_out_dir.merge(out[TYPE_DIR], 1L, Long::sum); + dir_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + case REV: + in = indegreeTypes(graph, i); + out = outdegreeTypes(graph, i); + rev_in_all.merge(in[TYPE_ALL], 1L, Long::sum); + rev_in_dir.merge(in[TYPE_DIR], 1L, Long::sum); + rev_in_rev.merge(in[TYPE_REV], 1L, Long::sum); + rev_in_rel.merge(in[TYPE_REL], 1L, Long::sum); + rev_in_snp.merge(in[TYPE_SNP], 1L, Long::sum); + rev_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + case REL: + rel_in_snp.merge(graph.indegree(i), 1L, Long::sum); + case SNP: + out = outdegreeTypes(graph, i); + snp_in_ori.merge(graph.indegree(i), 1L, Long::sum); + snp_out_all.merge(out[TYPE_ALL], 1L, Long::sum); + snp_out_rel.merge(out[TYPE_REL], 1L, Long::sum); + snp_out_rev.merge(out[TYPE_REV], 1L, Long::sum); + case ORI: + ori_out_snp.merge(graph.outdegree(i), 1L, Long::sum); + } + + pl.update(); + } + + pl.done(); + + writeDistribution(cnt_in_dir, resultsDir + "/cnt_in_dir.txt"); + writeDistribution(dir_in_dir, resultsDir + "/dir_in_dir.txt"); + writeDistribution(dir_in_rev, resultsDir + "/dir_in_rev.txt"); + writeDistribution(dir_in_all, resultsDir + "/dir_in_all.txt"); + writeDistribution(dir_out_all, resultsDir + "/dir_out_all.txt"); + writeDistribution(dir_out_dir, resultsDir + "/dir_out_dir.txt"); + writeDistribution(dir_out_cnt, resultsDir + "/dir_out_cnt.txt"); + writeDistribution(dir_out_rev, resultsDir + "/dir_out_rev.txt"); + writeDistribution(rev_in_dir, resultsDir + "/rev_in_dir.txt"); + writeDistribution(rev_in_rel, resultsDir + "/rev_in_rel.txt"); + writeDistribution(rev_in_rev, resultsDir + "/rev_in_rev.txt"); + writeDistribution(rev_in_snp, resultsDir + "/rev_in_snp.txt"); + writeDistribution(rev_in_all, resultsDir + "/rev_in_all.txt"); + writeDistribution(rev_out_rev, resultsDir + "/rev_out_rev.txt"); + writeDistribution(rel_in_snp, resultsDir + "/rel_in_snp.txt"); + writeDistribution(snp_in_ori, resultsDir + "/snp_in_ori.txt"); + writeDistribution(snp_out_all, resultsDir + "/snp_out_all.txt"); + writeDistribution(snp_out_rel, resultsDir + "/snp_out_rel.txt"); + writeDistribution(snp_out_rev, resultsDir + "/snp_out_rev.txt"); + writeDistribution(ori_out_snp, resultsDir + "/ori_out_snp.txt"); + } + + static public void main(final String[] arg) + throws IllegalArgumentException, SecurityException, IllegalAccessException, InvocationTargetException, + NoSuchMethodException, JSAPException, IOException, ClassNotFoundException { + final SimpleJSAP jsap = new SimpleJSAP(InOutDegree.class.getName(), + "Computes in and out degrees of the given SWHGraph", + new Parameter[]{ + new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the graph."), + new UnflaggedOption("resultsDir", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, + JSAP.NOT_GREEDY, "The directory of the resulting files."),}); + + final JSAPResult jsapResult = jsap.parse(arg); + if (jsap.messagePrinted()) + System.exit(1); + + final String basename = jsapResult.getString("basename"); + final String resultsDir = jsapResult.userSpecified("resultsDir") + ? 
jsapResult.getString("resultsDir") + : basename; + + final ProgressLogger pl = new ProgressLogger(); + + Graph graph = new Graph(basename); + run(graph, resultsDir); + } } diff --git a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java index b84cabe..d1be386 100644 --- a/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java +++ b/java/src/main/java/org/softwareheritage/graph/experiments/topology/SubdatasetSizeFunction.java @@ -1,88 +1,90 @@ package org.softwareheritage.graph.experiments.topology; import com.google.common.primitives.Longs; import com.martiansoftware.jsap.*; import it.unimi.dsi.Util; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.Arrays; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.io.ByteDiskQueue; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.util.XoRoShiRo128PlusRandom; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.experiments.forks.ForkCC; import java.io.*; public class SubdatasetSizeFunction { - private SubdatasetSizeFunction() {} - - public static void run(final Graph graph) throws IOException { - final ProgressLogger pl = new ProgressLogger(); - pl.itemsName = "nodes"; - pl.expectedUpdates = graph.numNodes(); - - long n = graph.numNodes(); - LongArrayBitVector visited = LongArrayBitVector.ofLength(n); - - int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); - final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); - final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); - final byte[] byteBuf = new byte[Long.BYTES]; - - long[][] randomPerm = Util.identity(graph.numNodes()); - LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); - - long visitedSize = 0; - pl.start("Running traversal starting from origins..."); - for (long j = 0; j < n; ++j) { - long i = BigArrays.get(randomPerm, j); - if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) { - continue; - } - queue.enqueue(Longs.toByteArray(i)); - visited.set(i); - - while (!queue.isEmpty()) { - queue.dequeue(byteBuf); - final long currentNode = Longs.fromByteArray(byteBuf); - - visitedSize++; - - final LazyLongIterator iterator = graph.successors(currentNode); - long succ; - while ((succ = iterator.nextLong()) != -1) { - if (visited.getBoolean(succ)) - continue; - visited.set(succ); - queue.enqueue(Longs.toByteArray(succ)); - } - - pl.update(); - } - - System.out.println(visitedSize); - } - pl.done(); - } - - static public void main(final String[] arg) throws IllegalArgumentException, SecurityException, JSAPException, IOException { - final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(), "Computes in and out degrees of the given SWHGraph", - new Parameter[] { - new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph."), - } - ); - - final JSAPResult jsapResult = jsap.parse(arg); - if (jsap.messagePrinted()) System.exit(1); - - final String basename = jsapResult.getString("basename"); - - Graph graph = new Graph(basename); - run(graph); - } + private SubdatasetSizeFunction() { + } + + public static void run(final Graph graph) 
throws IOException { + final ProgressLogger pl = new ProgressLogger(); + pl.itemsName = "nodes"; + pl.expectedUpdates = graph.numNodes(); + + long n = graph.numNodes(); + LongArrayBitVector visited = LongArrayBitVector.ofLength(n); + + int bufferSize = (int) Math.min(Arrays.MAX_ARRAY_SIZE & ~0x7, 8L * n); + final File queueFile = File.createTempFile(ForkCC.class.getSimpleName(), "queue"); + final ByteDiskQueue queue = ByteDiskQueue.createNew(queueFile, bufferSize, true); + final byte[] byteBuf = new byte[Long.BYTES]; + + long[][] randomPerm = Util.identity(graph.numNodes()); + LongBigArrays.shuffle(randomPerm, new XoRoShiRo128PlusRandom()); + + long visitedSize = 0; + pl.start("Running traversal starting from origins..."); + for (long j = 0; j < n; ++j) { + long i = BigArrays.get(randomPerm, j); + if (visited.getBoolean(i) || graph.getNodeType(i) != Node.Type.ORI) { + continue; + } + queue.enqueue(Longs.toByteArray(i)); + visited.set(i); + + while (!queue.isEmpty()) { + queue.dequeue(byteBuf); + final long currentNode = Longs.fromByteArray(byteBuf); + + visitedSize++; + + final LazyLongIterator iterator = graph.successors(currentNode); + long succ; + while ((succ = iterator.nextLong()) != -1) { + if (visited.getBoolean(succ)) + continue; + visited.set(succ); + queue.enqueue(Longs.toByteArray(succ)); + } + + pl.update(); + } + + System.out.println(visitedSize); + } + pl.done(); + } + + static public void main(final String[] arg) + throws IllegalArgumentException, SecurityException, JSAPException, IOException { + final SimpleJSAP jsap = new SimpleJSAP(SubdatasetSizeFunction.class.getName(), + "Computes in and out degrees of the given SWHGraph", + new Parameter[]{new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, + JSAP.NOT_GREEDY, "The basename of the graph."),}); + + final JSAPResult jsapResult = jsap.parse(arg); + if (jsap.messagePrinted()) + System.exit(1); + + final String basename = jsapResult.getString("basename"); + + Graph graph = new Graph(basename); + run(graph); + } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java index 3076517..0d87a61 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/LabelMapBuilder.java @@ -1,235 +1,215 @@ package org.softwareheritage.graph.maps; import com.martiansoftware.jsap.*; import it.unimi.dsi.big.webgraph.LazyLongIterator; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; -import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntLabel; import it.unimi.dsi.big.webgraph.labelling.FixedWidthIntListLabel; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import it.unimi.dsi.io.OutputBitStream; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.big.webgraph.BVGraph; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.concurrent.TimeUnit; public class LabelMapBuilder { final 
static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(LabelMapBuilder.class); private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - LabelMapBuilder.class.getName(), - "", - new Parameter[] { - new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, - 'g', "graph", "Basename of the compressed graph"), - new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, - 'd', "debug-path", - "Store the intermediate representation here for debug"), - - new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, - 't', "tmp", "Temporary directory path"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(LabelMapBuilder.class.getName(), "", + new Parameter[]{ + new FlaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'g', + "graph", "Basename of the compressed graph"), + new FlaggedOption("debugPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', + "debug-path", "Store the intermediate representation here for debug"), + + new FlaggedOption("tmpDir", JSAP.STRING_PARSER, "tmp", JSAP.NOT_REQUIRED, 't', "tmp", + "Temporary directory path"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } public static void main(String[] args) throws IOException { JSAPResult config = parse_args(args); String graphPath = config.getString("graphPath"); String tmpDir = config.getString("tmpDir"); String debugPath = config.getString("debugPath"); logger.info("Starting label map generation..."); computeLabelMap(graphPath, debugPath, tmpDir); logger.info("Label map generation ended."); } @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast static Object2LongFunction loadMPH(String mphBasename) throws IOException { Object2LongFunction mphMap = null; try { logger.info("loading MPH function..."); mphMap = (Object2LongFunction) BinIO.loadObject(mphBasename + ".mph"); logger.info("MPH function loaded"); } catch (ClassNotFoundException e) { logger.error("unknown class object in .mph file: " + e); System.exit(2); } return mphMap; } - static long getMPHSize(Object2LongFunction mph) - { + static long getMPHSize(Object2LongFunction mph) { return (mph instanceof Size64) ? ((Size64) mph).size64() : mph.size(); } - static long SwhIDToNode(String strSWHID, Object2LongFunction mphMap, long[][] orderMap) - { + static long SwhIDToNode(String strSWHID, Object2LongFunction mphMap, long[][] orderMap) { long mphId = mphMap.getLong(strSWHID); return BigArrays.get(orderMap, mphId); } - static void computeLabelMap(String graphPath, String debugPath, String tmpDir) - throws IOException - { - // Compute intermediate representation in the format "

- * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The dsiutils ByteBufferInputStream is used to overcome this - * Java limit. + * Java has a limit for mmap()-ed files because of unsupported 64-bit indexing. The + * dsiutils ByteBufferInputStream is used to overcome + * this Java limit. * * @author The Software Heritage developers */ public class MapFile { /** Memory-mapped file buffer */ ByteBufferInputStream bufferMap; /** Fixed line length of the mmap()-ed file */ int lineLength; /** * Constructor. * * @param path file path to mmap() * @param lineLength fixed length of a line in the file */ public MapFile(String path, int lineLength) throws IOException { this.bufferMap = null; this.lineLength = lineLength; try (RandomAccessFile mapFile = new RandomAccessFile(new File(path), "r")) { FileChannel fileChannel = mapFile.getChannel(); bufferMap = ByteBufferInputStream.map(fileChannel, FileChannel.MapMode.READ_ONLY); } } /** * Returns a specific line in the file. * * @param lineIndex line number in the file * @return the line at the specified position */ public byte[] readAtLine(long lineIndex) { byte[] buffer = new byte[lineLength]; long position = lineIndex * (long) lineLength; bufferMap.position(position); bufferMap.read(buffer, 0, lineLength); return buffer; } /** * Closes the mmap()-ed file. */ public void close() throws IOException { bufferMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java index c7217c5..04c99de 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java @@ -1,114 +1,116 @@ package org.softwareheritage.graph.maps; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.SWHID; import java.io.IOException; /** * Mapping between internal long node id and external SWHID. *
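MapFile above resolves record i at byte offset i * lineLength inside a memory-mapped file. A plain java.nio sketch of the same fixed-width layout, independent of the dsiutils ByteBufferInputStream used above; the path and record length are whatever the caller supplies:

    import java.io.IOException;
    import java.nio.MappedByteBuffer;
    import java.nio.channels.FileChannel;
    import java.nio.file.Path;
    import java.nio.file.StandardOpenOption;

    public class FixedWidthReadSketch {
        // Record i starts at byte i * recordLength; map that slice read-only and copy it out.
        public static byte[] readRecord(Path file, int recordLength, long index) throws IOException {
            try (FileChannel channel = FileChannel.open(file, StandardOpenOption.READ)) {
                MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY,
                        index * (long) recordLength, recordLength);
                byte[] record = new byte[recordLength];
                buffer.get(record);
                return record;
            }
        }
    }

A production reader such as MapFile maps the whole file once in its constructor and reuses that mapping for every readAtLine() call instead of remapping per record.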

- * Mappings in both directions are pre-computed and dumped on disk in the - * {@link NodeMapBuilder} class, then they are loaded here using mmap(). + * Mappings in both directions are pre-computed and dumped on disk in the {@link NodeMapBuilder} + * class, then they are loaded here using mmap(). * * @author The Software Heritage developers * @see NodeMapBuilder */ public class NodeIdMap { /** Fixed length of full SWHID */ public static final int SWHID_LENGTH = 50; /** Fixed length of long node id */ public static final int NODE_ID_LENGTH = 20; /** Fixed length of binary SWHID buffer */ public static final int SWHID_BIN_SIZE = 22; /** Fixed length of binary node id buffer */ public static final int NODE_ID_BIN_SIZE = 8; /** Graph path and basename */ String graphPath; /** Number of ids to map */ long nbIds; /** mmap()-ed SWHID_TO_NODE file */ MapFile swhToNodeMap; /** mmap()-ed NODE_TO_SWHID file */ MapFile nodeToSwhMap; /** * Constructor. * * @param graphPath full graph path * @param nbNodes number of nodes in the graph */ public NodeIdMap(String graphPath, long nbNodes) throws IOException { this.graphPath = graphPath; this.nbIds = nbNodes; this.swhToNodeMap = new MapFile(graphPath + Graph.SWHID_TO_NODE, SWHID_BIN_SIZE + NODE_ID_BIN_SIZE); this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_SWHID, SWHID_BIN_SIZE); } /** * Converts SWHID to corresponding long node id. * * @param swhid node represented as a {@link SWHID} * @return corresponding node as a long id * @see SWHID */ public long getNodeId(SWHID swhid) { - // The file is sorted by swhid, hence we can binary search on swhid to get corresponding - // nodeId + /* + * The file is sorted by swhid, hence we can binary search on swhid to get corresponding nodeId + */ long start = 0; long end = nbIds - 1; while (start <= end) { long lineNumber = (start + end) / 2L; byte[] buffer = swhToNodeMap.readAtLine(lineNumber); byte[] swhidBuffer = new byte[SWHID_BIN_SIZE]; byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE]; System.arraycopy(buffer, 0, swhidBuffer, 0, SWHID_BIN_SIZE); System.arraycopy(buffer, SWHID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE); String currentSWHID = SWHID.fromBytes(swhidBuffer).getSWHID(); long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong(); int cmp = currentSWHID.compareTo(swhid.toString()); if (cmp == 0) { return currentNodeId; } else if (cmp < 0) { start = lineNumber + 1; } else { end = lineNumber - 1; } } throw new IllegalArgumentException("Unknown SWHID: " + swhid); } /** * Converts a node long id to corresponding SWHID. * * @param nodeId node as a long id * @return corresponding node as a {@link SWHID} * @see SWHID */ public SWHID getSWHID(long nodeId) { - // Each line in NODE_TO_SWHID is formatted as: swhid - // The file is ordered by nodeId, meaning node0's swhid is at line 0, hence we can read the - // nodeId-th line to get corresponding swhid + /* + * Each line in NODE_TO_SWHID is formatted as: swhid The file is ordered by nodeId, meaning node0's + * swhid is at line 0, hence we can read the nodeId-th line to get corresponding swhid + */ if (nodeId < 0 || nodeId >= nbIds) { throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); } return SWHID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); } /** * Closes the mapping files. 
*/ public void close() throws IOException { swhToNodeMap.close(); nodeToSwhMap.close(); } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeMapBuilder.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeMapBuilder.java index 6d8e939..7ade2f1 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/NodeMapBuilder.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeMapBuilder.java @@ -1,212 +1,217 @@ package org.softwareheritage.graph.maps; import it.unimi.dsi.bits.LongArrayBitVector; import it.unimi.dsi.fastutil.BigArrays; import it.unimi.dsi.fastutil.Size64; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigArrays; import it.unimi.dsi.fastutil.longs.LongBigList; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import it.unimi.dsi.logging.ProgressLogger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import org.softwareheritage.graph.SWHID; import java.io.*; import java.nio.charset.StandardCharsets; import java.util.Scanner; import java.util.concurrent.TimeUnit; /** * Create maps needed at runtime by the graph service, in particular: *

- * - SWHID → WebGraph long node id - * - WebGraph long node id → SWHID (converse of the former) - * - WebGraph long node id → SWH node type (enum) + * <ul> + * <li>SWHID → WebGraph long node id</li> + * <li>WebGraph long node id → SWHID (converse of the former)</li> + * <li>WebGraph long node id → SWH node type (enum)</li> + * </ul>

* * @author The Software Heritage developers */ public class NodeMapBuilder { final static String SORT_BUFFER_SIZE = "40%"; final static Logger logger = LoggerFactory.getLogger(NodeMapBuilder.class); /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException { if (args.length != 2) { logger.error("Usage: COMPRESSED_GRAPH_BASE_NAME TEMP_DIR < NODES_CSV"); System.exit(1); } String graphPath = args[0]; String tmpDir = args[1]; logger.info("starting maps generation..."); precomputeNodeIdMap(graphPath, tmpDir); logger.info("maps generation completed"); } /** * Computes and dumps on disk mapping files. * * @param graphPath path of the compressed graph */ // Suppress warning for Object2LongFunction cast @SuppressWarnings("unchecked") - static void precomputeNodeIdMap(String graphPath, String tmpDir) - throws IOException { + static void precomputeNodeIdMap(String graphPath, String tmpDir) throws IOException { ProgressLogger plSWHID2Node = new ProgressLogger(logger, 10, TimeUnit.SECONDS); ProgressLogger plNode2SWHID = new ProgressLogger(logger, 10, TimeUnit.SECONDS); plSWHID2Node.itemsName = "swhid→node"; plNode2SWHID.itemsName = "node→swhid"; - // avg speed for swhid→node is sometime skewed due to write to the sort - // pipe hanging when sort is sorting; hence also desplay local speed + /* + * avg speed for swhid→node is sometime skewed due to write to the sort pipe hanging when sort is + * sorting; hence also desplay local speed + */ plSWHID2Node.displayLocalSpeed = true; // first half of SWHID->node mapping: SWHID -> WebGraph MPH (long) Object2LongFunction mphMap = null; try { logger.info("loading MPH function..."); mphMap = (Object2LongFunction) BinIO.loadObject(graphPath + ".mph"); logger.info("MPH function loaded"); } catch (ClassNotFoundException e) { logger.error("unknown class object in .mph file: " + e); System.exit(2); } long nbIds = (mphMap instanceof Size64) ? ((Size64) mphMap).size64() : mphMap.size(); plSWHID2Node.expectedUpdates = nbIds; plNode2SWHID.expectedUpdates = nbIds; // second half of SWHID->node mapping: WebGraph MPH (long) -> BFS order (long) long[][] bfsMap = LongBigArrays.newBigArray(nbIds); logger.info("loading BFS order file..."); long loaded = BinIO.loadLongs(graphPath + ".order", bfsMap); logger.info("BFS order file loaded"); if (loaded != nbIds) { logger.error("graph contains " + nbIds + " nodes, but read " + loaded); System.exit(2); } - // Create mapping SWHID -> WebGraph node id, by sequentially reading - // nodes, hashing them with MPH, and permuting according to BFS order - FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, - StandardCharsets.US_ASCII)); + /* + * Create mapping SWHID -> WebGraph node id, by sequentially reading nodes, hashing them with MPH, + * and permuting according to BFS order + */ + FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, StandardCharsets.US_ASCII)); LineIterator swhidIterator = new LineIterator(buffer); - // The WebGraph node id -> SWHID mapping can be obtained from the - // SWHID->node one by numerically sorting on node id and sequentially - // writing obtained SWHIDs to a binary map. Delegates the sorting job to - // /usr/bin/sort via pipes + /* + * The WebGraph node id -> SWHID mapping can be obtained from the SWHID->node one by numerically + * sorting on node id and sequentially writing obtained SWHIDs to a binary map. 
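The swhid-to-node mapping built by this class composes two lookups: the MPH turns a SWHID string into a dense intermediate id, and the BFS-order permutation loaded from the .order file turns that id into the final WebGraph node id. A toy sketch with in-memory stand-ins for both; the identifiers and the permutation are made up:

    import java.util.Map;

    public class SwhidToNodeSketch {
        public static void main(String[] args) {
            // made-up stand-in for the MPH: SWHID string -> dense id in [0, n)
            Map<String, Long> mph = Map.of(
                    "swh:1:cnt:0000000000000000000000000000000000000001", 0L,
                    "swh:1:rev:0000000000000000000000000000000000000009", 1L);
            // made-up stand-in for the .order file: MPH id -> BFS-ordered node id
            long[] bfsOrder = {1L, 0L};

            String swhid = "swh:1:rev:0000000000000000000000000000000000000009";
            long nodeId = bfsOrder[Math.toIntExact(mph.get(swhid))];
            System.out.println(swhid + " -> node " + nodeId);
        }
    }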
Delegates the + * sorting job to /usr/bin/sort via pipes + */ ProcessBuilder processBuilder = new ProcessBuilder(); - processBuilder.command("sort", "--numeric-sort", "--key", "2", - "--buffer-size", SORT_BUFFER_SIZE, + processBuilder.command("sort", "--numeric-sort", "--key", "2", "--buffer-size", SORT_BUFFER_SIZE, "--temporary-directory", tmpDir); Process sort = processBuilder.start(); BufferedOutputStream sort_stdin = new BufferedOutputStream(sort.getOutputStream()); BufferedInputStream sort_stdout = new BufferedInputStream(sort.getInputStream()); // for the binary format of swhidToNodeMap, see Python module swh.graph.swhid:SwhidToIntMap // for the binary format of nodeToSwhidMap, see Python module swh.graph.swhid:IntToSwhidMap - try (DataOutputStream swhidToNodeMap = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(graphPath + Graph.SWHID_TO_NODE))); - BufferedOutputStream nodeToSwhidMap = new BufferedOutputStream(new FileOutputStream(graphPath + Graph.NODE_TO_SWHID))) { - - // background handler for sort output, it will be fed SWHID/node - // pairs while swhidToNodeMap is being filled, and will itself fill - // nodeToSwhidMap as soon as data from sort is ready + try (DataOutputStream swhidToNodeMap = new DataOutputStream( + new BufferedOutputStream(new FileOutputStream(graphPath + Graph.SWHID_TO_NODE))); + BufferedOutputStream nodeToSwhidMap = new BufferedOutputStream( + new FileOutputStream(graphPath + Graph.NODE_TO_SWHID))) { + + /* + * background handler for sort output, it will be fed SWHID/node pairs while swhidToNodeMap is being + * filled, and will itself fill nodeToSwhidMap as soon as data from sort is ready + */ SortOutputHandler outputHandler = new SortOutputHandler(sort_stdout, nodeToSwhidMap, plNode2SWHID); outputHandler.start(); - // Type map from WebGraph node ID to SWH type. Used at runtime by - // pure Java graph traversals to efficiently check edge - // restrictions. - final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) - / Math.log(2)); + /* + * Type map from WebGraph node ID to SWH type. Used at runtime by pure Java graph traversals to + * efficiently check edge restrictions. 
+ */ + final int log2NbTypes = (int) Math.ceil(Math.log(Node.Type.values().length) / Math.log(2)); final int nbBitsPerNodeType = log2NbTypes; - LongArrayBitVector nodeTypesBitVector = - LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); + LongArrayBitVector nodeTypesBitVector = LongArrayBitVector.ofLength(nbBitsPerNodeType * nbIds); LongBigList nodeTypesMap = nodeTypesBitVector.asLongBigList(nbBitsPerNodeType); plSWHID2Node.start("filling swhid2node map"); for (long iNode = 0; iNode < nbIds && swhidIterator.hasNext(); iNode++) { String swhidStr = swhidIterator.next().toString(); SWHID swhid = new SWHID(swhidStr); byte[] swhidBin = swhid.toBytes(); long mphId = mphMap.getLong(swhidStr); long nodeId = BigArrays.get(bfsMap, mphId); swhidToNodeMap.write(swhidBin, 0, swhidBin.length); swhidToNodeMap.writeLong(nodeId); - sort_stdin.write((swhidStr + "\t" + nodeId + "\n") - .getBytes(StandardCharsets.US_ASCII)); + sort_stdin.write((swhidStr + "\t" + nodeId + "\n").getBytes(StandardCharsets.US_ASCII)); nodeTypesMap.set(nodeId, swhid.getType().ordinal()); plSWHID2Node.lightUpdate(); } plSWHID2Node.done(); sort_stdin.close(); // write type map logger.info("storing type map"); BinIO.storeObject(nodeTypesMap, graphPath + Graph.NODE_TO_TYPE); logger.info("type map stored"); // wait for nodeToSwhidMap filling try { logger.info("waiting for node2swhid map..."); int sortExitCode = sort.waitFor(); if (sortExitCode != 0) { logger.error("sort returned non-zero exit code: " + sortExitCode); System.exit(2); } outputHandler.join(); } catch (InterruptedException e) { logger.error("processing of sort output failed with: " + e); System.exit(2); } } } private static class SortOutputHandler extends Thread { private Scanner input; private OutputStream output; private ProgressLogger pl; SortOutputHandler(InputStream input, OutputStream output, ProgressLogger pl) { this.input = new Scanner(input, StandardCharsets.US_ASCII); this.output = output; this.pl = pl; } public void run() { boolean sortDone = false; logger.info("node2swhid: waiting for sort output..."); while (input.hasNextLine()) { if (!sortDone) { sortDone = true; this.pl.start("filling node2swhid map"); } - String line = input.nextLine(); // format: SWHID NODE_ID - SWHID swhid = new SWHID(line.split("\\t")[0]); // get SWHID + String line = input.nextLine(); // format: SWHID NODE_ID + SWHID swhid = new SWHID(line.split("\\t")[0]); // get SWHID try { output.write((byte[]) swhid.toBytes()); } catch (IOException e) { logger.error("writing to node->SWHID map failed with: " + e); } this.pl.lightUpdate(); } this.pl.done(); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java index c64b283..c835e02 100644 --- a/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java +++ b/java/src/main/java/org/softwareheritage/graph/maps/NodeTypesMap.java @@ -1,54 +1,52 @@ package org.softwareheritage.graph.maps; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.longs.LongBigList; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Node; import java.io.IOException; /** - * Mapping between long node id and SWH node type as described in the data - * model. + * Mapping between long node id and SWH node type as described in the + * data model. *
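The type map written above, and read back by NodeTypesMap below, packs each node's type into the minimum number of bits, computed with the same ceil(log2(...)) formula. A small sketch of what that costs at scale; the node count is a made-up figure:

    public class TypeMapSizeSketch {
        public static void main(String[] args) {
            int nbTypes = 6;  // CNT, DIR, REV, REL, SNP, ORI
            int bitsPerType = (int) Math.ceil(Math.log(nbTypes) / Math.log(2));  // 3 bits
            long nbNodes = 20_000_000_000L;                                      // made-up node count
            long sizeBytes = bitsPerType * nbNodes / 8;
            System.out.println(bitsPerType + " bits per node, about "
                    + sizeBytes / (1L << 30) + " GiB for the whole type map");
        }
    }

Storing a full byte per node would cost almost three times as much, which is why the map is exposed as a bit-packed LongBigList rather than a byte array.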

- * The type mapping is pre-computed and dumped on disk in the {@link NodeMapBuilder} - * class, then it is loaded in-memory here using - * fastutil LongBigList. To be - * space-efficient, the mapping is stored as a bitmap using minimum number of - * bits per {@link Node.Type}. + * The type mapping is pre-computed and dumped on disk in the {@link NodeMapBuilder} class, then it + * is loaded in-memory here using fastutil LongBigList. + * To be space-efficient, the mapping is stored as a bitmap using minimum number of bits per + * {@link Node.Type}. * * @author The Software Heritage developers */ public class NodeTypesMap { /** * Array storing for each node its type */ public LongBigList nodeTypesMap; /** * Constructor. * * @param graphPath path and basename of the compressed graph */ public NodeTypesMap(String graphPath) throws IOException { try { nodeTypesMap = (LongBigList) BinIO.loadObject(graphPath + Graph.NODE_TO_TYPE); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Unknown class object: " + e); } } /** * Returns node type from a node long id. * * @param nodeId node as a long id * @return corresponding {@link Node.Type} value * @see org.softwareheritage.graph.Node.Type */ public Node.Type getType(long nodeId) { long type = nodeTypesMap.getLong(nodeId); return Node.Type.fromInt((int) type); } } diff --git a/java/src/main/java/org/softwareheritage/graph/server/App.java b/java/src/main/java/org/softwareheritage/graph/server/App.java index d061680..be8587a 100644 --- a/java/src/main/java/org/softwareheritage/graph/server/App.java +++ b/java/src/main/java/org/softwareheritage/graph/server/App.java @@ -1,198 +1,196 @@ package org.softwareheritage.graph.server; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.PropertyNamingStrategy; import com.martiansoftware.jsap.*; import io.javalin.Javalin; import io.javalin.http.Context; import io.javalin.plugin.json.JavalinJackson; import org.softwareheritage.graph.Graph; import org.softwareheritage.graph.Stats; import org.softwareheritage.graph.SWHID; import java.io.IOException; import java.util.List; import java.util.Map; /** * Web framework of the swh-graph server REST API. * * @author The Software Heritage developers */ public class App { /** * Main entrypoint. * * @param args command line arguments */ public static void main(String[] args) throws IOException, JSAPException { SimpleJSAP jsap = new SimpleJSAP(App.class.getName(), "Server to load and query a compressed graph representation of Software Heritage archive.", new Parameter[]{ new FlaggedOption("port", JSAP.INTEGER_PARSER, "5009", JSAP.NOT_REQUIRED, 'p', "port", "Binding port of the server."), new UnflaggedOption("graphPath", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the compressed graph."), - new Switch("timings", 't', "timings", "Show timings in API result metadata."), - }); + new Switch("timings", 't', "timings", "Show timings in API result metadata."),}); JSAPResult config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } String graphPath = config.getString("graphPath"); int port = config.getInt("port"); boolean showTimings = config.getBoolean("timings"); startServer(graphPath, port, showTimings); } /** * Loads compressed graph and starts the web server to query it. 
* * @param graphPath basename of the compressed graph * @param port binding port of the server * @param showTimings true if timings should be in results metadata, false otherwise */ - private static void startServer(String graphPath, int port, boolean showTimings) - throws IOException { + private static void startServer(String graphPath, int port, boolean showTimings) throws IOException { Graph graph = new Graph(graphPath); Stats stats = new Stats(graphPath); // Clean up on exit Runtime.getRuntime().addShutdownHook(new Thread() { public void run() { try { graph.cleanUp(); } catch (IOException e) { System.out.println("Could not clean up graph on exit: " + e); } } }); // Configure Jackson JSON to use snake case naming style ObjectMapper objectMapper = JavalinJackson.getObjectMapper(); objectMapper.setPropertyNamingStrategy(PropertyNamingStrategy.SNAKE_CASE); JavalinJackson.configure(objectMapper); Javalin app = Javalin.create().start(port); app.before("/stats/*", ctx -> { checkQueryStrings(ctx, ""); }); app.before("/leaves/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/neighbors/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/visit/*", ctx -> { checkQueryStrings(ctx, "direction|edges"); }); app.before("/walk/*", ctx -> { checkQueryStrings(ctx, "direction|edges|traversal"); }); app.get("/stats/", ctx -> { ctx.json(stats); }); // Graph traversal endpoints // By default the traversal is a forward DFS using all edges app.get("/leaves/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/neighbors/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.neighbors(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/nodes/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitNodes(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/visit/paths/:src", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.visitPaths(new Endpoint.Input(src)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.get("/walk/:src/:dst", ctx -> { SWHID src = new SWHID(ctx.pathParam("src")); String dstFmt = ctx.pathParam("dst"); String direction = ctx.queryParam("direction", "forward"); String edgesFmt = ctx.queryParam("edges", "*"); String algorithm = ctx.queryParam("traversal", "dfs"); Endpoint endpoint = new Endpoint(graph, direction, edgesFmt); Endpoint.Output output = endpoint.walk(new Endpoint.Input(src, dstFmt, algorithm)); ctx.json(formatEndpointOutput(output, showTimings)); }); app.exception(IllegalArgumentException.class, (e, ctx) -> { ctx.status(400); 
ctx.result(e.getMessage()); }); } /** * Checks query strings names provided to the REST API. * * @param ctx Javalin HTTP request context * @param allowedFmt a regular expression describing allowed query strings names * @throws IllegalArgumentException unknown query string provided */ private static void checkQueryStrings(Context ctx, String allowedFmt) { Map> queryParamMap = ctx.queryParamMap(); for (String key : queryParamMap.keySet()) { if (!key.matches(allowedFmt)) { throw new IllegalArgumentException("Unknown query string: " + key); } } } /** * Formats endpoint result into final JSON for the REST API. *
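A minimal client-side sketch of one of the routes registered above, assuming a server started locally on the default port 5009; the SWHID is a placeholder and the query parameters come from the allowed set (direction, edges):

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class LeavesClientSketch {
        public static void main(String[] args) throws Exception {
            String src = "swh:1:dir:0000000000000000000000000000000000000008";  // placeholder SWHID
            HttpRequest request = HttpRequest.newBuilder()
                    .uri(URI.create("http://localhost:5009/leaves/" + src + "?direction=forward&edges=dir:cnt"))
                    .GET()
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            System.out.println(response.body());  // JSON object with "result" and "meta" fields
        }
    }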

* Removes unwanted information if necessary, such as timings (to prevent use of side channels * attacks). * * @param output endpoint operation output which needs formatting * @param showTimings true if timings should be in results metadata, false otherwise * @return final Object with desired JSON format */ private static Object formatEndpointOutput(Endpoint.Output output, boolean showTimings) { if (showTimings) { return output; } else { Map metaNoTimings = Map.of("nb_edges_accessed", output.meta.nbEdgesAccessed); Map outputNoTimings = Map.of("result", output.result, "meta", metaNoTimings); return outputNoTimings; } } } diff --git a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java index 10b7860..8ddb280 100644 --- a/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java +++ b/java/src/main/java/org/softwareheritage/graph/server/Endpoint.java @@ -1,308 +1,309 @@ package org.softwareheritage.graph.server; import org.softwareheritage.graph.*; import org.softwareheritage.graph.benchmark.utils.Timing; import java.util.ArrayList; /** * REST API endpoints wrapper functions. *

* Graph operations are segmented between high-level class (this one) and the low-level class * ({@link Traversal}). The {@link Endpoint} class creates wrappers for each endpoints by performing * all the input/output node ids conversions and logging timings. * * @author The Software Heritage developers * @see Traversal */ public class Endpoint { /** Graph where traversal endpoint is performed */ Graph graph; /** Internal traversal API */ Traversal traversal; /** * Constructor. * * @param graph the graph used for traversal endpoint * @param direction a string (either "forward" or "backward") specifying edge orientation - * @param edgesFmt a formatted string describing allowed edges + * @param edgesFmt a formatted string describing allowed + * edges */ public Endpoint(Graph graph, String direction, String edgesFmt) { this.graph = graph; this.traversal = new Traversal(graph, direction, edgesFmt); } /** * Converts a list of (internal) long node ids to a list of corresponding (external) SWHIDs. * * @param nodeIds the list of long node ids * @return a list of corresponding SWHIDs */ private ArrayList convertNodesToSWHIDs(ArrayList nodeIds) { ArrayList swhids = new ArrayList<>(); for (long nodeId : nodeIds) { swhids.add(graph.getSWHID(nodeId)); } return swhids; } /** * Converts a list of (internal) long node ids to the corresponding {@link SwhPath}. * * @param nodeIds the list of long node ids * @return the corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private SwhPath convertNodesToSwhPath(ArrayList nodeIds) { SwhPath path = new SwhPath(); for (long nodeId : nodeIds) { path.add(graph.getSWHID(nodeId)); } return path; } /** * Converts a list of paths made of (internal) long node ids to one made of {@link SwhPath}-s. * * @param pathsNodeId the list of paths with long node ids * @return a list of corresponding {@link SwhPath} * @see org.softwareheritage.graph.SwhPath */ private ArrayList convertPathsToSWHIDs(ArrayList> pathsNodeId) { ArrayList paths = new ArrayList<>(); for (ArrayList path : pathsNodeId) { paths.add(convertNodesToSwhPath(path)); } return paths; } /** * Leaves endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#leaves(long) */ public Output leaves(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.leaves(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Neighbors endpoint wrapper. 
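For reference, a hypothetical in-process use of these wrappers, mirroring what the HTTP handlers above do; the graph basename, the edge restriction and the source SWHID are placeholders:

    import org.softwareheritage.graph.Graph;
    import org.softwareheritage.graph.SWHID;
    import org.softwareheritage.graph.server.Endpoint;

    public class LeavesUsageSketch {
        public static void main(String[] args) throws Exception {
            Graph graph = new Graph("/path/to/graph/basename");   // placeholder basename
            // example restriction: only follow rev->dir and dir->{dir,cnt} edges
            Endpoint endpoint = new Endpoint(graph, "forward", "rev:dir,dir:dir,dir:cnt");
            SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000009");  // placeholder
            Endpoint.Output output = endpoint.leaves(new Endpoint.Input(src));
            System.out.println(output.result);                    // the leaf SWHIDs
            System.out.println(output.meta.nbEdgesAccessed);      // traversal metadata
        }
    }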
* * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#neighbors(long) */ public Output neighbors(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.neighbors(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Walk endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting {@link SwhPath} from endpoint call and operation metadata * @see SWHID * @see org.softwareheritage.graph.SwhPath * @see Traversal#walk */ public Output walk(Input input) { Output output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); ArrayList nodeIds = new ArrayList(); // Destination is either a SWHID or a node type try { SWHID dstSWHID = new SWHID(input.dstFmt); long dstNodeId = graph.getNodeId(dstSWHID); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstNodeId, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored1) { try { Node.Type dstType = Node.Type.fromStr(input.dstFmt); startTime = Timing.start(); nodeIds = traversal.walk(srcNodeId, dstType, input.algorithm); output.meta.timings.traversal = Timing.stop(startTime); } catch (IllegalArgumentException ignored2) { } } output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSwhPath(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * VisitNodes endpoint wrapper. * * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SWHID} from endpoint call and operation metadata * @see SWHID * @see Traversal#visitNodes(long) */ public Output visitNodes(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList nodeIds = traversal.visitNodes(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertNodesToSWHIDs(nodeIds); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * VisitPaths endpoint wrapper. 
* * @param input input parameters for the underlying endpoint call * @return the resulting list of {@link SwhPath} from endpoint call and operation metadata * @see SWHID * @see org.softwareheritage.graph.SwhPath * @see Traversal#visitPaths(long) */ public Output visitPaths(Input input) { Output> output = new Output<>(); long startTime; startTime = Timing.start(); long srcNodeId = graph.getNodeId(input.src); output.meta.timings.swhid2node = Timing.stop(startTime); startTime = Timing.start(); ArrayList> paths = traversal.visitPaths(srcNodeId); output.meta.timings.traversal = Timing.stop(startTime); output.meta.nbEdgesAccessed = traversal.getNbEdgesAccessed(); startTime = Timing.start(); output.result = convertPathsToSWHIDs(paths); output.meta.timings.node2swhid = Timing.stop(startTime); return output; } /** * Wrapper class to unify traversal methods input signatures. */ public static class Input { /** Source node of endpoint call specified as a {@link SWHID} */ public SWHID src; /** - * Destination formatted string as described in the API + * Destination formatted string as described in the + * API */ public String dstFmt; /** Traversal algorithm used in endpoint call (either "dfs" or "bfs") */ public String algorithm; public Input(SWHID src) { this.src = src; } public Input(SWHID src, String dstFmt, String algorithm) { this.src = src; this.dstFmt = dstFmt; this.algorithm = algorithm; } } /** * Wrapper class to return both the endpoint result and metadata (such as timings). */ public static class Output { /** The result content itself */ public T result; /** Various metadata about the result */ public Meta meta; public Output() { this.result = null; this.meta = new Meta(); } /** * Endpoint result metadata. */ public class Meta { /** Operations timings */ public Timings timings; /** Number of edges accessed during traversal */ public long nbEdgesAccessed; public Meta() { this.timings = new Timings(); this.nbEdgesAccessed = 0; } /** * Wrapper class for JSON output format. 
*/ public class Timings { /** Time in seconds to do the traversal */ public double traversal; /** Time in seconds to convert input SWHID to node id */ public double swhid2node; /** Time in seconds to convert output node ids to SWHIDs */ public double node2swhid; } } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/MPHTranslate.java b/java/src/main/java/org/softwareheritage/graph/utils/MPHTranslate.java index 6e1f8f4..ca7012b 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/MPHTranslate.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/MPHTranslate.java @@ -1,58 +1,50 @@ package org.softwareheritage.graph.utils; import com.martiansoftware.jsap.*; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.fastutil.objects.Object2LongFunction; import it.unimi.dsi.io.FastBufferedReader; import it.unimi.dsi.io.LineIterator; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; public class MPHTranslate { private static JSAPResult parse_args(String[] args) { JSAPResult config = null; try { - SimpleJSAP jsap = new SimpleJSAP( - MPHTranslate.class.getName(), - "", - new Parameter[]{ - new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.REQUIRED, - "Filename of the serialized MPH"), - } - ); + SimpleJSAP jsap = new SimpleJSAP(MPHTranslate.class.getName(), "", + new Parameter[]{new UnflaggedOption("function", JSAP.STRING_PARSER, JSAP.REQUIRED, + "Filename of the serialized MPH"),}); config = jsap.parse(args); if (jsap.messagePrinted()) { System.exit(1); } } catch (JSAPException e) { e.printStackTrace(); } return config; } @SuppressWarnings("unchecked") // Suppress warning for Object2LongFunction cast - static Object2LongFunction loadMPH(String mphPath) - throws IOException, ClassNotFoundException { + static Object2LongFunction loadMPH(String mphPath) throws IOException, ClassNotFoundException { return (Object2LongFunction) BinIO.loadObject(mphPath); } - public static void main(String[] args) - throws IOException, ClassNotFoundException { + public static void main(String[] args) throws IOException, ClassNotFoundException { JSAPResult config = parse_args(args); String mphPath = config.getString("function"); Object2LongFunction mphMap = loadMPH(mphPath); - FastBufferedReader buffer = new FastBufferedReader( - new InputStreamReader(System.in, StandardCharsets.US_ASCII)); + FastBufferedReader buffer = new FastBufferedReader(new InputStreamReader(System.in, StandardCharsets.US_ASCII)); LineIterator lineIterator = new LineIterator(buffer); while (lineIterator.hasNext()) { String line = lineIterator.next().toString(); System.out.println(mphMap.getLong(line)); } } } diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java index 5243d12..545dc8f 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadGraph.java @@ -1,32 +1,27 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.ImmutableGraph; import it.unimi.dsi.big.webgraph.NodeIterator; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.IOException; public class ReadGraph { public static void main(String[] args) throws IOException { String graphPath = args[0]; ImmutableGraph graph = ImmutableGraph.load(graphPath); NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes()); NodeIterator it = graph.nodeIterator(); while (it.hasNext()) { 
long srcNode = it.nextLong(); var s = it.successors(); long dstNode; while ((dstNode = s.nextLong()) >= 0) { - System.out.format( - "%s %s\n", - nodeMap.getSWHID(srcNode), - nodeMap.getSWHID(dstNode) - ); + System.out.format("%s %s\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode)); } } } } - diff --git a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java index 10b4c20..0e0f604 100644 --- a/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java +++ b/java/src/main/java/org/softwareheritage/graph/utils/ReadLabelledGraph.java @@ -1,48 +1,40 @@ package org.softwareheritage.graph.utils; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledImmutableGraph; import it.unimi.dsi.big.webgraph.labelling.ArcLabelledNodeIterator; import it.unimi.dsi.big.webgraph.labelling.BitStreamArcLabelledImmutableGraph; import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.util.PermutedFrontCodedStringList; import org.softwareheritage.graph.maps.NodeIdMap; import java.io.IOException; public class ReadLabelledGraph { public static void main(String[] args) throws IOException, ClassNotFoundException { String graphPath = args[0]; ArcLabelledImmutableGraph graph = BitStreamArcLabelledImmutableGraph.loadOffline(graphPath + "-labelled"); NodeIdMap nodeMap = new NodeIdMap(graphPath, graph.numNodes()); - PermutedFrontCodedStringList labelMap = (PermutedFrontCodedStringList) BinIO.loadObject(graphPath + "-labels.fcl"); + PermutedFrontCodedStringList labelMap = (PermutedFrontCodedStringList) BinIO + .loadObject(graphPath + "-labels.fcl"); ArcLabelledNodeIterator it = graph.nodeIterator(); while (it.hasNext()) { long srcNode = it.nextLong(); ArcLabelledNodeIterator.LabelledArcIterator s = it.successors(); long dstNode; while ((dstNode = s.nextLong()) >= 0) { int[] labels = (int[]) s.label().get(); if (labels.length > 0) { for (int label : labels) { - System.out.format( - "%s %s %s\n", - nodeMap.getSWHID(srcNode), - nodeMap.getSWHID(dstNode), - labelMap.get(label) - ); + System.out.format("%s %s %s\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode), + labelMap.get(label)); } } else { - System.out.format( - "%s %s\n", - nodeMap.getSWHID(srcNode), - nodeMap.getSWHID(dstNode) - ); + System.out.format("%s %s\n", nodeMap.getSWHID(srcNode), nodeMap.getSWHID(dstNode)); } } } } } - diff --git a/java/src/test/java/org/softwareheritage/graph/AllowedEdgesTest.java b/java/src/test/java/org/softwareheritage/graph/AllowedEdgesTest.java index 7edb19d..f91f6ed 100644 --- a/java/src/test/java/org/softwareheritage/graph/AllowedEdgesTest.java +++ b/java/src/test/java/org/softwareheritage/graph/AllowedEdgesTest.java @@ -1,112 +1,113 @@ package org.softwareheritage.graph; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import java.util.ArrayList; - public class AllowedEdgesTest extends GraphTest { static class EdgeType { Node.Type src; Node.Type dst; public EdgeType(Node.Type src, Node.Type dst) { this.src = src; this.dst = dst; } @Override public boolean equals(Object otherObj) { - if (otherObj == this) return true; - if (!(otherObj instanceof EdgeType)) return false; + if (otherObj == this) + return true; + if (!(otherObj instanceof EdgeType)) + return false; EdgeType other = (EdgeType) otherObj; return src == other.src && dst == other.dst; } } void assertEdgeRestriction(AllowedEdges edges, ArrayList expectedAllowed) { Node.Type[] nodeTypes = Node.Type.values(); for (Node.Type src 
: nodeTypes) { for (Node.Type dst : nodeTypes) { EdgeType edge = new EdgeType(src, dst); boolean isAllowed = edges.isAllowed(src, dst); boolean isExpected = false; for (EdgeType expected : expectedAllowed) { if (expected.equals(edge)) { isExpected = true; break; } } Assertions.assertEquals(isAllowed, isExpected, "Edge type: " + src + " -> " + dst); } } } @Test public void dirToDirDirToCntEdges() { AllowedEdges edges = new AllowedEdges("dir:dir,dir:cnt"); ArrayList expected = new ArrayList<>(); expected.add(new EdgeType(Node.Type.DIR, Node.Type.DIR)); expected.add(new EdgeType(Node.Type.DIR, Node.Type.CNT)); assertEdgeRestriction(edges, expected); } @Test public void relToRevRevToRevRevToDirEdges() { AllowedEdges edges = new AllowedEdges("rel:rev,rev:rev,rev:dir"); ArrayList expected = new ArrayList<>(); expected.add(new EdgeType(Node.Type.REL, Node.Type.REV)); expected.add(new EdgeType(Node.Type.REV, Node.Type.REV)); expected.add(new EdgeType(Node.Type.REV, Node.Type.DIR)); assertEdgeRestriction(edges, expected); } @Test public void revToAllDirToDirEdges() { AllowedEdges edges = new AllowedEdges("rev:*,dir:dir"); ArrayList expected = new ArrayList<>(); for (Node.Type dst : Node.Type.values()) { expected.add(new EdgeType(Node.Type.REV, dst)); } expected.add(new EdgeType(Node.Type.DIR, Node.Type.DIR)); assertEdgeRestriction(edges, expected); } @Test public void allToCntEdges() { AllowedEdges edges = new AllowedEdges("*:cnt"); ArrayList expected = new ArrayList<>(); for (Node.Type src : Node.Type.values()) { expected.add(new EdgeType(src, Node.Type.CNT)); } assertEdgeRestriction(edges, expected); } @Test public void allEdges() { AllowedEdges edges = new AllowedEdges("*:*"); ArrayList expected = new ArrayList<>(); for (Node.Type src : Node.Type.values()) { for (Node.Type dst : Node.Type.values()) { expected.add(new EdgeType(src, dst)); } } assertEdgeRestriction(edges, expected); // Special null value used to quickly bypass edge check when no restriction AllowedEdges edges2 = new AllowedEdges("*"); Assertions.assertNull(edges2.restrictedTo); } @Test public void noEdges() { AllowedEdges edges = new AllowedEdges(""); AllowedEdges edges2 = new AllowedEdges(null); ArrayList expected = new ArrayList<>(); assertEdgeRestriction(edges, expected); assertEdgeRestriction(edges2, expected); } } diff --git a/java/src/test/java/org/softwareheritage/graph/VisitTest.java b/java/src/test/java/org/softwareheritage/graph/VisitTest.java index 9cce68d..de5e8af 100644 --- a/java/src/test/java/org/softwareheritage/graph/VisitTest.java +++ b/java/src/test/java/org/softwareheritage/graph/VisitTest.java @@ -1,540 +1,420 @@ package org.softwareheritage.graph; import java.util.ArrayList; import java.util.Set; import java.util.HashSet; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; // Avoid warnings concerning Endpoint.Output.result manual cast @SuppressWarnings("unchecked") public class VisitTest extends GraphTest { private void assertSameNodesFromPaths(ArrayList paths, ArrayList nodes) { Set expectedNodes = new HashSet(); for (SwhPath path : paths) { expectedNodes.addAll(path.getPath()); } GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void forwardFromRoot() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); 
ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000007")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000004")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000005")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000007")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); 
- expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000004")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - )); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021", + "swh:1:cnt:0000000000000000000000000000000000000005")); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021", "swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); + "swh:1:cnt:0000000000000000000000000000000000000001")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromMiddle() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", + expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - )); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", + "swh:1:cnt:0000000000000000000000000000000000000007")); + expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - )); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", + "swh:1:cnt:0000000000000000000000000000000000000004")); + 
expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - )); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", - "swh:1:cnt:0000000000000000000000000000000000000011" - )); + "swh:1:cnt:0000000000000000000000000000000000000005")); + expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", + "swh:1:cnt:0000000000000000000000000000000000000011")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromLeaf() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000004" - )); + expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromRoot() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:ori:0000000000000000000000000000000000000021" - )); + expectedPaths.add(new SwhPath("swh:1:ori:0000000000000000000000000000000000000021")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromMiddle() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:dir:0000000000000000000000000000000000000012"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000012", + expectedPaths.add(new SwhPath("swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019" - )); + "swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardFromLeaf() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:cnt:0000000000000000000000000000000000000004"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new 
Endpoint(graph, "backward", "*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000004", + expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019" - )); - expectedPaths.add( - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000004", + "swh:1:rel:0000000000000000000000000000000000000019")); + expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019" - )); - expectedPaths.add( - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000004", + "swh:1:rel:0000000000000000000000000000000000000019")); + expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:ori:0000000000000000000000000000000000000021" - )); - expectedPaths.add( - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000004", + "swh:1:ori:0000000000000000000000000000000000000021")); + expectedPaths.add(new SwhPath("swh:1:cnt:0000000000000000000000000000000000000004", "swh:1:dir:0000000000000000000000000000000000000006", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rel:0000000000000000000000000000000000000010", "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:ori:0000000000000000000000000000000000000021" - )); + "swh:1:ori:0000000000000000000000000000000000000021")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToRev() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009" - )); + expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", + "swh:1:rev:0000000000000000000000000000000000000009")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRelToRevRevToRev() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rel:0000000000000000000000000000000000000010"); Endpoint endpoint1 = new 
Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rel:rev,rev:rev"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:rel:0000000000000000000000000000000000000010", + expectedPaths.add(new SwhPath("swh:1:rel:0000000000000000000000000000000000000010", "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003" - )); + "swh:1:rev:0000000000000000000000000000000000000003")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardRevToAllDirToAll() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000013"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000005")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000005")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000004")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000004")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - )); - expectedPaths.add( - new SwhPath( - 
"swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000007")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000007")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", - "swh:1:cnt:0000000000000000000000000000000000000011" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000011")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:cnt:0000000000000000000000000000000000000001")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000013", "swh:1:dir:0000000000000000000000000000000000000012", "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - )); + "swh:1:cnt:0000000000000000000000000000000000000001")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardSnpToAllRevToAll() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "snp:*,rev:*"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", + expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:dir:0000000000000000000000000000000000000002" - )); - expectedPaths.add( - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", + "swh:1:dir:0000000000000000000000000000000000000002")); + expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008" - )); - expectedPaths.add( - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rel:0000000000000000000000000000000000000010" - )); + 
"swh:1:dir:0000000000000000000000000000000000000008")); + expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", + "swh:1:rel:0000000000000000000000000000000000000010")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardNoEdges() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "forward", ""); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "forward", ""); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020" - )); + expectedPaths.add(new SwhPath("swh:1:snp:0000000000000000000000000000000000000020")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void backwardRevToRevRevToRel() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList paths = (ArrayList) endpoint1.visitPaths(new Endpoint.Input(swhid)).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev,rev:rel"); ArrayList nodes = (ArrayList) endpoint2.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedPaths = new ArrayList(); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000003", + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", "swh:1:rev:0000000000000000000000000000000000000013", "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019" - )); - expectedPaths.add( - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:rel:0000000000000000000000000000000000000019")); + expectedPaths.add(new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rel:0000000000000000000000000000000000000010" - )); + "swh:1:rel:0000000000000000000000000000000000000010")); GraphTest.assertEqualsAnyOrder(expectedPaths, paths); assertSameNodesFromPaths(expectedPaths, nodes); } @Test public void forwardFromRootNodesOnly() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:ori:0000000000000000000000000000000000000021"); Endpoint endpoint = new Endpoint(graph, "forward", "*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SWHID("swh:1:ori:0000000000000000000000000000000000000021")); expectedNodes.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000002")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000001")); expectedNodes.add(new SWHID("swh:1:dir:0000000000000000000000000000000000000008")); expectedNodes.add(new 
SWHID("swh:1:dir:0000000000000000000000000000000000000006")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000004")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000005")); expectedNodes.add(new SWHID("swh:1:cnt:0000000000000000000000000000000000000007")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } @Test public void backwardRevToAllNodesOnly() { Graph graph = getGraph(); SWHID swhid = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint = new Endpoint(graph, "backward", "rev:*"); ArrayList nodes = (ArrayList) endpoint.visitNodes(new Endpoint.Input(swhid)).result; ArrayList expectedNodes = new ArrayList(); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000003")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000009")); expectedNodes.add(new SWHID("swh:1:snp:0000000000000000000000000000000000000020")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000010")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000013")); expectedNodes.add(new SWHID("swh:1:rev:0000000000000000000000000000000000000018")); expectedNodes.add(new SWHID("swh:1:rel:0000000000000000000000000000000000000019")); GraphTest.assertEqualsAnyOrder(expectedNodes, nodes); } } diff --git a/java/src/test/java/org/softwareheritage/graph/WalkTest.java b/java/src/test/java/org/softwareheritage/graph/WalkTest.java index fe463d2..8ddd0f9 100644 --- a/java/src/test/java/org/softwareheritage/graph/WalkTest.java +++ b/java/src/test/java/org/softwareheritage/graph/WalkTest.java @@ -1,233 +1,187 @@ package org.softwareheritage.graph; import java.util.Arrays; import java.util.List; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.softwareheritage.graph.server.Endpoint; public class WalkTest extends GraphTest { @Test public void forwardRootToLeaf() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:snp:0000000000000000000000000000000000000020"); String dstFmt = "swh:1:cnt:0000000000000000000000000000000000000005"; - SwhPath solution1 = - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - ); - SwhPath solution2 = - new SwhPath( - "swh:1:snp:0000000000000000000000000000000000000020", - "swh:1:rel:0000000000000000000000000000000000000010", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - ); + SwhPath solution1 = new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000005"); + SwhPath solution2 = new SwhPath("swh:1:snp:0000000000000000000000000000000000000020", + "swh:1:rel:0000000000000000000000000000000000000010", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000005"); Endpoint endpoint1 = new 
Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardLeafToLeaf() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:cnt:0000000000000000000000000000000000000007"); String dstFmt = "cnt"; - SwhPath expectedPath = - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000007" - ); + SwhPath expectedPath = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000007"); Endpoint endpoint1 = new Endpoint(graph, "forward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void forwardRevToRev() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000018"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000003"; - SwhPath expectedPath = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rev:0000000000000000000000000000000000000013", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003" - ); + SwhPath expectedPath = new SwhPath("swh:1:rev:0000000000000000000000000000000000000018", + "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000003"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void backwardRevToRev() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000003"); String dstFmt = "swh:1:rev:0000000000000000000000000000000000000018"; - SwhPath expectedPath = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000013", - "swh:1:rev:0000000000000000000000000000000000000018" - ); + SwhPath expectedPath = new SwhPath("swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000013", + "swh:1:rev:0000000000000000000000000000000000000018"); Endpoint endpoint1 = new Endpoint(graph, "backward", "rev:rev"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "rev:rev"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } @Test public void backwardCntToFirstSnp() { 
Graph graph = getGraph(); SWHID src = new SWHID("swh:1:cnt:0000000000000000000000000000000000000001"); String dstFmt = "snp"; - SwhPath solution1 = - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:snp:0000000000000000000000000000000000000020" - ); - SwhPath solution2 = - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rel:0000000000000000000000000000000000000010", - "swh:1:snp:0000000000000000000000000000000000000020" - ); - SwhPath solution3 = - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:snp:0000000000000000000000000000000000000020" - ); - SwhPath solution4 = - new SwhPath( - "swh:1:cnt:0000000000000000000000000000000000000001", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rel:0000000000000000000000000000000000000010", - "swh:1:snp:0000000000000000000000000000000000000020" - ); + SwhPath solution1 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:dir:0000000000000000000000000000000000000002", + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:snp:0000000000000000000000000000000000000020"); + SwhPath solution2 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:dir:0000000000000000000000000000000000000002", + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rel:0000000000000000000000000000000000000010", + "swh:1:snp:0000000000000000000000000000000000000020"); + SwhPath solution3 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:snp:0000000000000000000000000000000000000020"); + SwhPath solution4 = new SwhPath("swh:1:cnt:0000000000000000000000000000000000000001", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rel:0000000000000000000000000000000000000010", + "swh:1:snp:0000000000000000000000000000000000000020"); Endpoint endpoint1 = new Endpoint(graph, "backward", "*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void forwardRevToFirstCnt() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:rev:0000000000000000000000000000000000000009"); String dstFmt = "cnt"; - SwhPath solution1 = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000007" - ); - 
SwhPath solution2 = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000005" - ); - SwhPath solution3 = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:dir:0000000000000000000000000000000000000006", - "swh:1:cnt:0000000000000000000000000000000000000004" - ); - SwhPath solution4 = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:dir:0000000000000000000000000000000000000008", - "swh:1:cnt:0000000000000000000000000000000000000001" - ); - SwhPath solution5 = - new SwhPath( - "swh:1:rev:0000000000000000000000000000000000000009", - "swh:1:rev:0000000000000000000000000000000000000003", - "swh:1:dir:0000000000000000000000000000000000000002", - "swh:1:cnt:0000000000000000000000000000000000000001" - ); + SwhPath solution1 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:cnt:0000000000000000000000000000000000000007"); + SwhPath solution2 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000005"); + SwhPath solution3 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:dir:0000000000000000000000000000000000000006", + "swh:1:cnt:0000000000000000000000000000000000000004"); + SwhPath solution4 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:dir:0000000000000000000000000000000000000008", + "swh:1:cnt:0000000000000000000000000000000000000001"); + SwhPath solution5 = new SwhPath("swh:1:rev:0000000000000000000000000000000000000009", + "swh:1:rev:0000000000000000000000000000000000000003", + "swh:1:dir:0000000000000000000000000000000000000002", + "swh:1:cnt:0000000000000000000000000000000000000001"); Endpoint endpoint1 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "forward", "rev:*,dir:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; - List possibleSolutions = - Arrays.asList(solution1, solution2, solution3, solution4, solution5); + List possibleSolutions = Arrays.asList(solution1, solution2, solution3, solution4, solution5); Assertions.assertTrue(possibleSolutions.contains(dfsPath)); Assertions.assertTrue(possibleSolutions.contains(bfsPath)); } @Test public void backwardDirToFirstRel() { Graph graph = getGraph(); SWHID src = new SWHID("swh:1:dir:0000000000000000000000000000000000000016"); String dstFmt = "rel"; - SwhPath expectedPath = - new SwhPath( - "swh:1:dir:0000000000000000000000000000000000000016", - "swh:1:dir:0000000000000000000000000000000000000017", - "swh:1:rev:0000000000000000000000000000000000000018", - "swh:1:rel:0000000000000000000000000000000000000019" - ); + SwhPath expectedPath = new SwhPath("swh:1:dir:0000000000000000000000000000000000000016", + "swh:1:dir:0000000000000000000000000000000000000017", + "swh:1:rev:0000000000000000000000000000000000000018", + "swh:1:rel:0000000000000000000000000000000000000019"); Endpoint endpoint1 = new Endpoint(graph, 
"backward", "dir:dir,dir:rev,rev:*"); SwhPath dfsPath = (SwhPath) endpoint1.walk(new Endpoint.Input(src, dstFmt, "dfs")).result; Endpoint endpoint2 = new Endpoint(graph, "backward", "dir:dir,dir:rev,rev:*"); SwhPath bfsPath = (SwhPath) endpoint2.walk(new Endpoint.Input(src, dstFmt, "bfs")).result; Assertions.assertEquals(dfsPath, expectedPath); Assertions.assertEquals(bfsPath, expectedPath); } }