Changeset View
Changeset View
Standalone View
Standalone View
api/server/src/main/java/org/softwareheritage/graph/algo/Visit.java
package org.softwareheritage.graph.algo; | package org.softwareheritage.graph.algo; | ||||
import java.util.ArrayList; | import java.util.ArrayList; | ||||
import java.util.LinkedHashSet; | |||||
import java.util.Stack; | import java.util.Stack; | ||||
import it.unimi.dsi.big.webgraph.LazyLongIterator; | import it.unimi.dsi.big.webgraph.LazyLongIterator; | ||||
import org.softwareheritage.graph.Edges; | |||||
import org.softwareheritage.graph.Graph; | import org.softwareheritage.graph.Graph; | ||||
import org.softwareheritage.graph.Node; | |||||
import org.softwareheritage.graph.SwhId; | import org.softwareheritage.graph.SwhId; | ||||
import org.softwareheritage.graph.SwhPath; | import org.softwareheritage.graph.SwhPath; | ||||
public class Visit { | public class Visit { | ||||
public enum OutputFmt { ONLY_NODES, ONLY_PATHS, NODES_AND_PATHS } | |||||
Graph graph; | Graph graph; | ||||
boolean useTransposed; | boolean useTransposed; | ||||
String allowedEdges; | Edges edges; | ||||
Stack<Long> currentPath; | |||||
// LinkedHashSet is necessary to preserve insertion order | |||||
LinkedHashSet<SwhId> nodes; | |||||
seirl: If you have the time, could you compare that with TreeSet? I'm not 100% convinced that… | |||||
haltodeAuthorUnsubmitted Done Inline ActionsTreeSet add operation is O(log(N)) while LinkedHashSet is O(1). The javadoc [1] seems to go with LinkedHashSet being faster too: "This implementation spares its clients from the unspecified, generally chaotic ordering provided by HashSet, without incurring the increased cost associated with TreeSet". In practice this can of course vary, I found some benchmarks [2] where TreeSet is faster on smaller entries and LinkedHashSet faster on bigger ones. I guess we would need to do benchmarks in our specific case but that would be a bit overkill. [1] https://docs.oracle.com/javase/10/docs/api/java/util/LinkedHashSet.html haltode: TreeSet add operation is O(log(N)) while LinkedHashSet is O(1). The javadoc [1] seems to go… | |||||
ArrayList<SwhPath> paths; | ArrayList<SwhPath> paths; | ||||
Stack<Long> currentPath; | |||||
public Visit(Graph graph, SwhId swhId, String allowedEdges, String algorithm, String direction) { | public Visit(Graph graph, SwhId src, String edgesFmt, String direction, OutputFmt output) { | ||||
if (!algorithm.matches("dfs|bfs")) { | |||||
throw new IllegalArgumentException("Unknown traversal algorithm: " + algorithm); | |||||
} | |||||
if (!direction.matches("forward|backward")) { | if (!direction.matches("forward|backward")) { | ||||
throw new IllegalArgumentException("Unknown traversal direction: " + direction); | throw new IllegalArgumentException("Unknown traversal direction: " + direction); | ||||
} | } | ||||
this.graph = graph; | this.graph = graph; | ||||
this.useTransposed = (direction.equals("backward")); | this.useTransposed = (direction.equals("backward")); | ||||
this.allowedEdges = allowedEdges; | this.edges = new Edges(edgesFmt); | ||||
this.nodes = new LinkedHashSet<SwhId>(); | |||||
this.paths = new ArrayList<SwhPath>(); | this.paths = new ArrayList<SwhPath>(); | ||||
this.currentPath = new Stack<Long>(); | this.currentPath = new Stack<Long>(); | ||||
if (algorithm.equals("dfs")) { | long nodeId = graph.getNodeId(src); | ||||
dfs(graph.getNodeId(swhId)); | if (output == OutputFmt.ONLY_NODES) { | ||||
dfsOutputOnlyNodes(nodeId); | |||||
} else { | |||||
dfs(nodeId); | |||||
} | |||||
} | } | ||||
public LinkedHashSet<SwhId> getNodes() { | |||||
return nodes; | |||||
} | } | ||||
// Allow Jackson JSON to only serialize the 'paths' field | |||||
public ArrayList<SwhPath> getPaths() { | public ArrayList<SwhPath> getPaths() { | ||||
return paths; | return paths; | ||||
} | } | ||||
private void dfs(long currentNodeId) { | private void dfs(long currentNodeId) { | ||||
nodes.add(graph.getSwhId(currentNodeId)); | |||||
currentPath.push(currentNodeId); | currentPath.push(currentNodeId); | ||||
long degree = graph.degree(currentNodeId, useTransposed); | long degree = graph.degree(currentNodeId, useTransposed); | ||||
LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); | LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); | ||||
long visitedNeighbors = 0; | long visitedNeighbors = 0; | ||||
while (degree-- > 0) { | while (degree-- > 0) { | ||||
long neighborNodeId = neighbors.nextLong(); | long neighborNodeId = neighbors.nextLong(); | ||||
if (isEdgeAllowed(currentNodeId, neighborNodeId)) { | Node.Type currentNodeType = graph.getSwhId(currentNodeId).getType(); | ||||
Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); | |||||
if (edges.isAllowed(currentNodeType, neighborNodeType)) { | |||||
dfs(neighborNodeId); | dfs(neighborNodeId); | ||||
visitedNeighbors++; | visitedNeighbors++; | ||||
} | } | ||||
} | } | ||||
if (visitedNeighbors == 0) { | if (visitedNeighbors == 0) { | ||||
SwhPath path = new SwhPath(); | SwhPath path = new SwhPath(); | ||||
for (long nodeId : currentPath) { | for (long nodeId : currentPath) { | ||||
path.add(graph.getSwhId(nodeId)); | path.add(graph.getSwhId(nodeId)); | ||||
} | } | ||||
paths.add(path); | paths.add(path); | ||||
} | } | ||||
currentPath.pop(); | currentPath.pop(); | ||||
} | } | ||||
private boolean isEdgeAllowed(long currentNodeId, long neighborNodeId) { | private void dfsOutputOnlyNodes(long currentNodeId) { | ||||
if (allowedEdges.equals("all")) { | nodes.add(graph.getSwhId(currentNodeId)); | ||||
return true; | long degree = graph.degree(currentNodeId, useTransposed); | ||||
} | LazyLongIterator neighbors = graph.neighbors(currentNodeId, useTransposed); | ||||
String currentType = graph.getSwhId(currentNodeId).getType(); | while (degree-- > 0) { | ||||
String neighborType = graph.getSwhId(neighborNodeId).getType(); | long neighborNodeId = neighbors.nextLong(); | ||||
String edgeType = currentType + ":" + neighborType; | Node.Type currentNodeType = graph.getSwhId(currentNodeId).getType(); | ||||
String edgeTypeRev = neighborType + ":" + currentType; | Node.Type neighborNodeType = graph.getSwhId(neighborNodeId).getType(); | ||||
return allowedEdges.contains(edgeType) || allowedEdges.contains(edgeTypeRev); | if (edges.isAllowed(currentNodeType, neighborNodeType)) { | ||||
dfsOutputOnlyNodes(neighborNodeId); | |||||
} | |||||
} | |||||
} | } | ||||
} | } |
If you have the time, could you compare that with TreeSet? I'm not 100% convinced that LinkedHashSet would be faster.