Changeset View
Changeset View
Standalone View
Standalone View
java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java
package org.softwareheritage.graph.maps; | package org.softwareheritage.graph.maps; | ||||||||
import it.unimi.dsi.fastutil.io.BinIO; | |||||||||
import it.unimi.dsi.fastutil.longs.LongBigList; | |||||||||
import it.unimi.dsi.fastutil.objects.Object2LongFunction; | |||||||||
import it.unimi.dsi.util.ByteBufferLongBigList; | |||||||||
import org.softwareheritage.graph.Graph; | import org.softwareheritage.graph.Graph; | ||||||||
import org.softwareheritage.graph.SWHID; | import org.softwareheritage.graph.SWHID; | ||||||||
import java.io.FileInputStream; | |||||||||
import java.io.IOException; | import java.io.IOException; | ||||||||
import java.nio.charset.StandardCharsets; | |||||||||
/** | /** | ||||||||
* Mapping between internal long node id and external SWHID. | * Mapping between internal long node id and external SWHID. | ||||||||
* <p> | * <p> | ||||||||
* Mappings in both directions are pre-computed and dumped on disk in the {@link NodeMapBuilder} | * The SWHID -> node mapping is obtained from hashing the SWHID with a MPH, | ||||||||
* class, then they are loaded here using mmap(). | * then permuting it using an mmap()-ed .order file containing the graph | ||||||||
* permutation. | |||||||||
* | |||||||||
* The node -> SWHID reverse mapping is pre-computed and dumped on disk in the | |||||||||
* {@link NodeMapBuilder} class, then it is loaded here using mmap(). | |||||||||
* | * | ||||||||
* @author The Software Heritage developers | * @author The Software Heritage developers | ||||||||
* @see NodeMapBuilder | * @see NodeMapBuilder | ||||||||
*/ | */ | ||||||||
public class NodeIdMap { | public class NodeIdMap { | ||||||||
/** Fixed length of full SWHID */ | |||||||||
public static final int SWHID_LENGTH = 50; | |||||||||
/** Fixed length of long node id */ | |||||||||
public static final int NODE_ID_LENGTH = 20; | |||||||||
/** Fixed length of binary SWHID buffer */ | /** Fixed length of binary SWHID buffer */ | ||||||||
public static final int SWHID_BIN_SIZE = 22; | public static final int SWHID_BIN_SIZE = 22; | ||||||||
/** Fixed length of binary node id buffer */ | |||||||||
public static final int NODE_ID_BIN_SIZE = 8; | |||||||||
/** Graph path and basename */ | /** Graph path and basename */ | ||||||||
String graphPath; | String graphPath; | ||||||||
/** Number of ids to map */ | /** Number of ids to map */ | ||||||||
long nbIds; | long nbIds; | ||||||||
/** mmap()-ed SWHID_TO_NODE file */ | |||||||||
MapFile swhToNodeMap; | |||||||||
/** mmap()-ed NODE_TO_SWHID file */ | /** mmap()-ed NODE_TO_SWHID file */ | ||||||||
MapFile nodeToSwhMap; | MapFile nodeToSwhMap; | ||||||||
/** Minimal perfect hash function SWHID -> initial order */ | |||||||||
vlorentz: to introduce the acronym | |||||||||
Object2LongFunction<byte[]> mph; | |||||||||
/** mmap()-ed long list with the permutation initial order -> graph order */ | |||||||||
LongBigList orderMap; | |||||||||
/** FileInputStream containing the permutation */ | |||||||||
FileInputStream orderInputStream; | |||||||||
/** | /** | ||||||||
* Constructor. | * Constructor. | ||||||||
* | * | ||||||||
* @param graphPath full graph path | * @param graphPath full graph path | ||||||||
* @param nbNodes number of nodes in the graph | * @param nbNodes number of nodes in the graph | ||||||||
*/ | */ | ||||||||
public NodeIdMap(String graphPath, long nbNodes) throws IOException { | public NodeIdMap(String graphPath, long nbNodes) throws IOException { | ||||||||
this.graphPath = graphPath; | this.graphPath = graphPath; | ||||||||
this.nbIds = nbNodes; | this.nbIds = nbNodes; | ||||||||
this.swhToNodeMap = new MapFile(graphPath + Graph.SWHID_TO_NODE, SWHID_BIN_SIZE + NODE_ID_BIN_SIZE); | // node -> SWHID | ||||||||
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_SWHID, SWHID_BIN_SIZE); | this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_SWHID, SWHID_BIN_SIZE); | ||||||||
// SWHID -> node | |||||||||
this.mph = loadMph(graphPath + ".mph"); | |||||||||
this.orderInputStream = new FileInputStream(graphPath + ".order"); | |||||||||
this.orderMap = ByteBufferLongBigList.map(orderInputStream.getChannel()); | |||||||||
} | |||||||||
@SuppressWarnings("unchecked") | |||||||||
private Object2LongFunction<byte[]> loadMph(String path) throws IOException { | |||||||||
try { | |||||||||
return (Object2LongFunction<byte[]>) BinIO.loadObject(path); | |||||||||
} catch (ClassNotFoundException e) { | |||||||||
throw new IOException(e.getMessage()); | |||||||||
} | |||||||||
} | } | ||||||||
/** | /** | ||||||||
* Converts SWHID to corresponding long node id. | * Converts SWHID to corresponding long node id. | ||||||||
* | * | ||||||||
* @param swhid node represented as a {@link SWHID} | * @param swhid node represented as a {@link SWHID} | ||||||||
* @param checkExists if true, error if the SWHID is not present in the graph | |||||||||
vlorentzUnsubmitted Not Done Inline Actionsif false, then invalid/inconsistent results will be returned without any warning vlorentz: `if false, then invalid/inconsistent results will be returned without any warning` | |||||||||
* @return corresponding node as a long id | * @return corresponding node as a long id | ||||||||
* @see SWHID | * @see SWHID | ||||||||
*/ | */ | ||||||||
public long getNodeId(SWHID swhid) { | public long getNodeId(SWHID swhid, boolean checkExists) { | ||||||||
/* | // 1. Hash the SWHID with the MPH to get its original ID | ||||||||
vlorentzUnsubmitted Not Done Inline Actions
let's not assume the reader of this function also read the previous one vlorentz: let's not assume the reader of this function also read the previous one | |||||||||
* The file is sorted by swhid, hence we can binary search on swhid to get corresponding nodeId | long origNodeId = mph.getLong(swhid.toString().getBytes(StandardCharsets.US_ASCII)); | ||||||||
*/ | |||||||||
long start = 0; | // 2. Use the order permutation to get the position in the permuted graph | ||||||||
long end = nbIds - 1; | long nodeId = this.orderMap.getLong(origNodeId); | ||||||||
while (start <= end) { | // 3. Check that the position effectively corresponds to a real node using the reverse map. | ||||||||
long lineNumber = (start + end) / 2L; | // This is necessary because the MPH makes no guarantees on whether the input SWHID is valid. | ||||||||
byte[] buffer = swhToNodeMap.readAtLine(lineNumber); | if (!checkExists || getSWHID(nodeId).equals(swhid)) { | ||||||||
byte[] swhidBuffer = new byte[SWHID_BIN_SIZE]; | return nodeId; | ||||||||
byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE]; | |||||||||
System.arraycopy(buffer, 0, swhidBuffer, 0, SWHID_BIN_SIZE); | |||||||||
System.arraycopy(buffer, SWHID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE); | |||||||||
String currentSWHID = SWHID.fromBytes(swhidBuffer).getSWHID(); | |||||||||
long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong(); | |||||||||
int cmp = currentSWHID.compareTo(swhid.toString()); | |||||||||
if (cmp == 0) { | |||||||||
return currentNodeId; | |||||||||
} else if (cmp < 0) { | |||||||||
start = lineNumber + 1; | |||||||||
} else { | } else { | ||||||||
end = lineNumber - 1; | throw new IllegalArgumentException("Unknown SWHID: " + swhid); | ||||||||
} | } | ||||||||
} | } | ||||||||
throw new IllegalArgumentException("Unknown SWHID: " + swhid); | public long getNodeId(SWHID swhid) { | ||||||||
return getNodeId(swhid, true); | |||||||||
} | } | ||||||||
/** | /** | ||||||||
* Converts a node long id to corresponding SWHID. | * Converts a node long id to corresponding SWHID. | ||||||||
* | * | ||||||||
* @param nodeId node as a long id | * @param nodeId node as a long id | ||||||||
* @return corresponding node as a {@link SWHID} | * @return corresponding node as a {@link SWHID} | ||||||||
* @see SWHID | * @see SWHID | ||||||||
Show All 9 Lines | public SWHID getSWHID(long nodeId) { | ||||||||
return SWHID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); | return SWHID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); | ||||||||
} | } | ||||||||
/** | /** | ||||||||
* Closes the mapping files. | * Closes the mapping files. | ||||||||
*/ | */ | ||||||||
public void close() throws IOException { | public void close() throws IOException { | ||||||||
swhToNodeMap.close(); | orderInputStream.close(); | ||||||||
nodeToSwhMap.close(); | nodeToSwhMap.close(); | ||||||||
} | } | ||||||||
} | } |
to introduce the acronym