Changeset View
Changeset View
Standalone View
Standalone View
java/src/main/java/org/softwareheritage/graph/maps/NodeIdMap.java
package org.softwareheritage.graph.maps; | package org.softwareheritage.graph.maps; | ||||
import org.softwareheritage.graph.Graph; | import org.softwareheritage.graph.Graph; | ||||
import org.softwareheritage.graph.SwhPID; | import org.softwareheritage.graph.SWHID; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
/** | /** | ||||
* Mapping between internal long node id and external SWH PID. | * Mapping between internal long node id and external SWHID. | ||||
* <p> | * <p> | ||||
* Mappings in both directions are pre-computed and dumped on disk in the | * Mappings in both directions are pre-computed and dumped on disk in the | ||||
* {@link NodeMapBuilder} class, then they are loaded here using mmap(). | * {@link NodeMapBuilder} class, then they are loaded here using mmap(). | ||||
* | * | ||||
* @author The Software Heritage developers | * @author The Software Heritage developers | ||||
* @see NodeMapBuilder | * @see NodeMapBuilder | ||||
*/ | */ | ||||
public class NodeIdMap { | public class NodeIdMap { | ||||
/** Fixed length of full SWH PID */ | /** Fixed length of full SWHID */ | ||||
public static final int SWH_ID_LENGTH = 50; | public static final int SWHID_LENGTH = 50; | ||||
/** Fixed length of long node id */ | /** Fixed length of long node id */ | ||||
public static final int NODE_ID_LENGTH = 20; | public static final int NODE_ID_LENGTH = 20; | ||||
/** Fixed length of binary SWH PID buffer */ | /** Fixed length of binary SWHID buffer */ | ||||
public static final int SWH_ID_BIN_SIZE = 22; | public static final int SWHID_BIN_SIZE = 22; | ||||
/** Fixed length of binary node id buffer */ | /** Fixed length of binary node id buffer */ | ||||
public static final int NODE_ID_BIN_SIZE = 8; | public static final int NODE_ID_BIN_SIZE = 8; | ||||
/** Graph path and basename */ | /** Graph path and basename */ | ||||
String graphPath; | String graphPath; | ||||
/** Number of ids to map */ | /** Number of ids to map */ | ||||
long nbIds; | long nbIds; | ||||
/** mmap()-ed PID_TO_NODE file */ | /** mmap()-ed SWHID_TO_NODE file */ | ||||
MapFile swhToNodeMap; | MapFile swhToNodeMap; | ||||
/** mmap()-ed NODE_TO_PID file */ | /** mmap()-ed NODE_TO_SWHID file */ | ||||
MapFile nodeToSwhMap; | MapFile nodeToSwhMap; | ||||
/** | /** | ||||
* Constructor. | * Constructor. | ||||
* | * | ||||
* @param graphPath full graph path | * @param graphPath full graph path | ||||
* @param nbNodes number of nodes in the graph | * @param nbNodes number of nodes in the graph | ||||
*/ | */ | ||||
public NodeIdMap(String graphPath, long nbNodes) throws IOException { | public NodeIdMap(String graphPath, long nbNodes) throws IOException { | ||||
this.graphPath = graphPath; | this.graphPath = graphPath; | ||||
this.nbIds = nbNodes; | this.nbIds = nbNodes; | ||||
this.swhToNodeMap = new MapFile(graphPath + Graph.PID_TO_NODE, SWH_ID_BIN_SIZE + NODE_ID_BIN_SIZE); | this.swhToNodeMap = new MapFile(graphPath + Graph.SWHID_TO_NODE, SWHID_BIN_SIZE + NODE_ID_BIN_SIZE); | ||||
this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_PID, SWH_ID_BIN_SIZE); | this.nodeToSwhMap = new MapFile(graphPath + Graph.NODE_TO_SWHID, SWHID_BIN_SIZE); | ||||
} | } | ||||
/** | /** | ||||
* Converts SWH PID to corresponding long node id. | * Converts SWHID to corresponding long node id. | ||||
* | * | ||||
* @param swhPID node represented as a {@link SwhPID} | * @param swhid node represented as a {@link SWHID} | ||||
* @return corresponding node as a long id | * @return corresponding node as a long id | ||||
* @see org.softwareheritage.graph.SwhPID | * @see SWHID | ||||
*/ | */ | ||||
public long getNodeId(SwhPID swhPID) { | public long getNodeId(SWHID swhid) { | ||||
// The file is sorted by swhPID, hence we can binary search on swhPID to get corresponding | // The file is sorted by swhid, hence we can binary search on swhid to get corresponding | ||||
// nodeId | // nodeId | ||||
long start = 0; | long start = 0; | ||||
long end = nbIds - 1; | long end = nbIds - 1; | ||||
while (start <= end) { | while (start <= end) { | ||||
long lineNumber = (start + end) / 2L; | long lineNumber = (start + end) / 2L; | ||||
byte[] buffer = swhToNodeMap.readAtLine(lineNumber); | byte[] buffer = swhToNodeMap.readAtLine(lineNumber); | ||||
byte[] pidBuffer = new byte[SWH_ID_BIN_SIZE]; | byte[] swhidBuffer = new byte[SWHID_BIN_SIZE]; | ||||
byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE]; | byte[] nodeBuffer = new byte[NODE_ID_BIN_SIZE]; | ||||
System.arraycopy(buffer, 0, pidBuffer, 0, SWH_ID_BIN_SIZE); | System.arraycopy(buffer, 0, swhidBuffer, 0, SWHID_BIN_SIZE); | ||||
System.arraycopy(buffer, SWH_ID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE); | System.arraycopy(buffer, SWHID_BIN_SIZE, nodeBuffer, 0, NODE_ID_BIN_SIZE); | ||||
String currentSwhPID = SwhPID.fromBytes(pidBuffer).getSwhPID(); | String currentSWHID = SWHID.fromBytes(swhidBuffer).getSWHID(); | ||||
long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong(); | long currentNodeId = java.nio.ByteBuffer.wrap(nodeBuffer).getLong(); | ||||
int cmp = currentSwhPID.compareTo(swhPID.toString()); | int cmp = currentSWHID.compareTo(swhid.toString()); | ||||
if (cmp == 0) { | if (cmp == 0) { | ||||
return currentNodeId; | return currentNodeId; | ||||
} else if (cmp < 0) { | } else if (cmp < 0) { | ||||
start = lineNumber + 1; | start = lineNumber + 1; | ||||
} else { | } else { | ||||
end = lineNumber - 1; | end = lineNumber - 1; | ||||
} | } | ||||
} | } | ||||
throw new IllegalArgumentException("Unknown SWH PID: " + swhPID); | throw new IllegalArgumentException("Unknown SWHID: " + swhid); | ||||
} | } | ||||
/** | /** | ||||
* Converts a node long id to corresponding SWH PID. | * Converts a node long id to corresponding SWHID. | ||||
* | * | ||||
* @param nodeId node as a long id | * @param nodeId node as a long id | ||||
* @return corresponding node as a {@link SwhPID} | * @return corresponding node as a {@link SWHID} | ||||
* @see org.softwareheritage.graph.SwhPID | * @see SWHID | ||||
*/ | */ | ||||
public SwhPID getSwhPID(long nodeId) { | public SWHID getSWHID(long nodeId) { | ||||
// Each line in NODE_TO_PID is formatted as: swhPID | // Each line in NODE_TO_SWHID is formatted as: swhid | ||||
// The file is ordered by nodeId, meaning node0's swhPID is at line 0, hence we can read the | // The file is ordered by nodeId, meaning node0's swhid is at line 0, hence we can read the | ||||
// nodeId-th line to get corresponding swhPID | // nodeId-th line to get corresponding swhid | ||||
if (nodeId < 0 || nodeId >= nbIds) { | if (nodeId < 0 || nodeId >= nbIds) { | ||||
throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); | throw new IllegalArgumentException("Node id " + nodeId + " should be between 0 and " + nbIds); | ||||
} | } | ||||
return SwhPID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); | return SWHID.fromBytes(nodeToSwhMap.readAtLine(nodeId)); | ||||
} | } | ||||
/** | /** | ||||
* Closes the mapping files. | * Closes the mapping files. | ||||
*/ | */ | ||||
public void close() throws IOException { | public void close() throws IOException { | ||||
swhToNodeMap.close(); | swhToNodeMap.close(); | ||||
nodeToSwhMap.close(); | nodeToSwhMap.close(); | ||||
} | } | ||||
} | } |