Changeset View
Changeset View
Standalone View
Standalone View
java/src/main/java/org/softwareheritage/graph/SwhGraphProperties.java
Show All 12 Lines | |||||
import it.unimi.dsi.fastutil.bytes.ByteMappedBigList; | import it.unimi.dsi.fastutil.bytes.ByteMappedBigList; | ||||
import it.unimi.dsi.fastutil.ints.IntBigList; | import it.unimi.dsi.fastutil.ints.IntBigList; | ||||
import it.unimi.dsi.fastutil.ints.IntMappedBigList; | import it.unimi.dsi.fastutil.ints.IntMappedBigList; | ||||
import it.unimi.dsi.fastutil.io.BinIO; | import it.unimi.dsi.fastutil.io.BinIO; | ||||
import it.unimi.dsi.fastutil.longs.LongBigList; | import it.unimi.dsi.fastutil.longs.LongBigList; | ||||
import it.unimi.dsi.fastutil.longs.LongMappedBigList; | import it.unimi.dsi.fastutil.longs.LongMappedBigList; | ||||
import it.unimi.dsi.fastutil.shorts.ShortBigList; | import it.unimi.dsi.fastutil.shorts.ShortBigList; | ||||
import it.unimi.dsi.fastutil.shorts.ShortMappedBigList; | import it.unimi.dsi.fastutil.shorts.ShortMappedBigList; | ||||
import it.unimi.dsi.lang.FlyweightPrototype; | |||||
import it.unimi.dsi.sux4j.util.EliasFanoLongBigList; | import it.unimi.dsi.sux4j.util.EliasFanoLongBigList; | ||||
import org.apache.commons.configuration2.ex.ConfigurationException; | import org.apache.commons.configuration2.ex.ConfigurationException; | ||||
import org.softwareheritage.graph.maps.NodeIdMap; | import org.softwareheritage.graph.maps.NodeIdMap; | ||||
import org.softwareheritage.graph.maps.NodeTypesMap; | import org.softwareheritage.graph.maps.NodeTypesMap; | ||||
import java.io.IOException; | import java.io.IOException; | ||||
import java.io.RandomAccessFile; | import java.io.RandomAccessFile; | ||||
import java.util.Base64; | import java.util.Base64; | ||||
Show All 9 Lines | |||||
* using SWHID) and the output (convert back to SWHID for users results). | * using SWHID) and the output (convert back to SWHID for users results). | ||||
* | * | ||||
* Since graph traversal can be restricted depending on the node type (see {@link AllowedEdges}), a | * Since graph traversal can be restricted depending on the node type (see {@link AllowedEdges}), a | ||||
* long id → node type map is stored as well to avoid a full SWHID lookup. | * long id → node type map is stored as well to avoid a full SWHID lookup. | ||||
* | * | ||||
* @see NodeIdMap | * @see NodeIdMap | ||||
* @see NodeTypesMap | * @see NodeTypesMap | ||||
*/ | */ | ||||
public class SwhGraphProperties { | public class SwhGraphProperties implements FlyweightPrototype<SwhGraphProperties> { | ||||
private final String path; | private final String path; | ||||
private final NodeIdMap nodeIdMap; | private final NodeIdMap nodeIdMap; | ||||
private final NodeTypesMap nodeTypesMap; | private final NodeTypesMap nodeTypesMap; | ||||
private LongBigList authorTimestamp; | private LongBigList authorTimestamp; | ||||
private ShortBigList authorTimestampOffset; | private ShortBigList authorTimestampOffset; | ||||
private LongBigList committerTimestamp; | private LongBigList committerTimestamp; | ||||
private ShortBigList committerTimestampOffset; | private ShortBigList committerTimestampOffset; | ||||
private LongBigList contentLength; | private LongBigList contentLength; | ||||
private LongArrayBitVector contentIsSkipped; | private LongArrayBitVector contentIsSkipped; | ||||
private IntBigList authorId; | private IntBigList authorId; | ||||
private IntBigList committerId; | private IntBigList committerId; | ||||
private ByteBigList messageBuffer; | private ByteBigList messageBuffer; | ||||
private LongBigList messageOffsets; | private LongBigList messageOffsets; | ||||
private ByteBigList tagNameBuffer; | private ByteBigList tagNameBuffer; | ||||
private LongBigList tagNameOffsets; | private LongBigList tagNameOffsets; | ||||
private MappedFrontCodedStringBigList edgeLabelNames; | private MappedFrontCodedStringBigList edgeLabelNames; | ||||
/**
 * Creates a properties holder for the graph at {@code path} with only the node maps set.
 *
 * All optional property fields keep their default {@code null} value.
 *
 * @param path path the properties are associated with
 * @param nodeIdMap node id map to use
 * @param nodeTypesMap node types map to use
 */
protected SwhGraphProperties(String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap) {
    // The three assignments are independent; the maps are stored first, then the path.
    this.nodeIdMap = nodeIdMap;
    this.nodeTypesMap = nodeTypesMap;
    this.path = path;
}
/**
 * Creates a properties holder with every field supplied by the caller.
 *
 * Each argument is stored as-is in the field of the same name; nothing is loaded or copied here.
 *
 * @param path path the properties are associated with
 * @param nodeIdMap node id map to use
 * @param nodeTypesMap node types map to use
 * @param authorTimestamp value for the {@code authorTimestamp} field (may be {@code null})
 * @param authorTimestampOffset value for the {@code authorTimestampOffset} field (may be {@code null})
 * @param committerTimestamp value for the {@code committerTimestamp} field (may be {@code null})
 * @param committerTimestampOffset value for the {@code committerTimestampOffset} field (may be {@code null})
 * @param contentLength value for the {@code contentLength} field (may be {@code null})
 * @param contentIsSkipped value for the {@code contentIsSkipped} field (may be {@code null})
 * @param authorId value for the {@code authorId} field (may be {@code null})
 * @param committerId value for the {@code committerId} field (may be {@code null})
 * @param messageBuffer value for the {@code messageBuffer} field (may be {@code null})
 * @param messageOffsets value for the {@code messageOffsets} field (may be {@code null})
 * @param tagNameBuffer value for the {@code tagNameBuffer} field (may be {@code null})
 * @param tagNameOffsets value for the {@code tagNameOffsets} field (may be {@code null})
 * @param edgeLabelNames value for the {@code edgeLabelNames} field (may be {@code null})
 */
protected SwhGraphProperties(String path, NodeIdMap nodeIdMap, NodeTypesMap nodeTypesMap,
        LongBigList authorTimestamp, ShortBigList authorTimestampOffset, LongBigList committerTimestamp,
        ShortBigList committerTimestampOffset, LongBigList contentLength, LongArrayBitVector contentIsSkipped,
        IntBigList authorId, IntBigList committerId, ByteBigList messageBuffer, LongBigList messageOffsets,
        ByteBigList tagNameBuffer, LongBigList tagNameOffsets, MappedFrontCodedStringBigList edgeLabelNames) {
    // Graph location and node maps.
    this.path = path;
    this.nodeIdMap = nodeIdMap;
    this.nodeTypesMap = nodeTypesMap;

    // Timestamps and their offsets.
    // Review note (JaredR26): timestamps and content lengths were reportedly already
    // safe for concurrent reads before this constructor was introduced.
    this.authorTimestamp = authorTimestamp;
    this.authorTimestampOffset = authorTimestampOffset;
    this.committerTimestamp = committerTimestamp;
    this.committerTimestampOffset = committerTimestampOffset;

    // Content properties.
    this.contentLength = contentLength;
    this.contentIsSkipped = contentIsSkipped;

    // Person identifiers.
    this.authorId = authorId;
    this.committerId = committerId;

    // Messages and tag names, each stored as a byte buffer plus an offsets list.
    this.messageBuffer = messageBuffer;
    this.messageOffsets = messageOffsets;
    this.tagNameBuffer = tagNameBuffer;
    this.tagNameOffsets = tagNameOffsets;

    // Edge label names.
    this.edgeLabelNames = edgeLabelNames;
}
public SwhGraphProperties copy() { | |||||
return new SwhGraphProperties(path, nodeIdMap.copy(), nodeTypesMap.copy(), | |||||
((authorTimestamp instanceof LongMappedBigList) | |||||
? ((LongMappedBigList) authorTimestamp).copy() | |||||
: authorTimestamp), | |||||
((authorTimestampOffset instanceof ShortMappedBigList) | |||||
? ((ShortMappedBigList) authorTimestampOffset).copy() | |||||
: authorTimestampOffset), | |||||
((committerTimestamp instanceof LongMappedBigList) | |||||
? ((LongMappedBigList) committerTimestamp).copy() | |||||
: committerTimestamp), | |||||
((committerTimestampOffset instanceof ShortMappedBigList) | |||||
? ((ShortMappedBigList) committerTimestampOffset).copy() | |||||
: committerTimestampOffset), | |||||
((contentLength instanceof LongMappedBigList) | |||||
? ((LongMappedBigList) contentLength).copy() | |||||
: contentLength), | |||||
(contentIsSkipped != null) ? contentIsSkipped.copy() : null, | |||||
((authorId instanceof IntMappedBigList) ? ((IntMappedBigList) authorId).copy() : authorId), | |||||
((committerId instanceof IntMappedBigList) ? ((IntMappedBigList) committerId).copy() : committerId), | |||||
((messageBuffer instanceof ByteMappedBigList) | |||||
? ((ByteMappedBigList) messageBuffer).copy() | |||||
: messageBuffer), | |||||
((messageOffsets instanceof LongMappedBigList) | |||||
? ((LongMappedBigList) messageOffsets).copy() | |||||
: messageOffsets), | |||||
((tagNameBuffer instanceof ByteMappedBigList) | |||||
? ((ByteMappedBigList) tagNameBuffer).copy() | |||||
: tagNameBuffer), | |||||
((tagNameOffsets instanceof LongMappedBigList) | |||||
? ((LongMappedBigList) tagNameOffsets).copy() | |||||
: tagNameOffsets), | |||||
// TODO: not thread safe!! see https://github.com/vigna/dsiutils/issues/5 | |||||
// Once https://github.com/vigna/dsiutils/pull/6 is merged, add a .copy() here: | |||||
edgeLabelNames); | |||||
} | |||||
public static SwhGraphProperties load(String path) throws IOException { | public static SwhGraphProperties load(String path) throws IOException { | ||||
return new SwhGraphProperties(path, new NodeIdMap(path), new NodeTypesMap(path)); | return new SwhGraphProperties(path, new NodeIdMap(path), new NodeTypesMap(path)); | ||||
} | } | ||||
/** | /** | ||||
* Cleans up resources after use. | * Cleans up resources after use. | ||||
*/ | */ | ||||
public void close() throws IOException { | public void close() throws IOException { | ||||
▲ Show 20 Lines • Show All 252 Lines • Show Last 20 Lines |
FYI, timestamps and content lengths were already thread-safe for reading in my experience; or, if they weren't, that didn't cause any issues I could find. edgeLabelNames and nodeIdMap were the problems, and I'm guessing the same applies to any of the other string ones.