ORCGraphDataset.java

package org.softwareheritage.graph.compress;
import com.github.luben.zstd.ZstdOutputStream;
import com.google.common.primitives.Bytes;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinTask;
/**
* A graph dataset in ORC format.
*
* This dataset format is a full export of the graph, including all node and edge properties.
*
* For convenience, this class also provides a main method to print all the edges of the graph, so
* that the output can be piped to {@link it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph}.
*
* Reading edges from ORC files using this class is roughly 2.5 times slower than reading them
* directly from a plaintext format.
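*
* A minimal usage sketch (the dataset path is hypothetical):
*
* <pre>{@code
* ORCGraphDataset dataset = new ORCGraphDataset("/srv/softwareheritage/dataset/orc");
* dataset.readEdges(
*     node -> System.out.write(node),
*     (src, dst, label, perms) -> {
*         System.out.write(src);
*         System.out.write(' ');
*         System.out.write(dst);
*         System.out.write('\n');
*     });
* }</pre>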
*/
public class ORCGraphDataset implements GraphDataset {
static final Logger logger = LoggerFactory.getLogger(ORCGraphDataset.class);
public static final int ORC_BATCH_SIZE = 16 * 1024;
private File datasetDir;
protected ORCGraphDataset() {
}
public ORCGraphDataset(String datasetPath) {
this(new File(datasetPath));
}
public ORCGraphDataset(File datasetDir) {
if (!datasetDir.exists()) {
throw new IllegalArgumentException("Dataset " + datasetDir.getName() + " does not exist");
}
this.datasetDir = datasetDir;
}
/**
* Return the given table as a {@link SwhOrcTable}, or {@code null} if no such table exists in the
* dataset. The return value can be downcast to the specific table type it represents.
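*
* For example (illustrative, where {@code dataset} is an instance of this class):
*
* <pre>{@code
* RevisionOrcTable revisions = (RevisionOrcTable) dataset.getTable("revision");
* }</pre>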
*/
public SwhOrcTable getTable(String tableName) {
File tableDir = new File(datasetDir, tableName);
if (!tableDir.exists()) {
return null;
}
switch (tableName) {
case "skipped_content":
return new SkippedContentOrcTable(tableDir);
case "content":
return new ContentOrcTable(tableDir);
case "directory":
return new DirectoryOrcTable(tableDir);
case "directory_entry":
return new DirectoryEntryOrcTable(tableDir);
case "revision":
return new RevisionOrcTable(tableDir);
case "revision_history":
return new RevisionHistoryOrcTable(tableDir);
case "release":
return new ReleaseOrcTable(tableDir);
case "snapshot_branch":
return new SnapshotBranchOrcTable(tableDir);
case "snapshot":
return new SnapshotOrcTable(tableDir);
case "origin_visit_status":
return new OriginVisitStatusOrcTable(tableDir);
case "origin_visit":
return new OriginVisitOrcTable(tableDir);
case "origin":
return new OriginOrcTable(tableDir);
default:
return null;
}
}
/** Return all the tables in this dataset, keyed by table name, as a map of {@link SwhOrcTable}. */
public Map<String, SwhOrcTable> allTables() {
HashMap<String, SwhOrcTable> tables = new HashMap<>();
File[] tableDirs = datasetDir.listFiles();
if (tableDirs == null) {
return tables;
}
for (File tableDir : tableDirs) {
SwhOrcTable table = getTable(tableDir.getName());
if (table != null) {
tables.put(tableDir.getName(), table);
}
}
return tables;
}
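/** Read all the nodes and edges of the dataset, calling the given callbacks for each node and edge. */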
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
Map<String, SwhOrcTable> tables = allTables();
for (SwhOrcTable table : tables.values()) {
table.readEdges(nodeCb, edgeCb);
}
}
/**
* A class representing an ORC table, stored on disk as a set of ORC files all in the same
* directory.
*/
public static class ORCTable {
private final File tableDir;
public ORCTable(File tableDir) {
if (!tableDir.exists()) {
throw new IllegalArgumentException("Table " + tableDir.getName() + " does not exist");
}
this.tableDir = tableDir;
}
public static ORCTable load(File tableDir) {
return new ORCTable(tableDir);
}
/**
* Utility function for byte columns: return the value of the given row in the column vector as a
* byte array.
*/
public static byte[] getBytesRow(BytesColumnVector columnVector, int row) {
if (columnVector.isRepeating) {
row = 0;
}
return Arrays.copyOfRange(columnVector.vector[row], columnVector.start[row],
columnVector.start[row] + columnVector.length[row]);
}
/**
* Utility function for long columns: return the value of the given row in the column vector as a
* long.
*/
public static long getLongRow(LongColumnVector columnVector, int row) {
if (columnVector.isRepeating) {
row = 0;
}
return columnVector.vector[row];
}
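/** Handler invoked for each batch of rows read from an ORC file, with a map from column name to index. */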
interface ReadOrcBatchHandler {
void accept(VectorizedRowBatch batch, Map<String, Integer> columnMap) throws IOException;
}
/**
* Read the table, calling the given handler for each batch of rows. If {@code columns} is not
* null, only the columns present in that set are scanned instead of the entire table.
*
* If this method is called from within a ForkJoinPool, the ORC table will be read in parallel using
* that thread pool. Otherwise, the ORC files will be read sequentially.
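*
* A minimal sketch of a parallel read (the pool size, table path and column choice are arbitrary):
*
* <pre>{@code
* ORCTable table = ORCTable.load(new File("/srv/dataset/orc/origin"));
* AtomicLong rows = new AtomicLong();
* ForkJoinPool pool = new ForkJoinPool(8);
* pool.submit(() -> {
*     try {
*         table.readOrcTable((batch, columnMap) -> rows.addAndGet(batch.size), Set.of("url"));
*     } catch (IOException e) {
*         throw new RuntimeException(e);
*     }
* }).join();
* pool.shutdown();
* }</pre>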
*/
public void readOrcTable(ReadOrcBatchHandler batchHandler, Set<String> columns) throws IOException {
File[] listing = tableDir.listFiles();
if (listing == null) {
throw new IOException("No files found in " + tableDir.getName());
}
ForkJoinPool forkJoinPool = ForkJoinTask.getPool();
if (forkJoinPool == null) {
// Sequential case
for (File file : listing) {
readOrcFile(file.getPath(), batchHandler, columns);
}
} else {
// Parallel case
ArrayList<File> listingArray = new ArrayList<>(Arrays.asList(listing));
listingArray.parallelStream().forEach(file -> {
try {
readOrcFile(file.getPath(), batchHandler, columns);
} catch (IOException e) {
throw new RuntimeException(e);
}
});
}
}
private void readOrcFile(String path, ReadOrcBatchHandler batchHandler, Set<String> columns)
throws IOException {
try (Reader reader = OrcFile.createReader(new Path(path), OrcFile.readerOptions(new Configuration()))) {
TypeDescription schema = reader.getSchema();
Reader.Options options = reader.options();
if (columns != null) {
options.include(createColumnsToRead(schema, columns));
}
Map<String, Integer> columnMap = getColumnMap(schema);
try (RecordReader records = reader.rows(options)) {
VectorizedRowBatch batch = reader.getSchema().createRowBatch(ORC_BATCH_SIZE);
while (records.nextBatch(batch)) {
batchHandler.accept(batch, columnMap);
}
}
}
}
private static Map<String, Integer> getColumnMap(TypeDescription schema) {
Map<String, Integer> columnMap = new HashMap<>();
List<String> fieldNames = schema.getFieldNames();
for (int i = 0; i < fieldNames.size(); i++) {
columnMap.put(fieldNames.get(i), i);
}
return columnMap;
}
private static boolean[] createColumnsToRead(TypeDescription schema, Set<String> columns) {
boolean[] columnsToRead = new boolean[schema.getMaximumId() + 1];
List<String> fieldNames = schema.getFieldNames();
List<TypeDescription> columnTypes = schema.getChildren();
for (int i = 0; i < fieldNames.size(); i++) {
if (columns.contains(fieldNames.get(i))) {
logger.debug("Adding column " + fieldNames.get(i) + " with ID " + i + " to the read list");
TypeDescription type = columnTypes.get(i);
for (int id = type.getId(); id <= type.getMaximumId(); id++) {
columnsToRead[id] = true;
}
}
}
return columnsToRead;
}
}
/** Base class for SWH-specific ORC tables. */
public static class SwhOrcTable {
protected ORCTable orcTable;
protected static final byte[] cntPrefix = "swh:1:cnt:".getBytes();
protected static final byte[] dirPrefix = "swh:1:dir:".getBytes();
protected static final byte[] revPrefix = "swh:1:rev:".getBytes();
protected static final byte[] relPrefix = "swh:1:rel:".getBytes();
protected static final byte[] snpPrefix = "swh:1:snp:".getBytes();
protected static final byte[] oriPrefix = "swh:1:ori:".getBytes();
protected String getIdColumn() {
return "id";
}
protected byte[] getSwhidPrefix() {
throw new UnsupportedOperationException();
}
protected byte[] idToSwhid(byte[] id) {
return Bytes.concat(getSwhidPrefix(), id);
}
protected SwhOrcTable() {
}
public SwhOrcTable(File tableDir) {
orcTable = new ORCTable(tableDir);
}
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
// No nodes or edges to read in the table by default.
}
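/** Compute the origin identifier (hex-encoded SHA-1 of the origin URL) used to build origin SWHIDs. */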
protected static byte[] urlToOriginId(byte[] url) {
return DigestUtils.sha1Hex(url).getBytes();
}
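/** Read the id column of the table, emitting each row as a SWHID node. */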
public void readIdColumn(NodeCallback cb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())];
for (int row = 0; row < batch.size; row++) {
byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row));
cb.onNode(id);
}
}, Set.of(getIdColumn()));
}
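/** Read a long column alongside the id column, emitting a (SWHID, value) pair per row. */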
public void readLongColumn(String longColumn, LongCallback cb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())];
LongColumnVector valueVector = (LongColumnVector) batch.cols[columnMap.get(longColumn)];
for (int row = 0; row < batch.size; row++) {
byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row));
long value = ORCTable.getLongRow(valueVector, row);
cb.onLong(id, value);
}
}, Set.of(getIdColumn(), longColumn));
}
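/**
* Read a timestamp column and its offset column alongside the id column, emitting a
* (SWHID, seconds, offset) triple per row.
*/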
public void readTimestampColumn(String dateColumn, String dateOffsetColumn, TimestampCallback cb)
throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())];
TimestampColumnVector dateVector = (TimestampColumnVector) batch.cols[columnMap.get(dateColumn)];
LongColumnVector dateOffsetVector = (LongColumnVector) batch.cols[columnMap.get(dateOffsetColumn)];
for (int row = 0; row < batch.size; row++) {
byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row));
long date = dateVector.getTimestampAsLong(row); // rounded to seconds
short dateOffset = (short) ORCTable.getLongRow(dateOffsetVector, row);
cb.onTimestamp(id, date, dateOffset);
}
}, Set.of(getIdColumn(), dateColumn, dateOffsetColumn));
}
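/** Read a bytes column alongside the id column, emitting its value Base64-encoded. */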
public void readBytes64Column(String bytesColumn, BytesCallback cb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())];
BytesColumnVector valueVector = (BytesColumnVector) batch.cols[columnMap.get(bytesColumn)];
for (int row = 0; row < batch.size; row++) {
byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row));
byte[] value = Base64.getEncoder().encode(ORCTable.getBytesRow(valueVector, row));
cb.onBytes(id, value);
}
}, Set.of(getIdColumn(), bytesColumn));
}
}
public static class SkippedContentOrcTable extends SwhOrcTable {
public SkippedContentOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected String getIdColumn() {
return "sha1_git";
}
@Override
protected byte[] getSwhidPrefix() {
return cntPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
readIdColumn(nodeCb);
}
}
public static class ContentOrcTable extends SwhOrcTable {
public ContentOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected String getIdColumn() {
return "sha1_git";
}
@Override
protected byte[] getSwhidPrefix() {
return cntPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
readIdColumn(nodeCb);
}
}
public static class DirectoryOrcTable extends SwhOrcTable {
public DirectoryOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected byte[] getSwhidPrefix() {
return dirPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
readIdColumn(nodeCb);
}
}
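/**
* ORC table of directory entries, yielding dir -> {cnt, dir, rev} edges labeled with the entry
* name and permissions.
*/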
public static class DirectoryEntryOrcTable extends SwhOrcTable {
public DirectoryEntryOrcTable(File tableDir) {
super(tableDir);
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
byte[] cntType = "file".getBytes();
byte[] dirType = "dir".getBytes();
byte[] revType = "rev".getBytes();
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector srcVector = (BytesColumnVector) batch.cols[columnMap.get("directory_id")];
BytesColumnVector dstVector = (BytesColumnVector) batch.cols[columnMap.get("target")];
BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("type")];
BytesColumnVector labelVector = (BytesColumnVector) batch.cols[columnMap.get("name")];
LongColumnVector permissionVector = (LongColumnVector) batch.cols[columnMap.get("perms")];
for (int row = 0; row < batch.size; row++) {
byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row);
byte[] targetPrefix;
if (Arrays.equals(targetType, cntType)) {
targetPrefix = cntPrefix;
} else if (Arrays.equals(targetType, dirType)) {
targetPrefix = dirPrefix;
} else if (Arrays.equals(targetType, revType)) {
targetPrefix = revPrefix;
} else {
continue;
}
byte[] src = Bytes.concat(dirPrefix, ORCTable.getBytesRow(srcVector, row));
byte[] dst = Bytes.concat(targetPrefix, ORCTable.getBytesRow(dstVector, row));
byte[] label = Base64.getEncoder().encode(ORCTable.getBytesRow(labelVector, row));
long permission = ORCTable.getLongRow(permissionVector, row);
edgeCb.onEdge(src, dst, label, (int) permission);
}
}, Set.of("directory_id", "target", "type", "name", "perms"));
}
}
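/** ORC table of revisions, yielding revision nodes and rev -> dir edges to their root directory. */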
public static class RevisionOrcTable extends SwhOrcTable {
public RevisionOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected byte[] getSwhidPrefix() {
return revPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector revisionIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")];
BytesColumnVector directoryIdVector = (BytesColumnVector) batch.cols[columnMap.get("directory")];
for (int row = 0; row < batch.size; row++) {
byte[] revisionId = Bytes.concat(revPrefix, ORCTable.getBytesRow(revisionIdVector, row));
byte[] directoryId = Bytes.concat(dirPrefix, ORCTable.getBytesRow(directoryIdVector, row));
nodeCb.onNode(revisionId);
edgeCb.onEdge(revisionId, directoryId, null, -1);
}
}, Set.of("id", "directory"));
}
}
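/** ORC table of revision parents, yielding rev -> rev edges from each revision to its parents. */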
public static class RevisionHistoryOrcTable extends SwhOrcTable {
public RevisionHistoryOrcTable(File tableDir) {
super(tableDir);
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector revisionIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")];
BytesColumnVector parentIdVector = (BytesColumnVector) batch.cols[columnMap.get("parent_id")];
for (int row = 0; row < batch.size; row++) {
byte[] parentId = Bytes.concat(revPrefix, ORCTable.getBytesRow(parentIdVector, row));
byte[] revisionId = Bytes.concat(revPrefix, ORCTable.getBytesRow(revisionIdVector, row));
edgeCb.onEdge(revisionId, parentId, null, -1);
}
}, Set.of("id", "parent_id"));
}
}
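/** ORC table of releases, yielding release nodes and rel -> {cnt, dir, rev, rel} edges to their targets. */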
public static class ReleaseOrcTable extends SwhOrcTable {
public ReleaseOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected byte[] getSwhidPrefix() {
return relPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
byte[] cntType = "content".getBytes();
byte[] dirType = "directory".getBytes();
byte[] revType = "revision".getBytes();
byte[] relType = "release".getBytes();
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector releaseIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")];
BytesColumnVector targetIdVector = (BytesColumnVector) batch.cols[columnMap.get("target")];
BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("target_type")];
for (int row = 0; row < batch.size; row++) {
byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row);
byte[] targetPrefix;
if (Arrays.equals(targetType, cntType)) {
targetPrefix = cntPrefix;
} else if (Arrays.equals(targetType, dirType)) {
targetPrefix = dirPrefix;
} else if (Arrays.equals(targetType, revType)) {
targetPrefix = revPrefix;
} else if (Arrays.equals(targetType, relType)) {
targetPrefix = relPrefix;
} else {
continue;
}
byte[] releaseId = Bytes.concat(relPrefix, ORCTable.getBytesRow(releaseIdVector, row));
byte[] targetId = Bytes.concat(targetPrefix, ORCTable.getBytesRow(targetIdVector, row));
nodeCb.onNode(releaseId);
edgeCb.onEdge(releaseId, targetId, null, -1);
}
}, Set.of("id", "target", "target_type"));
}
}
public static class SnapshotOrcTable extends SwhOrcTable {
public SnapshotOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected byte[] getSwhidPrefix() {
return snpPrefix;
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
readIdColumn(nodeCb);
}
}
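/**
* ORC table of snapshot branches, yielding snp -> {cnt, dir, rev, rel} edges labeled with the
* Base64-encoded branch name.
*/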
public static class SnapshotBranchOrcTable extends SwhOrcTable {
public SnapshotBranchOrcTable(File tableDir) {
super(tableDir);
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
byte[] cntType = "content".getBytes();
byte[] dirType = "directory".getBytes();
byte[] revType = "revision".getBytes();
byte[] relType = "release".getBytes();
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector snapshotIdVector = (BytesColumnVector) batch.cols[columnMap.get("snapshot_id")];
BytesColumnVector targetIdVector = (BytesColumnVector) batch.cols[columnMap.get("target")];
BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("target_type")];
BytesColumnVector branchNameVector = (BytesColumnVector) batch.cols[columnMap.get("name")];
for (int row = 0; row < batch.size; row++) {
byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row);
byte[] targetPrefix;
if (Arrays.equals(targetType, cntType)) {
targetPrefix = cntPrefix;
} else if (Arrays.equals(targetType, dirType)) {
targetPrefix = dirPrefix;
} else if (Arrays.equals(targetType, revType)) {
targetPrefix = revPrefix;
} else if (Arrays.equals(targetType, relType)) {
targetPrefix = relPrefix;
} else {
continue;
}
byte[] snapshotId = Bytes.concat(snpPrefix, ORCTable.getBytesRow(snapshotIdVector, row));
byte[] targetId = Bytes.concat(targetPrefix, ORCTable.getBytesRow(targetIdVector, row));
byte[] branchName = Base64.getEncoder().encode(ORCTable.getBytesRow(branchNameVector, row));
nodeCb.onNode(snapshotId);
edgeCb.onEdge(snapshotId, targetId, branchName, -1);
}
}, Set.of("snapshot_id", "name", "target", "target_type"));
}
}
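/** ORC table of origin visit statuses, yielding ori -> snp edges for visits that recorded a snapshot. */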
public static class OriginVisitStatusOrcTable extends SwhOrcTable {
public OriginVisitStatusOrcTable(File tableDir) {
super(tableDir);
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector originUrlVector = (BytesColumnVector) batch.cols[columnMap.get("origin")];
BytesColumnVector snapshotIdVector = (BytesColumnVector) batch.cols[columnMap.get("snapshot")];
for (int row = 0; row < batch.size; row++) {
byte[] originId = urlToOriginId(ORCTable.getBytesRow(originUrlVector, row));
byte[] snapshotId = ORCTable.getBytesRow(snapshotIdVector, row);
if (snapshotId.length == 0) {
continue;
}
edgeCb.onEdge(Bytes.concat(oriPrefix, originId), Bytes.concat(snpPrefix, snapshotId), null, -1);
}
}, Set.of("origin", "snapshot"));
}
}
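/** ORC table of origin visits; contributes no nodes or edges to the graph by itself. */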
public static class OriginVisitOrcTable extends SwhOrcTable {
public OriginVisitOrcTable(File tableDir) {
super(tableDir);
}
}
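/** ORC table of origins, yielding origin nodes whose identifier is the SHA-1 of the origin URL. */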
public static class OriginOrcTable extends SwhOrcTable {
public OriginOrcTable(File tableDir) {
super(tableDir);
}
@Override
protected byte[] getSwhidPrefix() {
return oriPrefix;
}
@Override
protected byte[] idToSwhid(byte[] id) {
return Bytes.concat(getSwhidPrefix(), urlToOriginId(id));
}
@Override
protected String getIdColumn() {
return "url";
}
@Override
public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException {
readIdColumn(nodeCb);
}
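/** Read the origin URLs, emitting a (SWHID, Base64-encoded URL) pair per origin. */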
public void readURLs(BytesCallback cb) throws IOException {
orcTable.readOrcTable((batch, columnMap) -> {
BytesColumnVector urlVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())];
for (int row = 0; row < batch.size; row++) {
byte[] id = idToSwhid(ORCTable.getBytesRow(urlVector, row));
byte[] url = Base64.getEncoder().encode(ORCTable.getBytesRow(urlVector, row));
cb.onBytes(id, url);
}
}, Set.of(getIdColumn()));
}
}
/**
* Export an ORC graph to the CSV edge dataset format as two files,
* <code>&lt;basename&gt;.nodes.csv.zst</code> and <code>&lt;basename&gt;.edges.csv.zst</code>.
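*
* For example (illustrative paths), the following call produces <code>graph.nodes.csv.zst</code>
* and <code>graph.edges.csv.zst</code>:
*
* <pre>{@code
* ORCGraphDataset.exportToCsvDataset("/srv/dataset/orc", "/srv/dataset/graph");
* }</pre>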
*/
public static void exportToCsvDataset(String orcDataset, String csvDatasetBasename) throws IOException {
ORCGraphDataset dataset = new ORCGraphDataset(orcDataset);
File nodesFile = new File(csvDatasetBasename + ".nodes.csv.zst");
File edgesFile = new File(csvDatasetBasename + ".edges.csv.zst");
FastBufferedOutputStream nodesOut = new FastBufferedOutputStream(
new ZstdOutputStream(new FileOutputStream(nodesFile)));
FastBufferedOutputStream edgesOut = new FastBufferedOutputStream(
new ZstdOutputStream(new FileOutputStream(edgesFile)));
dataset.readEdges((node) -> {
nodesOut.write(node);
nodesOut.write('\n');
}, (src, dst, label, perms) -> {
edgesOut.write(src);
edgesOut.write(' ');
edgesOut.write(dst);
if (label != null) {
edgesOut.write(' ');
edgesOut.write(label);
edgesOut.write(' ');
}
if (perms != -1) {
edgesOut.write(' ');
edgesOut.write(Long.toString(perms).getBytes());
}
edgesOut.write('\n');
});
// Close the output streams to flush the buffers and finalize the zstd frames.
nodesOut.close();
edgesOut.close();
}
/**
* Print all the edges of the graph to stdout. Can be piped to
* {@link it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph} to import the graph dataset and convert
* it to a {@link it.unimi.dsi.big.webgraph.BVGraph}.
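*
* For example (illustrative path):
*
* <pre>{@code
* ORCGraphDataset.printSimpleEdges("/srv/dataset/orc");
* }</pre>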
*/
public static void printSimpleEdges(String orcDataset) throws IOException {
ORCGraphDataset dataset = new ORCGraphDataset(orcDataset);
FastBufferedOutputStream out = new FastBufferedOutputStream(System.out);
dataset.readEdges((node) -> {
}, (src, dst, label, perms) -> {
out.write(src);
out.write(' ');
out.write(dst);
out.write('\n');
});
out.flush();
}
public static void main(String[] args) throws IOException {
printSimpleEdges(args[0]);
}
}
