diff --git a/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java b/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java index d16b5ae..5f82ba6 100644 --- a/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java +++ b/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java @@ -1,718 +1,721 @@ /* * Copyright (c) 2022 The Software Heritage developers * See the AUTHORS file at the top-level directory of this distribution * License: GNU General Public License version 3, or any later version * See top-level LICENSE file for more information */ package org.softwareheritage.graph.compress; import com.github.luben.zstd.ZstdOutputStream; import com.google.common.primitives.Bytes; import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; import org.apache.commons.codec.digest.DigestUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.orc.OrcFile; import org.apache.orc.Reader; import org.apache.orc.RecordReader; import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.*; import java.util.*; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinTask; /** * A graph dataset in ORC format. * * This dataset format is a full export of the graph, including all the edge and node properties. * * For convenience, this class also provides a main method to print all the edges of the * graph, so that the output can be piped to * {@link it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph}. * * Reading edges from ORC files using this class is about 2.5 times slower than reading them * directly from a plaintext format. */ public class ORCGraphDataset implements GraphDataset { final static Logger logger = LoggerFactory.getLogger(ORCGraphDataset.class); final static public int ORC_BATCH_SIZE = 16 * 1024; private File datasetDir; protected ORCGraphDataset() { } public ORCGraphDataset(String datasetPath) { this(new File(datasetPath)); } public ORCGraphDataset(File datasetDir) { if (!datasetDir.exists()) { throw new IllegalArgumentException("Dataset " + datasetDir.getName() + " does not exist"); } this.datasetDir = datasetDir; } /** * Return the given table as a {@link SwhOrcTable}. The return value can be downcast to the type * of the specific table it represents.
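 *
 * <p>For example, a hypothetical usage sketch (the dataset path is a placeholder):
 *
 * <pre>{@code
 * ORCGraphDataset dataset = new ORCGraphDataset("/path/to/orc/dataset");
 * RevisionOrcTable revisions = (RevisionOrcTable) dataset.getTable("revision");
 * }</pre>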
*/ public SwhOrcTable getTable(String tableName) { File tableDir = new File(datasetDir, tableName); if (!tableDir.exists()) { return null; } switch (tableName) { case "skipped_content": return new SkippedContentOrcTable(tableDir); case "content": return new ContentOrcTable(tableDir); case "directory": return new DirectoryOrcTable(tableDir); case "directory_entry": return new DirectoryEntryOrcTable(tableDir); case "revision": return new RevisionOrcTable(tableDir); case "revision_history": return new RevisionHistoryOrcTable(tableDir); case "release": return new ReleaseOrcTable(tableDir); case "snapshot_branch": return new SnapshotBranchOrcTable(tableDir); case "snapshot": return new SnapshotOrcTable(tableDir); case "origin_visit_status": return new OriginVisitStatusOrcTable(tableDir); case "origin_visit": return new OriginVisitOrcTable(tableDir); case "origin": return new OriginOrcTable(tableDir); default: return null; } } /** Return all the tables in this dataset as a map of {@link SwhOrcTable}. */ public Map<String, SwhOrcTable> allTables() { HashMap<String, SwhOrcTable> tables = new HashMap<>(); File[] tableDirs = datasetDir.listFiles(); if (tableDirs == null) { return tables; } for (File tableDir : tableDirs) { SwhOrcTable table = getTable(tableDir.getName()); if (table != null) { tables.put(tableDir.getName(), table); } } return tables; } public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { Map<String, SwhOrcTable> tables = allTables(); for (SwhOrcTable table : tables.values()) { table.readEdges(nodeCb, edgeCb); } } /** * A class representing an ORC table, stored on disk as a set of ORC files all in the same * directory. */ public static class ORCTable { private final File tableDir; public ORCTable(File tableDir) { if (!tableDir.exists()) { throw new IllegalArgumentException("Table " + tableDir.getName() + " does not exist"); } this.tableDir = tableDir; } public static ORCTable load(File tableDir) { return new ORCTable(tableDir); } /** * Utility function for byte columns. Return as a byte array the value of the given row in the * column vector, or null if the value is null. */ public static byte[] getBytesRow(BytesColumnVector columnVector, int row) { if (columnVector.isRepeating) { row = 0; } if (columnVector.isNull[row]) { return null; } return Arrays.copyOfRange(columnVector.vector[row], columnVector.start[row], columnVector.start[row] + columnVector.length[row]); } /** * Utility function for long columns. Return as a {@link Long} the value of the given row in the * column vector, or null if the value is null. */ public static Long getLongRow(LongColumnVector columnVector, int row) { if (columnVector.isRepeating) { row = 0; } if (columnVector.isNull[row]) { return null; } return columnVector.vector[row]; } interface ReadOrcBatchHandler { void accept(VectorizedRowBatch batch, Map<String, Integer> columnMap) throws IOException; } /** * Read the table, calling the given handler for each new batch of rows. If {@code columns} is * not null, only the columns present in this set are scanned instead of the entire table. * * If this method is called from within a ForkJoinPool, the ORC table will be read in parallel using * that thread pool. Otherwise, the ORC files will be read sequentially.
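 *
 * <p>A hypothetical sketch of the parallel path, driven from a caller-supplied pool ({@code table}
 * and the column set are placeholders):
 *
 * <pre>{@code
 * ForkJoinPool pool = new ForkJoinPool(4);
 * pool.submit(() -> {
 *     try {
 *         table.readOrcTable((batch, columnMap) -> {
 *             // consume one batch of up to ORC_BATCH_SIZE rows
 *         }, Set.of("id"));
 *     } catch (IOException e) {
 *         throw new RuntimeException(e);
 *     }
 * }).join();
 * }</pre>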
*/ public void readOrcTable(ReadOrcBatchHandler batchHandler, Set<String> columns) throws IOException { File[] listing = tableDir.listFiles(); if (listing == null) { throw new IOException("No files found in " + tableDir.getName()); } ForkJoinPool forkJoinPool = ForkJoinTask.getPool(); if (forkJoinPool == null) { // Sequential case for (File file : listing) { readOrcFile(file.getPath(), batchHandler, columns); } } else { // Parallel case ArrayList<File> listingArray = new ArrayList<>(Arrays.asList(listing)); listingArray.parallelStream().forEach(file -> { try { readOrcFile(file.getPath(), batchHandler, columns); } catch (IOException e) { throw new RuntimeException(e); } }); } } private void readOrcFile(String path, ReadOrcBatchHandler batchHandler, Set<String> columns) throws IOException { try (Reader reader = OrcFile.createReader(new Path(path), OrcFile.readerOptions(new Configuration()))) { TypeDescription schema = reader.getSchema(); Reader.Options options = reader.options(); if (columns != null) { options.include(createColumnsToRead(schema, columns)); } Map<String, Integer> columnMap = getColumnMap(schema); try (RecordReader records = reader.rows(options)) { VectorizedRowBatch batch = schema.createRowBatch(ORC_BATCH_SIZE); while (records.nextBatch(batch)) { batchHandler.accept(batch, columnMap); } } } } private static Map<String, Integer> getColumnMap(TypeDescription schema) { Map<String, Integer> columnMap = new HashMap<>(); List<String> fieldNames = schema.getFieldNames(); for (int i = 0; i < fieldNames.size(); i++) { columnMap.put(fieldNames.get(i), i); } return columnMap; } private static boolean[] createColumnsToRead(TypeDescription schema, Set<String> columns) { boolean[] columnsToRead = new boolean[schema.getMaximumId() + 1]; List<String> fieldNames = schema.getFieldNames(); List<TypeDescription> columnTypes = schema.getChildren(); for (int i = 0; i < fieldNames.size(); i++) { if (columns.contains(fieldNames.get(i))) { logger.debug("Adding column " + fieldNames.get(i) + " with ID " + i + " to the read list"); TypeDescription type = columnTypes.get(i); for (int id = type.getId(); id <= type.getMaximumId(); id++) { columnsToRead[id] = true; } } } return columnsToRead; } } /** Base class for SWH-specific ORC tables. */ public static class SwhOrcTable { protected ORCTable orcTable; protected static final byte[] cntPrefix = "swh:1:cnt:".getBytes(); protected static final byte[] dirPrefix = "swh:1:dir:".getBytes(); protected static final byte[] revPrefix = "swh:1:rev:".getBytes(); protected static final byte[] relPrefix = "swh:1:rel:".getBytes(); protected static final byte[] snpPrefix = "swh:1:snp:".getBytes(); protected static final byte[] oriPrefix = "swh:1:ori:".getBytes(); protected String getIdColumn() { return "id"; } protected byte[] getSwhidPrefix() { throw new UnsupportedOperationException(); } protected byte[] idToSwhid(byte[] id) { return Bytes.concat(getSwhidPrefix(), id); } protected SwhOrcTable() { } public SwhOrcTable(File tableDir) { orcTable = new ORCTable(tableDir); } public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { // No nodes or edges to read in the table by default.
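// Subclasses that contribute nodes or edges override this method.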
} protected static byte[] urlToOriginId(byte[] url) { return DigestUtils.sha1Hex(url).getBytes(); } public void readIdColumn(NodeCallback cb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())]; for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); cb.onNode(id); } }, Set.of(getIdColumn())); } public void readLongColumn(String longColumn, LongCallback cb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())]; LongColumnVector dateVector = (LongColumnVector) batch.cols[columnMap.get(longColumn)]; for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); Long date = ORCTable.getLongRow(dateVector, row); if (date != null) { cb.onLong(id, date); } } }, Set.of(getIdColumn(), longColumn)); } public void readTimestampColumn(String dateColumn, String dateOffsetColumn, TimestampCallback cb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())]; TimestampColumnVector dateVector = (TimestampColumnVector) batch.cols[columnMap.get(dateColumn)]; LongColumnVector dateOffsetVector = (LongColumnVector) batch.cols[columnMap.get(dateOffsetColumn)]; for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); long date = dateVector.getTimestampAsLong(row); // rounded to seconds Long dateOffset = ORCTable.getLongRow(dateOffsetVector, row); if (dateOffset != null) { cb.onTimestamp(id, date, dateOffset.shortValue()); } } }, Set.of(getIdColumn(), dateColumn, dateOffsetColumn)); } public void readBytes64Column(String longColumn, BytesCallback cb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector idVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())]; BytesColumnVector valueVector = (BytesColumnVector) batch.cols[columnMap.get(longColumn)]; for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); - byte[] value = Base64.getEncoder().encode(ORCTable.getBytesRow(valueVector, row)); - cb.onBytes(id, value); + byte[] value = ORCTable.getBytesRow(valueVector, row); + if (value != null) { + byte[] encodedValue = Base64.getEncoder().encode(value); + cb.onBytes(id, encodedValue); + } } }, Set.of(getIdColumn(), longColumn)); } } public static class SkippedContentOrcTable extends SwhOrcTable { public SkippedContentOrcTable(File tableDir) { super(tableDir); } @Override protected String getIdColumn() { return "sha1_git"; } @Override protected byte[] getSwhidPrefix() { return cntPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { readIdColumn(nodeCb); } } public static class ContentOrcTable extends SwhOrcTable { public ContentOrcTable(File tableDir) { super(tableDir); } @Override protected String getIdColumn() { return "sha1_git"; } @Override protected byte[] getSwhidPrefix() { return cntPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { readIdColumn(nodeCb); } } public static class DirectoryOrcTable extends SwhOrcTable { public DirectoryOrcTable(File tableDir) { super(tableDir); } @Override protected byte[] getSwhidPrefix() { 
return dirPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { readIdColumn(nodeCb); } } public static class DirectoryEntryOrcTable extends SwhOrcTable { public DirectoryEntryOrcTable(File tableDir) { super(tableDir); } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { byte[] cntType = "file".getBytes(); byte[] dirType = "dir".getBytes(); byte[] revType = "rev".getBytes(); orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector srcVector = (BytesColumnVector) batch.cols[columnMap.get("directory_id")]; BytesColumnVector dstVector = (BytesColumnVector) batch.cols[columnMap.get("target")]; BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("type")]; BytesColumnVector labelVector = (BytesColumnVector) batch.cols[columnMap.get("name")]; LongColumnVector permissionVector = (LongColumnVector) batch.cols[columnMap.get("perms")]; for (int row = 0; row < batch.size; row++) { byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row); byte[] targetPrefix; if (Arrays.equals(targetType, cntType)) { targetPrefix = cntPrefix; } else if (Arrays.equals(targetType, dirType)) { targetPrefix = dirPrefix; } else if (Arrays.equals(targetType, revType)) { targetPrefix = revPrefix; } else { continue; } byte[] src = Bytes.concat(dirPrefix, ORCTable.getBytesRow(srcVector, row)); byte[] dst = Bytes.concat(targetPrefix, ORCTable.getBytesRow(dstVector, row)); byte[] label = Base64.getEncoder().encode(ORCTable.getBytesRow(labelVector, row)); Long permission = ORCTable.getLongRow(permissionVector, row); edgeCb.onEdge(src, dst, label, permission != null ? permission.intValue() : 0); } }, Set.of("directory_id", "target", "type", "name", "perms")); } } public static class RevisionOrcTable extends SwhOrcTable { public RevisionOrcTable(File tableDir) { super(tableDir); } @Override protected byte[] getSwhidPrefix() { return revPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector revisionIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")]; BytesColumnVector directoryIdVector = (BytesColumnVector) batch.cols[columnMap.get("directory")]; for (int row = 0; row < batch.size; row++) { byte[] revisionId = Bytes.concat(revPrefix, ORCTable.getBytesRow(revisionIdVector, row)); byte[] directoryId = Bytes.concat(dirPrefix, ORCTable.getBytesRow(directoryIdVector, row)); nodeCb.onNode(revisionId); edgeCb.onEdge(revisionId, directoryId, null, -1); } }, Set.of("id", "directory")); } } public static class RevisionHistoryOrcTable extends SwhOrcTable { public RevisionHistoryOrcTable(File tableDir) { super(tableDir); } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector revisionIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")]; BytesColumnVector parentIdVector = (BytesColumnVector) batch.cols[columnMap.get("parent_id")]; for (int row = 0; row < batch.size; row++) { byte[] parentId = Bytes.concat(revPrefix, ORCTable.getBytesRow(parentIdVector, row)); byte[] revisionId = Bytes.concat(revPrefix, ORCTable.getBytesRow(revisionIdVector, row)); edgeCb.onEdge(revisionId, parentId, null, -1); } }, Set.of("id", "parent_id")); } } public static class ReleaseOrcTable extends 
SwhOrcTable { public ReleaseOrcTable(File tableDir) { super(tableDir); } @Override protected byte[] getSwhidPrefix() { return relPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { byte[] cntType = "content".getBytes(); byte[] dirType = "directory".getBytes(); byte[] revType = "revision".getBytes(); byte[] relType = "release".getBytes(); orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector releaseIdVector = (BytesColumnVector) batch.cols[columnMap.get("id")]; BytesColumnVector targetIdVector = (BytesColumnVector) batch.cols[columnMap.get("target")]; BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("target_type")]; for (int row = 0; row < batch.size; row++) { byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row); byte[] targetPrefix; if (Arrays.equals(targetType, cntType)) { targetPrefix = cntPrefix; } else if (Arrays.equals(targetType, dirType)) { targetPrefix = dirPrefix; } else if (Arrays.equals(targetType, revType)) { targetPrefix = revPrefix; } else if (Arrays.equals(targetType, relType)) { targetPrefix = relPrefix; } else { continue; } byte[] releaseId = Bytes.concat(relPrefix, ORCTable.getBytesRow(releaseIdVector, row)); byte[] targetId = Bytes.concat(targetPrefix, ORCTable.getBytesRow(targetIdVector, row)); nodeCb.onNode(releaseId); edgeCb.onEdge(releaseId, targetId, null, -1); } }, Set.of("id", "target", "target_type")); } } public static class SnapshotOrcTable extends SwhOrcTable { public SnapshotOrcTable(File tableDir) { super(tableDir); } @Override protected byte[] getSwhidPrefix() { return snpPrefix; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { readIdColumn(nodeCb); } } public static class SnapshotBranchOrcTable extends SwhOrcTable { public SnapshotBranchOrcTable(File tableDir) { super(tableDir); } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { byte[] cntType = "content".getBytes(); byte[] dirType = "directory".getBytes(); byte[] revType = "revision".getBytes(); byte[] relType = "release".getBytes(); orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector snapshotIdVector = (BytesColumnVector) batch.cols[columnMap.get("snapshot_id")]; BytesColumnVector targetIdVector = (BytesColumnVector) batch.cols[columnMap.get("target")]; BytesColumnVector targetTypeVector = (BytesColumnVector) batch.cols[columnMap.get("target_type")]; BytesColumnVector branchNameVector = (BytesColumnVector) batch.cols[columnMap.get("name")]; for (int row = 0; row < batch.size; row++) { byte[] targetType = ORCTable.getBytesRow(targetTypeVector, row); byte[] targetPrefix; if (Arrays.equals(targetType, cntType)) { targetPrefix = cntPrefix; } else if (Arrays.equals(targetType, dirType)) { targetPrefix = dirPrefix; } else if (Arrays.equals(targetType, revType)) { targetPrefix = revPrefix; } else if (Arrays.equals(targetType, relType)) { targetPrefix = relPrefix; } else { continue; } byte[] snapshotId = Bytes.concat(snpPrefix, ORCTable.getBytesRow(snapshotIdVector, row)); byte[] targetId = Bytes.concat(targetPrefix, ORCTable.getBytesRow(targetIdVector, row)); byte[] branchName = Base64.getEncoder().encode(ORCTable.getBytesRow(branchNameVector, row)); nodeCb.onNode(snapshotId); edgeCb.onEdge(snapshotId, targetId, branchName, -1); } }, Set.of("snapshot_id", "name", "target", "target_type")); } } public static class 
OriginVisitStatusOrcTable extends SwhOrcTable { public OriginVisitStatusOrcTable(File tableDir) { super(tableDir); } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector originUrlVector = (BytesColumnVector) batch.cols[columnMap.get("origin")]; BytesColumnVector snapshotIdVector = (BytesColumnVector) batch.cols[columnMap.get("snapshot")]; for (int row = 0; row < batch.size; row++) { byte[] originId = urlToOriginId(ORCTable.getBytesRow(originUrlVector, row)); byte[] snapshotId = ORCTable.getBytesRow(snapshotIdVector, row); if (snapshotId == null || snapshotId.length == 0) { continue; } edgeCb.onEdge(Bytes.concat(oriPrefix, originId), Bytes.concat(snpPrefix, snapshotId), null, -1); } }, Set.of("origin", "snapshot")); } } public static class OriginVisitOrcTable extends SwhOrcTable { public OriginVisitOrcTable(File tableDir) { super(tableDir); } } public static class OriginOrcTable extends SwhOrcTable { public OriginOrcTable(File tableDir) { super(tableDir); } @Override protected byte[] getSwhidPrefix() { return oriPrefix; } @Override protected byte[] idToSwhid(byte[] id) { return Bytes.concat(getSwhidPrefix(), urlToOriginId(id)); } @Override protected String getIdColumn() { return "url"; } @Override public void readEdges(GraphDataset.NodeCallback nodeCb, GraphDataset.EdgeCallback edgeCb) throws IOException { readIdColumn(nodeCb); } public void readURLs(BytesCallback cb) throws IOException { orcTable.readOrcTable((batch, columnMap) -> { BytesColumnVector urlVector = (BytesColumnVector) batch.cols[columnMap.get(getIdColumn())]; for (int row = 0; row < batch.size; row++) { byte[] rawUrl = ORCTable.getBytesRow(urlVector, row); byte[] id = idToSwhid(rawUrl); byte[] url = Base64.getEncoder().encode(rawUrl); cb.onBytes(id, url); } }, Set.of(getIdColumn())); } } /** * Export an ORC graph to the CSV edge dataset format as two different files, * {@code <basename>.nodes.csv.zst} and {@code <basename>.edges.csv.zst}. */ public static void exportToCsvDataset(String orcDataset, String csvDatasetBasename) throws IOException { ORCGraphDataset dataset = new ORCGraphDataset(orcDataset); File nodesFile = new File(csvDatasetBasename + ".nodes.csv.zst"); File edgesFile = new File(csvDatasetBasename + ".edges.csv.zst"); FastBufferedOutputStream nodesOut = new FastBufferedOutputStream( new ZstdOutputStream(new FileOutputStream(nodesFile))); FastBufferedOutputStream edgesOut = new FastBufferedOutputStream( new ZstdOutputStream(new FileOutputStream(edgesFile))); dataset.readEdges((node) -> { nodesOut.write(node); nodesOut.write('\n'); }, (src, dst, label, perms) -> { edgesOut.write(src); edgesOut.write(' '); edgesOut.write(dst); if (label != null) { edgesOut.write(' '); edgesOut.write(label); } if (perms != -1) { edgesOut.write(' '); edgesOut.write(Long.toString(perms).getBytes()); } edgesOut.write('\n'); }); nodesOut.close(); edgesOut.close(); } /** * Print all the edges of the graph to stdout. Can be piped to * {@link it.unimi.dsi.big.webgraph.ScatteredArcsASCIIGraph} to import the graph dataset and convert * it to a {@link it.unimi.dsi.big.webgraph.BVGraph}.
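 *
 * <p>A hypothetical invocation sketch (classpath setup omitted; the dataset path is a placeholder):
 *
 * <pre>{@code
 * java org.softwareheritage.graph.compress.ORCGraphDataset /path/to/orc/dataset > graph.edges.txt
 * }</pre>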
*/ public static void printSimpleEdges(String orcDataset) throws IOException { ORCGraphDataset dataset = new ORCGraphDataset(orcDataset); FastBufferedOutputStream out = new FastBufferedOutputStream(System.out); dataset.readEdges((node) -> { }, (src, dst, label, perms) -> { out.write(src); out.write(' '); out.write(dst); out.write('\n'); }); out.flush(); } public static void main(String[] args) throws IOException { printSimpleEdges(args[0]); } }
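A minimal driver sketch for the exportToCsvDataset entry point above (the paths are placeholders; the swh-graph classes are assumed to be on the classpath):

    import java.io.IOException;
    import org.softwareheritage.graph.compress.ORCGraphDataset;

    public class ExportToCsvExample {
        public static void main(String[] args) throws IOException {
            // Writes /tmp/swh-graph.nodes.csv.zst and /tmp/swh-graph.edges.csv.zst
            ORCGraphDataset.exportToCsvDataset("/path/to/orc/dataset", "/tmp/swh-graph");
        }
    }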