diff --git a/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java b/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java --- a/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java +++ b/java/src/main/java/org/softwareheritage/graph/compress/ORCGraphDataset.java @@ -143,6 +143,9 @@ if (columnVector.isRepeating) { row = 0; } + if (columnVector.isNull[row]) { + return null; + } return Arrays.copyOfRange(columnVector.vector[row], columnVector.start[row], columnVector.start[row] + columnVector.length[row]); } @@ -151,10 +154,13 @@ * Utility function for long columns. Return as a long the value of the given row in the column * vector. */ - public static long getLongRow(LongColumnVector columnVector, int row) { + public static Long getLongRow(LongColumnVector columnVector, int row) { if (columnVector.isRepeating) { row = 0; } + if (columnVector.isNull[row]) { + return null; + } return columnVector.vector[row]; } @@ -293,8 +299,10 @@ for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); - long date = ORCTable.getLongRow(dateVector, row); - cb.onLong(id, date); + Long date = ORCTable.getLongRow(dateVector, row); + if (date != null) { + cb.onLong(id, date); + } } }, Set.of(getIdColumn(), longColumn)); } @@ -309,8 +317,10 @@ for (int row = 0; row < batch.size; row++) { byte[] id = idToSwhid(ORCTable.getBytesRow(idVector, row)); long date = dateVector.getTimestampAsLong(row); // rounded to seconds - short dateOffset = (short) ORCTable.getLongRow(dateOffsetVector, row); - cb.onTimestamp(id, date, dateOffset); + Long dateOffset = ORCTable.getLongRow(dateOffsetVector, row); + if (dateOffset != null) { + cb.onTimestamp(id, date, dateOffset.shortValue()); + } } }, Set.of(getIdColumn(), dateColumn, dateOffsetColumn)); } @@ -421,8 +431,8 @@ byte[] src = Bytes.concat(dirPrefix, ORCTable.getBytesRow(srcVector, row)); byte[] dst = Bytes.concat(targetPrefix, ORCTable.getBytesRow(dstVector, row)); byte[] label = Base64.getEncoder().encode(ORCTable.getBytesRow(labelVector, row)); - long permission = ORCTable.getLongRow(permissionVector, row); - edgeCb.onEdge(src, dst, label, (int) permission); + Long permission = ORCTable.getLongRow(permissionVector, row); + edgeCb.onEdge(src, dst, label, permission != null ? permission.intValue() : 0); } }, Set.of("directory_id", "target", "type", "name", "perms")); } @@ -592,7 +602,7 @@ for (int row = 0; row < batch.size; row++) { byte[] originId = urlToOriginId(ORCTable.getBytesRow(originUrlVector, row)); byte[] snapshot_id = ORCTable.getBytesRow(snapshotIdVector, row); - if (snapshot_id.length == 0) { + if (snapshot_id == null || snapshot_id.length == 0) { continue; } edgeCb.onEdge(Bytes.concat(oriPrefix, originId), Bytes.concat(snpPrefix, snapshot_id), null, -1); diff --git a/java/src/main/java/org/softwareheritage/graph/compress/WriteNodeProperties.java b/java/src/main/java/org/softwareheritage/graph/compress/WriteNodeProperties.java --- a/java/src/main/java/org/softwareheritage/graph/compress/WriteNodeProperties.java +++ b/java/src/main/java/org/softwareheritage/graph/compress/WriteNodeProperties.java @@ -111,6 +111,9 @@ for (String tableName : new String[]{"content", "skipped_content"}) { SwhOrcTable table = dataset.getTable(tableName); + if (table == null) { + continue; + } table.readLongColumn("length", (swhid, value) -> { long id = nodeIdMap.getNodeId(swhid); BigArrays.set(valueArray, id, value); @@ -123,10 +126,12 @@ public void writeContentIsSkipped() throws IOException { LongArrayBitVector isSkippedBitVector = LongArrayBitVector.ofLength(numNodes); SwhOrcTable table = dataset.getTable("skipped_content"); - table.readIdColumn((swhid) -> { - long id = nodeIdMap.getNodeId(swhid); - isSkippedBitVector.set(id); - }); + if (table != null) { + table.readIdColumn((swhid) -> { + long id = nodeIdMap.getNodeId(swhid); + isSkippedBitVector.set(id); + }); + } BinIO.storeObject(isSkippedBitVector, graphBasename + ".property.content.is_skipped.bin"); }