Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/webgraph.py
Show First 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | STEP_ARGV: Dict[CompressionStep, List[str]] = { | ||||
], | ], | ||||
CompressionStep.MAPS: [ | CompressionStep.MAPS: [ | ||||
"{java}", | "{java}", | ||||
"org.softwareheritage.graph.compress.NodeMapBuilder", | "org.softwareheritage.graph.compress.NodeMapBuilder", | ||||
"{out_dir}/{graph_name}", | "{out_dir}/{graph_name}", | ||||
"{tmp_dir}", | "{tmp_dir}", | ||||
"< {out_dir}/{graph_name}.nodes.csv.zst", | "< {out_dir}/{graph_name}.nodes.csv.zst", | ||||
], | ], | ||||
CompressionStep.EXTRACT_PERSONS: [ | |||||
"{java}", | |||||
"org.softwareheritage.graph.compress.ExtractPersons", | |||||
"--temp-dir", | |||||
"{tmp_dir}", | |||||
"{in_dir}", | |||||
"{out_dir}/{graph_name}", | |||||
], | |||||
CompressionStep.MPH_PERSONS: [ | CompressionStep.MPH_PERSONS: [ | ||||
"{java}", | "{java}", | ||||
"it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction", | "it.unimi.dsi.sux4j.mph.GOVMinimalPerfectHashFunction", | ||||
"--byte-array", | "--byte-array", | ||||
"--decompressor", | "--decompressor", | ||||
"com.github.luben.zstd.ZstdInputStream", | "com.github.luben.zstd.ZstdInputStream", | ||||
"--temp-dir", | "--temp-dir", | ||||
"{tmp_dir}", | "{tmp_dir}", | ||||
"{out_dir}/{graph_name}.persons.mph", | "{out_dir}/{graph_name}.persons.mph", | ||||
"{out_dir}/{graph_name}.persons.csv.zst", | "{out_dir}/{graph_name}.persons.csv.zst", | ||||
], | ], | ||||
CompressionStep.EXTRACT_PERSONS: [ | |||||
"{java}", | |||||
"org.softwareheritage.graph.compress.ExtractPersons", | |||||
"--temp-dir", | |||||
"{tmp_dir}", | |||||
"{in_dir}", | |||||
"{out_dir}/{graph_name}", | |||||
], | |||||
CompressionStep.NODE_PROPERTIES: [ | CompressionStep.NODE_PROPERTIES: [ | ||||
vlorentz: Why this change? Was the previous code not tested? | |||||
seirl (Author, Unsubmitted; marked Done): It doesn't change the logic, because the order of steps is defined in an enum above this dict. I'm just making it consistent with the order in the enum. | |||||
"{java}", | "{java}", | ||||
"org.softwareheritage.graph.compress.WriteNodeProperties", | "org.softwareheritage.graph.compress.WriteNodeProperties", | ||||
"{in_dir}", | "{in_dir}", | ||||
"{out_dir}/{graph_name}", | "{out_dir}/{graph_name}", | ||||
], | ], | ||||
CompressionStep.MPH_LABELS: [ | CompressionStep.MPH_LABELS: [ | ||||
"{java}", | "{java}", | ||||
"it.unimi.dsi.sux4j.mph.LcpMonotoneMinimalPerfectHashFunction", | "it.unimi.dsi.sux4j.mph.LcpMonotoneMinimalPerfectHashFunction", | ||||
▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines |
Why this change? Was the previous code not tested?