Page Menu | Home | Software Heritage

D7718.diff
No One | Temporary

D7718.diff

diff --git a/swh/dataset/journalprocessor.py b/swh/dataset/journalprocessor.py
--- a/swh/dataset/journalprocessor.py
+++ b/swh/dataset/journalprocessor.py
@@ -416,13 +416,13 @@
Node sets are sharded by partition ID (as each object is guaranteed to
be assigned to a deterministic Kafka partition) then by object ID
- prefix. The sharding path of each file looks like:
+ suffix. The sharding path of each file looks like:
- .node_sets/{origin..content}/part-{0..256}/nodes-{0..f}.sqlite
+ .node_sets/{origin..content}/part-{0..256}/nodes-{0..f}.db
"""
- # obj_id_prefix = "{:x}".format(object_id[0] % 16)
- obj_id_prefix = "all" # disable sharding for now
- shard_id = (partition_id, obj_id_prefix)
+ obj_id_suffix = "{:x}".format(object_id[-1] % 16)
+ # obj_id_suffix = "all" # uncomment to disable sharding
+ shard_id = (partition_id, obj_id_suffix)
if shard_id not in self.node_sets:
node_set_dir = (
self.node_sets_path
@@ -430,7 +430,7 @@
/ ("part-{}".format(str(partition_id)))
)
node_set_dir.mkdir(exist_ok=True, parents=True)
- node_set_file = node_set_dir / "nodes-{}.db".format(obj_id_prefix)
+ node_set_file = node_set_dir / "nodes-{}.db".format(obj_id_suffix)
node_set = LevelDBSet(node_set_file)
self.exit_stack.enter_context(node_set)
self.node_sets[shard_id] = node_set

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 17, 8:28 PM (18 h, 5 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223666

Event Timeline