D7718.diff
Attached to D7718: journalprocessor: re-enable subsharding per partition
diff --git a/swh/dataset/journalprocessor.py b/swh/dataset/journalprocessor.py
--- a/swh/dataset/journalprocessor.py
+++ b/swh/dataset/journalprocessor.py
@@ -416,13 +416,13 @@
         Node sets are sharded by partition ID (as each object is guaranteed to
         be assigned to a deterministic Kafka partition) then by object ID
-        prefix. The sharding path of each file looks like:
+        suffix. The sharding path of each file looks like:
-            .node_sets/{origin..content}/part-{0..256}/nodes-{0..f}.sqlite
+            .node_sets/{origin..content}/part-{0..256}/nodes-{0..f}.db
         """
-        # obj_id_prefix = "{:x}".format(object_id[0] % 16)
-        obj_id_prefix = "all"  # disable sharding for now
-        shard_id = (partition_id, obj_id_prefix)
+        obj_id_suffix = "{:x}".format(object_id[-1] % 16)
+        # obj_id_suffix = "all"  # uncomment to disable sharding
+        shard_id = (partition_id, obj_id_suffix)
         if shard_id not in self.node_sets:
             node_set_dir = (
                 self.node_sets_path
@@ -430,7 +430,7 @@
                 / ("part-{}".format(str(partition_id)))
             )
             node_set_dir.mkdir(exist_ok=True, parents=True)
-            node_set_file = node_set_dir / "nodes-{}.db".format(obj_id_prefix)
+            node_set_file = node_set_dir / "nodes-{}.db".format(obj_id_suffix)
             node_set = LevelDBSet(node_set_file)
             self.exit_stack.enter_context(node_set)
             self.node_sets[shard_id] = node_set
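To make the new layout concrete, here is a minimal, hypothetical sketch of the shard-path computation the patch switches to: the last byte of the object ID, taken modulo 16, selects one of 16 node-set files inside the object's Kafka partition directory. The shard_path helper below is not part of journalprocessor.py; it only illustrates the layout described in the docstring above.

import hashlib


def shard_path(object_type: str, partition_id: int, object_id: bytes) -> str:
    # Hypothetical helper mirroring the suffix-based sharding in the patch:
    # the last byte of the object ID, modulo 16, yields a single hex digit
    # ("0".."f") that names the shard file within the partition directory.
    obj_id_suffix = "{:x}".format(object_id[-1] % 16)
    return ".node_sets/{}/part-{}/nodes-{}.db".format(
        object_type, partition_id, obj_id_suffix
    )


# Example with a sha1-style 20-byte identifier.
oid = hashlib.sha1(b"example object").digest()
print(shard_path("revision", 42, oid))  # .node_sets/revision/part-42/nodes-<0..f>.db

Because object IDs are hashes, their last byte is roughly uniform, so the 16 shards per partition stay balanced. In the patched code, each open LevelDBSet is cached in self.node_sets keyed by (partition_id, obj_id_suffix), so a given shard file is only opened once.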