Page Menu | Home | Software Heritage

D7031.diff
No One | Temporary

D7031.diff

diff --git a/swh/perfecthash/__init__.py b/swh/perfecthash/__init__.py
--- a/swh/perfecthash/__init__.py
+++ b/swh/perfecthash/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -33,6 +33,10 @@
def __del__(self):
lib.shard_destroy(self.shard)
+ @staticmethod
+ def key_len():
+ return lib.shard_key_len
+
def create(self, objects_count: int) -> "Shard":
"""Wipe out the content of the Read Shard. It must be followed by
**object_count** calls to the **write** method otherwise the content
@@ -62,7 +66,7 @@
"""Create the perfect hash table the **lookup** method
relies on to find the content of the objects.
- It must be called after **create** an **write** otherwise the
+ It must be called after **create** and **write** otherwise the
content of the Read Shard will be inconsistent.
Returns:
@@ -103,4 +107,6 @@
Returns:
0 on success, -1 on error.
"""
+ if len(key) != Shard.key_len():
+ raise ValueError(f"key length is {len(key)} instead of {Shard.key_len()}")
return lib.shard_object_write(self.shard, key, object, len(object))
diff --git a/swh/perfecthash/build.py b/swh/perfecthash/build.py
--- a/swh/perfecthash/build.py
+++ b/swh/perfecthash/build.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -29,6 +29,7 @@
int shard_lookup_object_size(shard_t *shard, const char *key, uint64_t *object_size);
int shard_lookup_object(shard_t *shard, char *object, uint64_t object_size);
+extern const int shard_key_len;
"""
)
diff --git a/swh/perfecthash/hash.h b/swh/perfecthash/hash.h
--- a/swh/perfecthash/hash.h
+++ b/swh/perfecthash/hash.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2021 The Software Heritage developers
+ * Copyright (C) 2021-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
@@ -11,7 +11,8 @@
#define SHARD_OFFSET_MAGIC 32
#define SHARD_OFFSET_HEADER 512
-#define SHARD_KEY_LEN 32
+#define SHARD_KEY_LEN 20
+extern const int shard_key_len;
#define SHARD_MAGIC "SWHShard"
#define SHARD_VERSION 1
diff --git a/swh/perfecthash/hash.c b/swh/perfecthash/hash.c
--- a/swh/perfecthash/hash.c
+++ b/swh/perfecthash/hash.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2021 The Software Heritage developers
+ * Copyright (C) 2021-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
@@ -18,6 +18,8 @@
#include "swh/perfecthash/hash.h"
+const int shard_key_len = SHARD_KEY_LEN;
+
#ifdef HASH_DEBUG
#define debug(...) printf(__VA_ARGS__)
#else
diff --git a/swh/perfecthash/test_hash.cpp b/swh/perfecthash/test_hash.cpp
--- a/swh/perfecthash/test_hash.cpp
+++ b/swh/perfecthash/test_hash.cpp
@@ -113,6 +113,10 @@
ASSERT_GE(close(open(tmpfile.c_str(), O_CREAT, 0777)), 0);
ASSERT_GE(truncate(tmpfile.c_str(), 10 * 1024 * 1024), 0);
+ std::random_device dev;
+ std::mt19937 prng(dev());
+ std::uniform_int_distribution<int> rand(0, 80 * 1024);
+
//
// Populate a Read Shard with multiple objects (objects_count)
// The object content and their keys are from a random source
@@ -125,8 +129,8 @@
int objects_count = 10;
ASSERT_GE(shard_create(shard, objects_count), 0);
for (int i = 0; i < objects_count; i++) {
- std::string key = gen_random(32);
- std::string object = gen_random(50);
+ std::string key = gen_random(SHARD_KEY_LEN);
+ std::string object = gen_random(rand(prng));
key2object[key] = object;
std::cout << key << std::endl;
ASSERT_GE(shard_object_write(shard, key.c_str(), object.c_str(),
diff --git a/swh/perfecthash/tests/test_hash.py b/swh/perfecthash/tests/test_hash.py
--- a/swh/perfecthash/tests/test_hash.py
+++ b/swh/perfecthash/tests/test_hash.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -18,10 +18,10 @@
os.truncate(f, 10 * 1024 * 1024)
s = Shard(f).create(2)
- keyA = b"A" * 32
+ keyA = b"A" * Shard.key_len()
objectA = b"AAAA"
s.write(keyA, objectA)
- keyB = b"B" * 32
+ keyB = b"B" * Shard.key_len()
objectB = b"BBBB"
s.write(keyB, objectB)
s.save()
@@ -48,7 +48,7 @@
#
def test_build_speed(request, tmpdir, payload):
start = time.time()
- os.system(f"cp {payload} {tmpdir}/shard ; rm {tmpdir}/shard")
+ os.system(f"cp {payload} {tmpdir}/shard")
baseline = time.time() - start
write_duration, build_duration, _ = shard_build(request, tmpdir, payload)
duration = write_duration + build_duration
@@ -108,8 +108,8 @@
size = 0
with open(payload, "rb") as f:
while True:
- key = f.read(32)
- if len(key) < 32:
+ key = f.read(Shard.key_len())
+ if len(key) < Shard.key_len():
break
assert key not in objects
object = f.read(random.randrange(512, object_max_size))
@@ -128,8 +128,8 @@
size = 0
with open(payload, "rb") as f:
while True:
- key = f.read(32)
- if len(key) < 32:
+ key = f.read(Shard.key_len())
+ if len(key) < Shard.key_len():
break
if key not in objects:
break

File Metadata

Mime Type
text/plain
Expires
Dec 19 2024, 11:13 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216061

Event Timeline