Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123742
D7031.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
6 KB
Subscribers
None
D7031.diff
View Options
diff --git a/swh/perfecthash/__init__.py b/swh/perfecthash/__init__.py
--- a/swh/perfecthash/__init__.py
+++ b/swh/perfecthash/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -33,6 +33,10 @@
def __del__(self):
lib.shard_destroy(self.shard)
+ @staticmethod
+ def key_len():
+ return lib.shard_key_len
+
def create(self, objects_count: int) -> "Shard":
"""Wipe out the content of the Read Shard. It must be followed by
**object_count** calls to the **write** method otherwise the content
@@ -62,7 +66,7 @@
"""Create the perfect hash table the **lookup** method
relies on to find the content of the objects.
- It must be called after **create** an **write** otherwise the
+ It must be called after **create** and **write** otherwise the
content of the Read Shard will be inconsistent.
Returns:
@@ -103,4 +107,6 @@
Returns:
0 on success, -1 on error.
"""
+ if len(key) != Shard.key_len():
+ raise ValueError(f"key length is {len(key)} instead of {Shard.key_len()}")
return lib.shard_object_write(self.shard, key, object, len(object))
diff --git a/swh/perfecthash/build.py b/swh/perfecthash/build.py
--- a/swh/perfecthash/build.py
+++ b/swh/perfecthash/build.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -29,6 +29,7 @@
int shard_lookup_object_size(shard_t *shard, const char *key, uint64_t *object_size);
int shard_lookup_object(shard_t *shard, char *object, uint64_t object_size);
+extern const int shard_key_len;
"""
)
diff --git a/swh/perfecthash/hash.h b/swh/perfecthash/hash.h
--- a/swh/perfecthash/hash.h
+++ b/swh/perfecthash/hash.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2021 The Software Heritage developers
+ * Copyright (C) 2021-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
@@ -11,7 +11,8 @@
#define SHARD_OFFSET_MAGIC 32
#define SHARD_OFFSET_HEADER 512
-#define SHARD_KEY_LEN 32
+#define SHARD_KEY_LEN 20
+extern const int shard_key_len;
#define SHARD_MAGIC "SWHShard"
#define SHARD_VERSION 1
diff --git a/swh/perfecthash/hash.c b/swh/perfecthash/hash.c
--- a/swh/perfecthash/hash.c
+++ b/swh/perfecthash/hash.c
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2021 The Software Heritage developers
+ * Copyright (C) 2021-2022 The Software Heritage developers
* See the AUTHORS file at the top-level directory of this distribution
* License: GNU General Public License version 3, or any later version
* See top-level LICENSE file for more information
@@ -18,6 +18,8 @@
#include "swh/perfecthash/hash.h"
+const int shard_key_len = SHARD_KEY_LEN;
+
#ifdef HASH_DEBUG
#define debug(...) printf(__VA_ARGS__)
#else
diff --git a/swh/perfecthash/test_hash.cpp b/swh/perfecthash/test_hash.cpp
--- a/swh/perfecthash/test_hash.cpp
+++ b/swh/perfecthash/test_hash.cpp
@@ -113,6 +113,10 @@
ASSERT_GE(close(open(tmpfile.c_str(), O_CREAT, 0777)), 0);
ASSERT_GE(truncate(tmpfile.c_str(), 10 * 1024 * 1024), 0);
+ std::random_device dev;
+ std::mt19937 prng(dev());
+ std::uniform_int_distribution<int> rand(0, 80 * 1024);
+
//
// Populate a Read Shard with multiple objects (objects_count)
// The object content and their keys are from a random source
@@ -125,8 +129,8 @@
int objects_count = 10;
ASSERT_GE(shard_create(shard, objects_count), 0);
for (int i = 0; i < objects_count; i++) {
- std::string key = gen_random(32);
- std::string object = gen_random(50);
+ std::string key = gen_random(SHARD_KEY_LEN);
+ std::string object = gen_random(rand(prng));
key2object[key] = object;
std::cout << key << std::endl;
ASSERT_GE(shard_object_write(shard, key.c_str(), object.c_str(),
diff --git a/swh/perfecthash/tests/test_hash.py b/swh/perfecthash/tests/test_hash.py
--- a/swh/perfecthash/tests/test_hash.py
+++ b/swh/perfecthash/tests/test_hash.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -18,10 +18,10 @@
os.truncate(f, 10 * 1024 * 1024)
s = Shard(f).create(2)
- keyA = b"A" * 32
+ keyA = b"A" * Shard.key_len()
objectA = b"AAAA"
s.write(keyA, objectA)
- keyB = b"B" * 32
+ keyB = b"B" * Shard.key_len()
objectB = b"BBBB"
s.write(keyB, objectB)
s.save()
@@ -48,7 +48,7 @@
#
def test_build_speed(request, tmpdir, payload):
start = time.time()
- os.system(f"cp {payload} {tmpdir}/shard ; rm {tmpdir}/shard")
+ os.system(f"cp {payload} {tmpdir}/shard")
baseline = time.time() - start
write_duration, build_duration, _ = shard_build(request, tmpdir, payload)
duration = write_duration + build_duration
@@ -108,8 +108,8 @@
size = 0
with open(payload, "rb") as f:
while True:
- key = f.read(32)
- if len(key) < 32:
+ key = f.read(Shard.key_len())
+ if len(key) < Shard.key_len():
break
assert key not in objects
object = f.read(random.randrange(512, object_max_size))
@@ -128,8 +128,8 @@
size = 0
with open(payload, "rb") as f:
while True:
- key = f.read(32)
- if len(key) < 32:
+ key = f.read(Shard.key_len())
+ if len(key) < Shard.key_len():
break
if key not in objects:
break
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 19 2024, 11:13 PM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216061
Attached To
D7031: the key has a fixed len: do not hardcode it
Event Timeline
Log In to Comment