diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -12,3 +12,9 @@
 version.txt
 .mypy_cache/
 .vscode/
+*.o
+swh/perfecthash/test_hash
+swh/perfecthash/hash.gcda
+swh/perfecthash/hash.gcno
+swh/perfecthash/test_hash.lcov
+swh/perfecthash/html
diff --git a/AUTHORS b/AUTHORS
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,3 +1,3 @@
-Copyright (C) 2019 The Software Heritage developers
+Copyright (C) 2021 The Software Heritage developers
 
 See http://www.softwareheritage.org/ for more information.
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include Makefile
 include requirements*.txt
 include version.txt
-include README.md
+include README.rst
 recursive-include swh py.typed
+include swh/perfecthash/hash.[ch]
diff --git a/conftest.py b/conftest.py
new file mode 100644
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1,22 @@
+def pytest_addoption(parser):
+    parser.addoption(
+        "--shard-size", default=10, type=int, help="Size of the Read Shard file in MB",
+    )
+    parser.addoption(
+        "--shard-path", default="/tmp/payload", help="Path of the Read Shard file",
+    )
+    parser.addoption(
+        "--shard-count",
+        default=2,
+        type=int,
+        help="Number of Read Shard files for lookup tests",
+    )
+    parser.addoption(
+        "--object-max-size",
+        default=10 * 1024,
+        type=int,
+        help="Maximum size of anobject in bytes",
+    )
+    parser.addoption(
+        "--lookups", default=10, type=int, help="Number of lookups to perform",
+    )
diff --git a/docs/README.rst b/docs/README.rst
deleted file mode 100644
--- a/docs/README.rst
+++ /dev/null
@@ -1,4 +0,0 @@
-Software Heritage - Python module template
-==========================================
-
-Python module template, used as skeleton to create new modules.
diff --git a/docs/benchmarks.rst b/docs/benchmarks.rst
new file mode 100644
--- /dev/null
+++ b/docs/benchmarks.rst
@@ -0,0 +1,93 @@
+Benchmarks
+==========
+
+The tests (tox) run the benchmark code with very small files. The benchmarks is about running the same
+code with Read Shards that have the expected size in production (100GB) and verify:
+
+* The time required to build a Read Shard is less than 5 times the
+  time to copy a file of the same size.  It guards against regressions
+  that would have a significant performance impact.
+
+* The time required to build and write the perfect hash table is
+  always a fraction of the time required to copy the content of the
+  objects.
+
+* It is possible to perform at least 10,000 object lookups per second.
+
+
+Build performances
+------------------
+
+In both cases the distribution of the object sizes is uniform.
+
+With a small number of large (<100MB) objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The goal is to verify that manipulating up to the largest supported
+object size actually works and does not trigger problems in the
+extreme case that all objects have the maximum size.
+
+* time tox -e py3 -- --basetemp=/mnt/pytest -s --shard-path /mnt/payload --shard-size $((100 * 1024)) --object-max-size $((100 * 1024 * 1024)) -k test_build_speed
+  number of objects = 2057, total size = 107374116576
+  baseline 165.85, write_duration 327.19, build_duration 0.0062, total_duration 327.19
+
+With a large number of small (<4KB) objects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It approximates the maximum number of objects a Read Shard can
+contain. The goal is to verify that creating a perfect hash with this
+many items does not require more than a fraction of the time required
+to copy the objects.
+
+* time tox -e py3 -- --basetemp=/mnt/pytest -s --shard-path /mnt/payload --shard-size $((100 * 1024)) --object-max-size $((4 * 1024)) -k test_build_speed
+  number of objects = 45973694, total size = 105903024192
+  baseline 165.74, write_duration 495.07, build_duration 24.21, total_duration 519.28
+
+
+Object lookup performances
+--------------------------
+
+The machine has 200GB of RAM and can therefore approximately cache the
+content of two Read Shards which can have a significant impact on
+performances. To minize that effect, four Read Shard are created
+totaling 400GB. All objects are looked up in all shards to verify
+the lookup speed is greater than 10,000 objects per second.
+
+* time tox -e py3 -- --basetemp=/mnt/pytest -s -k test_lookup_speed --lookups $((100 * 1024 * 1024)) --shard-size $((100 * 1024)) --object-max-size $((4 * 1024)) swh/perfecthash/tests/test_hash.py  --shard-path /mnt/payload --shard-count 4
+  number of objects = 45972629, total size = 105903058272
+  key lookups speed = 148528.56/s
+
+
+Setup via Fed4Fire
+------------------
+
+* https://www.fed4fire.eu/
+* /opt/jFed/jFed-Experimenter
+* Create an experiment with one machine
+* Click on Topology Viewer and run the experiment (name test)
+* Once the provisionning is complete (Testing connectivity to resources on Grid5000) click Export As
+* Choose Export Configuration Management Settings
+* Save under test.zip and unzip
+* ssh -i id_rsa -F ssh-config node0
+* alias sudo=sudo-g5k
+
+Setup via Grid5000
+------------------
+
+* https://www.grid5000.fr/
+* oarsub -I -l "{cluster='dahu'}/host=1,walltime=1" -t deploy
+* kadeploy3 -f $OAR_NODE_FILE -e debian11-x64-base -k
+* ssh root@$(tail -1 $OAR_NODE_FILE)
+
+Common setup
+------------
+
+* mkfs.ext4 /dev/sdc
+* mount /dev/sdc /mnt
+* apt-get install -y python3-venv python3-dev libcmph-dev gcc git emacs-nox
+* git clone -b wip-benchmark https://git.easter-eggs.org/biceps/swh-perfecthash/
+* python3 -m venv bench
+* source bench/bin/activate
+* cd swh-perfecthash
+* pip install -r requirements.txt -r requirements-test.txt tox wheel
+* tox -e py3
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,15 +1,17 @@
-.. _swh-py-template:
+.. _swh-perfecthash:
 
-.. include:: README.rst
+Software Heritage - Object storage perfect hash
+===============================================
+
+Low level management for read-only content-addressable object storage
+indexed with a perfect hash table.
 
-.. toctree::
-   :maxdepth: 2
-   :caption: Contents:
 
+Reference Documentation
+-----------------------
 
-Indices and tables
-------------------
+.. toctree::
+   :maxdepth: 2
 
-* :ref:`genindex`
-* :ref:`modindex`
-* :ref:`search`
+   /apidoc/swh.perfecthash
+   benchmarks
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -11,5 +11,8 @@
 [mypy-pytest.*]
 ignore_missing_imports = True
 
-# [mypy-add_your_lib_here.*]
-# ignore_missing_imports = True
+[mypy-_hash_cffi.*]
+ignore_missing_imports = True
+
+[mypy-cffi.*]
+ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@
 # should match https://pypi.python.org/pypi names. For the full spec or
 # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
 
+cffi
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -48,9 +48,10 @@
     packages=find_packages(),  # packages's modules
     install_requires=parse_requirements(None, "swh"),
     tests_require=parse_requirements("test"),
-    setup_requires=["setuptools-scm"],
+    setup_requires=["setuptools-scm", "cffi"],
     use_scm_version=True,
     extras_require={"testing": parse_requirements("test")},
+    cffi_modules=["swh/perfecthash/build.py:ffibuilder"],
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
diff --git a/swh/perfecthash/.clang-format b/swh/perfecthash/.clang-format
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/.clang-format
@@ -0,0 +1,2 @@
+---
+IndentWidth: 4
diff --git a/swh/perfecthash/Makefile b/swh/perfecthash/Makefile
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/Makefile
@@ -0,0 +1,19 @@
+CFLAGS=-DHASH_DEBUG -Wall -I../.. -g -std=c++17 -fprofile-arcs -ftest-coverage
+LDFLAGS=-lcmph -lgtest -lpthread -lstdc++ -lstdc++fs -fprofile-arcs -ftest-coverage
+
+test_hash: hash.o test_hash.o
+	$(CXX) -o $@ $^ $(LDFLAGS)
+
+hash.c: hash.h
+test_hash.cpp: hash.h
+
+format:
+	clang-format -i hash.c hash.h test_hash.cpp
+
+check: test_hash
+	valgrind --leak-check=full --tool=memcheck ./test_hash
+	lcov -d . -c -o test_hash.lcov
+	rm -fr html ; genhtml -o html test_hash.lcov
+
+clean:
+	rm -f *.o test_hash
diff --git a/swh/perfecthash/__init__.py b/swh/perfecthash/__init__.py
--- a/swh/perfecthash/__init__.py
+++ b/swh/perfecthash/__init__.py
@@ -0,0 +1,105 @@
+# Copyright (C) 2021  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import NewType
+
+from _hash_cffi import lib
+from cffi import FFI
+
+Key = NewType("Key", bytes)
+HashObject = NewType("HashObject", bytes)
+
+
+class Shard:
+    """Low level management for files indexed with a perfect hash table.
+
+    This class allows creating a Read Shard by adding key/object pairs
+    and looking up the content of an object when given the key.
+    """
+
+    def __init__(self, path: str):
+        """Initialize with an existing Read Shard.
+
+        Args:
+            path: path to an existing Read Shard file or device
+
+        """
+        self.ffi = FFI()
+        self.shard = lib.shard_init(path.encode("utf-8"))
+
+    def __del__(self):
+        lib.shard_destroy(self.shard)
+
+    def create(self, objects_count: int) -> "Shard":
+        """Wipe out the content of the Read Shard. It must be followed by
+        **object_count** calls to the **write** method otherwise the content
+        of the Read Shard will be inconsistent. When all objects are inserted,
+        the Read Shard must be made persistent by calling the **save** method.
+
+        Args:
+            objects_count: number of objects in the Read Shard.
+
+        Returns:
+            self.
+
+        """
+        assert lib.shard_create(self.shard, objects_count) != -1
+        return self
+
+    def load(self):
+        """Open the Read Shard file in read-only mode.
+
+        Returns:
+            self.
+        """
+        assert lib.shard_load(self.shard) != -1
+        return self
+
+    def save(self):
+        """Create the perfect hash table the **lookup** method
+        relies on to find the content of the objects.
+
+        It must be called after **create** an **write** otherwise the
+        content of the Read Shard will be inconsistent.
+
+        Returns:
+            0 on success, -1 on error.
+        """
+        return lib.shard_save(self.shard)
+
+    def lookup(self, key: Key) -> HashObject:
+        """Fetch the object matching the key in the Read Shard.
+
+        Fetching an object is O(1): one lookup in the index to obtain
+        the offset of the object in the Read Shard and one read to get
+        the payload.
+
+        Args:
+            key: the key associated with the object to retrieve.
+
+        Returns:
+           the object as bytes.
+        """
+        object_size_pointer = self.ffi.new("size_t*")
+        lib.shard_lookup_object_size(self.shard, key, object_size_pointer)
+        object_size = object_size_pointer[0]
+        object_pointer = self.ffi.new("char[]", object_size)
+        lib.shard_lookup_object(self.shard, object_pointer, object_size)
+        return self.ffi.buffer(object_pointer, object_size)
+
+    def write(self, key: Key, object: HashObject) -> int:
+        """Add the key/object pair to the Read Shard.
+
+        The **create** method must have been called prior to calling
+        the **write** method.
+
+        Args:
+            key: the unique key associated with the object.
+            object: the object
+
+        Returns:
+            0 on success, -1 on error.
+        """
+        return lib.shard_object_write(self.shard, key, object, len(object))
diff --git a/swh/perfecthash/build.py b/swh/perfecthash/build.py
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/build.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2021  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from cffi import FFI
+
+ffibuilder = FFI()
+
+# cdef() expects a single string declaring the C types, functions and
+# globals needed to use the shared object. It must be in valid C syntax.
+ffibuilder.cdef(
+    """
+typedef struct shard_t shard_t;
+
+shard_t* shard_init(const char* path);
+int shard_destroy(shard_t* shard);
+
+int shard_create(shard_t* shard, size_t objects_count);
+int shard_object_write(shard_t* shard, const char* key,
+    const char* object, size_t object_size);
+int shard_save(shard_t* shard);
+
+int shard_load(shard_t* shard);
+int shard_lookup_object_size(shard_t *shard, const char *key, size_t *object_size);
+int shard_lookup_object(shard_t *shard, char *object, size_t object_size);
+
+"""
+)
+
+ffibuilder.set_source(
+    "_hash_cffi",
+    """
+    #include "swh/perfecthash/hash.h"
+    """,
+    sources=["swh/perfecthash/hash.c"],
+    include_dirs=["."],
+    libraries=["cmph"],
+)  # library name, for the linker
+
+if __name__ == "__main__":
+    ffibuilder.compile(verbose=True)
diff --git a/swh/perfecthash/hash.h b/swh/perfecthash/hash.h
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/hash.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2021  The Software Heritage developers
+ * See the AUTHORS file at the top-level directory of this distribution
+ * License: GNU General Public License version 3, or any later version
+ * See top-level LICENSE file for more information
+ */
+
+#include <cmph.h>
+#include <cmph_types.h>
+
+/*
+ * The Read Shard has the following structure:
+ *
+ * - bytes [0, objects_position[
+ *      The header shard_header_t
+ * - bytes [objects_position, index_position[
+ *      objects_count times the size of the object (size_t) followed by the
+ * object itself
+ * - bytes [index_position, hash_position[
+ *      An array of size_t object positions in the range [objects_position,
+ * index_position[ The size of the array is provided by cmph_size after building
+ * the hash function
+ * - bytes [hash_position, ...[
+ *      The hash function, as written by cmph_dump
+ */
+
+#define SHARD_OFFSET_HEADER 512
+#define SHARD_KEY_LEN 32
+
+typedef struct {
+    size_t objects_count;
+    size_t objects_position;
+    size_t objects_size;
+    size_t index_position;
+    size_t index_size;
+    size_t hash_position;
+} shard_header_t;
+
+typedef struct {
+    char key[SHARD_KEY_LEN];
+    size_t object_offset;
+} shard_index_t;
+
+typedef struct {
+    char *path;
+    FILE *f;
+    shard_header_t header;
+    cmph_t *hash;
+
+    // The following fields are only used when creating the Read Shard
+    cmph_io_adapter_t *source;
+    cmph_config_t *config;
+    shard_index_t *index;
+    size_t index_offset;
+} shard_t;
+
+shard_t *shard_init(const char *path);
+int shard_destroy(shard_t *shard);
+
+int shard_create(shard_t *shard, size_t objects_count);
+int shard_object_write(shard_t *shard, const char *key, const char *object,
+                       size_t object_size);
+int shard_save(shard_t *shard);
+
+int shard_load(shard_t *shard);
+int shard_lookup_object_size(shard_t *shard, const char *key,
+                             size_t *object_size);
+int shard_lookup_object(shard_t *shard, char *object, size_t object_size);
diff --git a/swh/perfecthash/hash.c b/swh/perfecthash/hash.c
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/hash.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2021  The Software Heritage developers
+ * See the AUTHORS file at the top-level directory of this distribution
+ * License: GNU General Public License version 3, or any later version
+ * See top-level LICENSE file for more information
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <memory.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "swh/perfecthash/hash.h"
+
+#ifdef HASH_DEBUG
+#define debug(...) printf(__VA_ARGS__)
+#else
+#define debug(...)
+#endif
+
+size_t ntohq(size_t v) { return __builtin_bswap64(v); }
+
+size_t htonq(size_t v) { return __builtin_bswap64(v); }
+
+/***********************************************************
+ * wrappers around FILE functions that:
+ * - return -1 on error
+ * - print a meaningful message when an error occurs
+ *
+ */
+
+int shard_open(shard_t *shard, const char *mode) {
+    shard->f = fopen(shard->path, mode);
+    if (shard->f == NULL) {
+        printf("shard_open: open(%s, %s): %s\n", shard->path, mode,
+               strerror(errno));
+        return -1;
+    } else
+        return 0;
+}
+
+int shard_close(shard_t *shard) {
+    if (shard->f == NULL)
+        return 0;
+    int r = fclose(shard->f);
+    if (r < 0)
+        printf("shard_close: fclose(%p): %s\n", shard->f, strerror(errno));
+    return r;
+}
+
+int shard_seek(shard_t *shard, long offset, int whence) {
+    int r = fseek(shard->f, offset, whence);
+    if (r < 0)
+        printf("shard_seek: fseek(%p, %ld, %d): %s\n", shard->f, offset, whence,
+               strerror(errno));
+    return r;
+}
+
+size_t shard_tell(shard_t *shard) {
+    long r = ftell(shard->f);
+    if (r < 0)
+        printf("shard_tell: ftell(%p): %s\n", shard->f, strerror(errno));
+    return r;
+}
+
+int shard_read(shard_t *shard, void *ptr, size_t size) {
+    size_t read;
+    if ((read = fread(ptr, 1, size, shard->f)) != size) {
+        printf("shard_read: read %ld instead of %ld\n", read, size);
+        return -1;
+    }
+    return 0;
+}
+
+int shard_read_size_t(shard_t *shard, size_t *ptr) {
+    size_t n_size;
+    if (shard_read(shard, &n_size, sizeof(size_t)) < 0) {
+        printf("shard_read_size_t: shard_read\n");
+        return -1;
+    }
+    *ptr = ntohq(n_size);
+    return 0;
+}
+
+int shard_write(shard_t *shard, const void *ptr, size_t nmemb) {
+    size_t wrote;
+    if ((wrote = fwrite(ptr, 1, nmemb, shard->f)) != nmemb) {
+        printf("shard_write: wrote %ld instead of %ld\n", wrote, nmemb);
+        return -1;
+    }
+    return 0;
+}
+
+/***********************************************************
+ * load or save a shard_header_t
+ */
+
+int shard_header_print(shard_header_t *header) {
+#define PRINT(name) debug("shard_header_print: " #name " %ld\n", header->name)
+    PRINT(objects_count);
+    PRINT(objects_position);
+    PRINT(objects_size);
+    PRINT(index_position);
+    PRINT(index_size);
+    PRINT(hash_position);
+#undef PRINT
+    return 0;
+}
+
+int shard_header_load(shard_t *shard) {
+    if (shard_seek(shard, 0, SEEK_SET) < 0) {
+        printf("shard_header_load\n");
+        return -1;
+    }
+    shard_header_t header;
+    if (shard_read(shard, (void *)&header, sizeof(shard_header_t)) < 0) {
+        printf("shard_header_load\n");
+        return -1;
+    }
+#define LOAD(name) shard->header.name = ntohq(header.name)
+    LOAD(objects_count);
+    LOAD(objects_position);
+    LOAD(objects_size);
+    LOAD(index_position);
+    LOAD(index_size);
+    LOAD(hash_position);
+#undef LOAD
+    shard_header_print(&shard->header);
+    return 0;
+}
+
+int shard_header_save(shard_t *shard) {
+    if (shard_seek(shard, 0, SEEK_SET) < 0) {
+        printf("shard_header_save\n");
+        return -1;
+    }
+    shard_header_print(&shard->header);
+    shard_header_t header;
+#define SAVE(name) header.name = htonq(shard->header.name)
+    SAVE(objects_count);
+    SAVE(objects_position);
+    SAVE(objects_size);
+    SAVE(index_position);
+    SAVE(index_size);
+    SAVE(hash_position);
+#undef SAVE
+    if (shard_write(shard, (void *)&header, sizeof(shard_header_t)) < 0) {
+        printf("shard_header_save\n");
+        return -1;
+    }
+    return 0;
+}
+
+int shard_header_reset(shard_header_t *header) {
+    memset((void *)header, '\0', sizeof(shard_header_t));
+    header->objects_position = SHARD_OFFSET_HEADER;
+    return 0;
+}
+
+/***********************************************************
+ * Create the Read Shard
+ */
+
+int shard_object_write(shard_t *shard, const char *key, const char *object,
+                       size_t object_size) {
+    // save key & index to later build the hash
+    shard_index_t *index = &shard->index[shard->index_offset];
+    memcpy((void *)index->key, key, SHARD_KEY_LEN);
+    index->object_offset = shard_tell(shard);
+    shard->index_offset++;
+    // write the object size and the object itself
+    size_t n_object_size = htonq(object_size);
+    if (shard_write(shard, (void *)&n_object_size, sizeof(size_t)) < 0) {
+        printf("shard_object_write: object_size\n");
+        return -1;
+    }
+    if (shard_write(shard, (void *)object, object_size) < 0) {
+        printf("shard_object_write: object\n");
+        return -1;
+    }
+    return 0;
+}
+
+static int io_read(void *data, char **key, cmph_uint32 *keylen) {
+    shard_t *shard = (shard_t *)data;
+    *key = shard->index[shard->index_offset].key;
+    *keylen = SHARD_KEY_LEN;
+    shard->index_offset++;
+    return shard->index_offset >= shard->header.objects_count ? -1 : 0;
+}
+
+static void io_dispose(void *data, char *key, cmph_uint32 keylen) {}
+
+static void io_rewind(void *data) {
+    shard_t *shard = (shard_t *)data;
+    shard->index_offset = 0;
+}
+
+static cmph_io_adapter_t *io_adapter(shard_t *shard) {
+    cmph_io_adapter_t *key_source =
+        (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+    if (key_source == NULL)
+        return NULL;
+    key_source->data = (void *)shard;
+    key_source->nkeys = shard->header.objects_count;
+    key_source->read = io_read;
+    key_source->dispose = io_dispose;
+    key_source->rewind = io_rewind;
+    return key_source;
+}
+
+int shard_hash_create(shard_t *shard) {
+    shard->source = io_adapter(shard);
+    shard->config = cmph_config_new(shard->source);
+    cmph_config_set_algo(shard->config, CMPH_CHD_PH);
+    cmph_config_set_keys_per_bin(shard->config, 1);
+    cmph_config_set_b(shard->config, 4);
+    shard->hash = cmph_new(shard->config);
+    return 0;
+}
+
+int shard_index_save(shard_t *shard) {
+    shard->header.index_position =
+        shard->header.objects_position + shard->header.objects_size;
+    debug("shard_index_save: index_position %ld\n",
+          shard->header.index_position);
+    assert(shard->header.index_position == shard_tell(shard));
+    cmph_uint32 count = cmph_size(shard->hash);
+    debug("shard_index_save: count = %d\n", count);
+    shard->header.index_size = count * sizeof(size_t);
+    size_t *index = (size_t *)calloc(1, shard->header.index_size);
+    for (size_t i = 0; i < shard->index_offset; i++) {
+        cmph_uint32 h =
+            cmph_search(shard->hash, shard->index[i].key, SHARD_KEY_LEN);
+        debug("shard_index_save: i = %ld, h = %d, offset = %ld\n", i, h,
+              shard->index[i].object_offset);
+        assert(h < count);
+        index[h] = htonq(shard->index[i].object_offset);
+    }
+    size_t index_size = shard->header.index_size;
+    if (shard_write(shard, (void *)index, index_size) < 0) {
+        printf("shard_index_save\n");
+        return -1;
+    }
+    free(index);
+    return 0;
+}
+
+int shard_hash_save(shard_t *shard) {
+    shard->header.hash_position =
+        shard->header.index_position + shard->header.index_size;
+    debug("shard_hash_save: hash_position %ld\n", shard->header.hash_position);
+    cmph_dump(shard->hash, shard->f);
+    return 0;
+}
+
+int shard_save(shard_t *shard) {
+    shard->header.objects_size =
+        shard_tell(shard) - shard->header.objects_position;
+    return (shard_hash_create(shard) < 0 || shard_index_save(shard) < 0 ||
+            shard_hash_save(shard) < 0 || shard_header_save(shard) < 0)
+               ? -1
+               : 0;
+    return 0;
+}
+
+int shard_reset(shard_t *shard) {
+    if (shard_header_reset(&shard->header) < 0)
+        return -1;
+    return shard_seek(shard, SHARD_OFFSET_HEADER, SEEK_SET);
+}
+
+int shard_create(shard_t *shard, size_t objects_count) {
+    if (shard_open(shard, "w+") < 0)
+        return -1;
+    if (shard_reset(shard) < 0)
+        return -1;
+    shard->header.objects_count = objects_count;
+    shard->index =
+        (shard_index_t *)malloc(sizeof(shard_index_t) * objects_count);
+    return 0;
+}
+
+/**********************************************************
+ * Lookup objects from a Read Shard
+ */
+
+int shard_lookup_object_size(shard_t *shard, const char *key,
+                             size_t *object_size) {
+    debug("shard_lookup_object_size\n");
+    cmph_uint32 h = cmph_search(shard->hash, key, SHARD_KEY_LEN);
+    debug("shard_lookup_object_size: h = %d\n", h);
+    size_t index_offset = shard->header.index_position + h * sizeof(size_t);
+    debug("shard_lookup_object_size: index_offset = %ld\n", index_offset);
+    if (shard_seek(shard, index_offset, SEEK_SET) < 0) {
+        printf("shard_lookup_object_size: index_offset\n");
+        return -1;
+    }
+    size_t object_offset;
+    if (shard_read_size_t(shard, &object_offset) < 0) {
+        printf("shard_lookup_object_size: object_offset\n");
+        return -1;
+    }
+    debug("shard_lookup_object_size: object_offset = %ld\n", object_offset);
+    if (shard_seek(shard, object_offset, SEEK_SET) < 0) {
+        printf("shard_lookup_object_size: object_offset\n");
+        return -1;
+    }
+    if (shard_read_size_t(shard, object_size) < 0) {
+        printf("shard_lookup_object_size: object_size\n");
+        return -1;
+    }
+    debug("shard_lookup_object_size: object_size = %ld\n", *object_size);
+    return 0;
+}
+
+int shard_lookup_object(shard_t *shard, char *object, size_t object_size) {
+    if (shard_read(shard, (void *)object, object_size) < 0) {
+        printf("shard_lookup_object: object\n");
+        return -1;
+    }
+    return 0;
+}
+
+int shard_hash_load(shard_t *shard) {
+    if (shard_seek(shard, shard->header.hash_position, SEEK_SET) < 0) {
+        printf("shard_hash_load\n");
+        return -1;
+    }
+    debug("shard_hash_load: hash_position %ld\n", shard->header.hash_position);
+    shard->hash = cmph_load(shard->f);
+    if (shard->hash == NULL) {
+        printf("shard_hash_load: cmph_load\n");
+        return -1;
+    }
+    return 0;
+}
+
+int shard_load(shard_t *shard) {
+    debug("shard_load\n");
+    if (shard_open(shard, "r") < 0)
+        return -1;
+    if (shard_header_load(shard) < 0)
+        return -1;
+    return shard_hash_load(shard);
+}
+
+/**********************************************************
+ * Initialize and destroy a Read Shard
+ */
+
+shard_t *shard_init(const char *path) {
+    debug("shard_init\n");
+    shard_t *shard = (shard_t *)malloc(sizeof(shard_t));
+    if (shard == NULL)
+        return NULL;
+    memset((void *)shard, '\0', sizeof(shard_t));
+    shard->path = strdup(path);
+    return shard;
+}
+
+int shard_destroy(shard_t *shard) {
+    if (shard->source)
+        free(shard->source);
+    if (shard->config)
+        cmph_config_destroy(shard->config);
+    if (shard->hash)
+        cmph_destroy(shard->hash);
+    if (shard->index)
+        free(shard->index);
+    free(shard->path);
+    int r = shard_close(shard);
+    free(shard);
+    return r;
+}
diff --git a/swh/perfecthash/test_hash.cpp b/swh/perfecthash/test_hash.cpp
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/test_hash.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2021  The Software Heritage developers
+ * See the AUTHORS file at the top-level directory of this distribution
+ * License: GNU General Public License version 3, or any later version
+ * See top-level LICENSE file for more information
+ */
+
+#include <experimental/filesystem>
+#include <fcntl.h>
+#include <gtest/gtest.h>
+#include <random>
+#include <sys/types.h>
+#include <unistd.h>
+
+extern "C" {
+#include "hash.h"
+}
+
+using namespace std::experimental;
+
+filesystem::path
+create_temporary_directory(unsigned long long max_tries = 1000) {
+    auto tmp_dir = filesystem::temp_directory_path();
+    unsigned long long i = 0;
+    std::random_device dev;
+    std::mt19937 prng(dev());
+    std::uniform_int_distribution<int> rand(0);
+    filesystem::path path;
+    while (true) {
+        std::stringstream ss;
+        ss << std::hex << rand(prng);
+        path = tmp_dir / ss.str();
+        // true if the directory was created.
+        if (filesystem::create_directory(path)) {
+            break;
+        }
+        if (i == max_tries) {
+            throw std::runtime_error("could not find non-existing directory");
+        }
+        i++;
+    }
+    return path;
+}
+
+std::string gen_random(const int len) {
+
+    std::string tmp_s;
+    static const char alphanum[] = "0123456789"
+                                   "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                   "abcdefghijklmnopqrstuvwxyz";
+
+    tmp_s.reserve(len);
+    std::random_device dev;
+    std::mt19937 prng(dev());
+    std::uniform_int_distribution<int> rand(0, strlen(alphanum) - 1);
+
+    for (int i = 0; i < len; ++i)
+        tmp_s += alphanum[rand(prng)];
+
+    return tmp_s;
+}
+
+TEST(HashTest, One) {
+    auto tmpdir = create_temporary_directory();
+    filesystem::path tmpfile = tmpdir / std::string("shard");
+    ASSERT_GE(close(open(tmpfile.c_str(), O_CREAT, 0777)), 0);
+    ASSERT_GE(truncate(tmpfile.c_str(), 10 * 1024 * 1024), 0);
+
+    //
+    // Create a Read Shard and write a single object
+    //
+    shard_t *shard = shard_init(tmpfile.c_str());
+    ASSERT_NE(shard, nullptr);
+    ASSERT_GE(shard_create(shard, 1), 0);
+    const char *keyA = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+    const char *objectA = "AAAAA";
+    size_t objectA_size = strlen(objectA);
+    ASSERT_GE(shard_object_write(shard, keyA, objectA, objectA_size), 0);
+    ASSERT_GE(shard_save(shard), 0);
+    size_t found_size = 0;
+    ASSERT_GE(shard_lookup_object_size(shard, keyA, &found_size), 0);
+    ASSERT_EQ(objectA_size, found_size);
+    char *found = (char *)malloc(found_size);
+    ASSERT_GE(shard_lookup_object(shard, found, found_size), 0);
+    ASSERT_EQ(memcmp((const void *)objectA, (const void *)found, found_size),
+              0);
+    free(found);
+    ASSERT_GE(shard_destroy(shard), 0);
+
+    //
+    // Open the Read Shard created above and verify the object can be
+    // looked up.
+    //
+    shard = shard_init(tmpfile.c_str());
+    ASSERT_NE(shard, nullptr);
+    ASSERT_GE(shard_load(shard), 0);
+    found_size = 0;
+    ASSERT_GE(shard_lookup_object_size(shard, keyA, &found_size), 0);
+    ASSERT_EQ(objectA_size, found_size);
+    found = (char *)malloc(found_size);
+    ASSERT_GE(shard_lookup_object(shard, found, found_size), 0);
+    ASSERT_EQ(memcmp((const void *)objectA, (const void *)found, found_size),
+              0);
+    free(found);
+    ASSERT_GE(shard_destroy(shard), 0);
+
+    filesystem::remove_all(tmpdir);
+}
+
+TEST(HashTest, Many) {
+    auto tmpdir = create_temporary_directory();
+    filesystem::path tmpfile = tmpdir / std::string("shard");
+    ASSERT_GE(close(open(tmpfile.c_str(), O_CREAT, 0777)), 0);
+    ASSERT_GE(truncate(tmpfile.c_str(), 10 * 1024 * 1024), 0);
+
+    //
+    // Populate a Read Shard with multiple objects (objects_count)
+    // The object content and their keys are from a random source
+    // A map is kept in memory in key2object for verification
+    //
+    std::map<std::string, std::string> key2object;
+
+    shard_t *shard = shard_init(tmpfile.c_str());
+    ASSERT_NE(shard, nullptr);
+    int objects_count = 10;
+    ASSERT_GE(shard_create(shard, objects_count), 0);
+    for (int i = 0; i < objects_count; i++) {
+        std::string key = gen_random(32);
+        std::string object = gen_random(50);
+        key2object[key] = object;
+        std::cout << key << std::endl;
+        ASSERT_GE(shard_object_write(shard, key.c_str(), object.c_str(),
+                                     object.length()),
+                  0);
+    }
+    ASSERT_GE(shard_save(shard), 0);
+    ASSERT_GE(shard_destroy(shard), 0);
+
+    //
+    // Open the Read Shard for lookups, lookup every key inserted
+    // and verify the matching object has the expected content.
+    //
+    shard = shard_init(tmpfile.c_str());
+    ASSERT_NE(shard, nullptr);
+    ASSERT_GE(shard_load(shard), 0);
+    for (std::pair<std::string, std::string> p : key2object) {
+        size_t found_size = 0;
+        ASSERT_GE(shard_lookup_object_size(shard, p.first.c_str(), &found_size),
+                  0);
+        ASSERT_EQ(p.second.length(), found_size);
+        char *found = (char *)malloc(found_size);
+        ASSERT_GE(shard_lookup_object(shard, found, found_size), 0);
+        ASSERT_EQ(memcmp((const void *)p.second.c_str(), (const void *)found,
+                         found_size),
+                  0);
+        free(found);
+    }
+    ASSERT_GE(shard_destroy(shard), 0);
+
+    filesystem::remove_all(tmpdir);
+}
+
+int main(int argc, char **argv) {
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/swh/perfecthash/tests/test_hash.py b/swh/perfecthash/tests/test_hash.py
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/tests/test_hash.py
@@ -0,0 +1,144 @@
+# Copyright (C) 2021  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import random
+import time
+
+import pytest
+
+from swh.perfecthash import Shard
+
+
+def test_all(tmpdir):
+    f = f"{tmpdir}/shard"
+    open(f, "w").close()
+    os.truncate(f, 10 * 1024 * 1024)
+
+    s = Shard(f).create(2)
+    keyA = b"A" * 32
+    objectA = b"AAAA"
+    s.write(keyA, objectA)
+    keyB = b"B" * 32
+    objectB = b"BBBB"
+    s.write(keyB, objectB)
+    s.save()
+    del s
+
+    s = Shard(f).load()
+    assert s.lookup(keyA) == objectA
+    assert s.lookup(keyB) == objectB
+    del s
+
+
+@pytest.fixture
+def payload(request):
+    size = request.config.getoption("--shard-size")
+    path = request.config.getoption("--shard-path")
+    if not os.path.exists(path) or os.path.getsize(path) != size * 1024 * 1024:
+        os.system(f"dd if=/dev/urandom of={path} count={size} bs=1024k")
+    return path
+
+
+#
+# PYTHONMALLOC=malloc valgrind --tool=memcheck .tox/py3/bin/pytest \
+#    -k test_build_speed swh/perfecthash/tests/test_hash.py |& tee /tmp/v
+#
+def test_build_speed(request, tmpdir, payload):
+    start = time.time()
+    os.system(f"cp {payload} {tmpdir}/shard ; rm {tmpdir}/shard")
+    baseline = time.time() - start
+    write_duration, build_duration, _ = shard_build(request, tmpdir, payload)
+    duration = write_duration + build_duration
+    print(
+        f"baseline {baseline}, "
+        f"write_duration {write_duration}, "
+        f"build_duration {build_duration}, "
+        f"total_duration {duration}"
+    )
+    assert duration < baseline * 5
+
+
+def test_lookup_speed(request, tmpdir, payload):
+    _, _, objects = shard_build(request, tmpdir, payload)
+    for i in range(request.config.getoption("--shard-count")):
+        os.system(f"cp {tmpdir}/shard {tmpdir}/shard{i}")
+    start = time.time()
+    shard_lookups(request, tmpdir, objects)
+    duration = time.time() - start
+
+    lookups = request.config.getoption("--lookups")
+    key_per_sec = lookups / duration
+    print(f"key lookups speed = {key_per_sec:.2f}/s")
+
+
+def shard_lookups(request, tmpdir, objects):
+    shard_path = f"{tmpdir}/shard"
+    shards = []
+    for i in range(request.config.getoption("--shard-count")):
+        shards.append(Shard(f"{shard_path}{i}").load())
+    lookups = request.config.getoption("--lookups")
+    count = 0
+    while True:
+        for key, object_size in objects.items():
+            if count >= lookups:
+                return
+            for shard in shards:
+                object = shard.lookup(key)
+                assert len(object) == object_size
+                count += 1
+
+
+def shard_build(request, tmpdir, payload):
+    shard_size = request.config.getoption("--shard-size") * 1024 * 1024
+    shard_path = f"{tmpdir}/shard"
+    open(shard_path, "w").close()
+    os.truncate(shard_path, shard_size * 2)
+
+    object_max_size = request.config.getoption("--object-max-size")
+    objects = {}
+    count = 0
+    size = 0
+    with open(payload, "rb") as f:
+        while True:
+            key = f.read(32)
+            if len(key) < 32:
+                break
+            assert key not in objects
+            object = f.read(random.randrange(512, object_max_size))
+            if len(object) < 512:
+                break
+            objects[key] = len(object)
+            size += len(object)
+            count += 1
+
+    print(f"number of objects = {count}, total size = {size}")
+    assert size < shard_size
+    start = time.time()
+    shard = Shard(shard_path).create(len(objects))
+
+    count = 0
+    size = 0
+    with open(payload, "rb") as f:
+        while True:
+            key = f.read(32)
+            if len(key) < 32:
+                break
+            if key not in objects:
+                break
+            object = f.read(objects[key])
+            assert len(object) == objects[key]
+            count += 1
+            size += len(object)
+            assert shard.write(key, object) >= 0, (
+                f"object count {count}/{len(objects)}, "
+                f"size {size}, "
+                f"object size {len(object)}"
+            )
+    write_duration = time.time() - start
+    start = time.time()
+    shard.save()
+    build_duration = time.time() - start
+    return write_duration, build_duration, objects
diff --git a/swh/perfecthash/tests/test_nothing.py b/swh/perfecthash/tests/test_nothing.py
deleted file mode 100644
--- a/swh/perfecthash/tests/test_nothing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def test_nothing():
-    # Placeholder; remove this when we add actual tests
-    pass
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist=black,flake8,mypy,py3
+envlist=black,flake8,mypy,py3,c
 
 [testenv]
 extras =
@@ -19,6 +19,12 @@
 commands =
   {envpython} -m black --check swh
 
+[testenv:c]
+whitelist_externals = make
+usedevelop = true
+commands =
+  make -C swh/perfecthash check
+
 [testenv:flake8]
 skip_install = true
 deps =