diff --git a/AUTHORS b/AUTHORS
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,3 +1,3 @@
-Copyright (C) 2019 The Software Heritage developers
+Copyright (C) 2021 The Software Heritage developers
 
 See http://www.softwareheritage.org/ for more information.
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,6 @@
 include Makefile
 include requirements*.txt
 include version.txt
-include README.md
+include README.rst
 recursive-include swh py.typed
+include swh/perfecthash/hash.[ch]
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,4 +1,4 @@
-.. _swh-py-template:
+.. _swh-perfecthash:
 
 .. include:: README.rst
 
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -11,5 +11,8 @@
 [mypy-pytest.*]
 ignore_missing_imports = True
 
-# [mypy-add_your_lib_here.*]
-# ignore_missing_imports = True
+[mypy-_hash_cffi.*]
+ignore_missing_imports = True
+
+[mypy-cffi.*]
+ignore_missing_imports = True
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@
 # should match https://pypi.python.org/pypi names. For the full spec or
 # dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
 
+cffi
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -48,9 +48,10 @@
     packages=find_packages(),  # packages's modules
     install_requires=parse_requirements(None, "swh"),
     tests_require=parse_requirements("test"),
-    setup_requires=["setuptools-scm"],
+    setup_requires=["setuptools-scm", "cffi"],
     use_scm_version=True,
     extras_require={"testing": parse_requirements("test")},
+    cffi_modules=["swh/perfecthash/build.py:ffibuilder"],
     classifiers=[
         "Programming Language :: Python :: 3",
         "Intended Audience :: Developers",
diff --git a/swh/perfecthash/build.py b/swh/perfecthash/build.py
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/build.py
@@ -0,0 +1,24 @@
+from cffi import FFI
+
+ffibuilder = FFI()
+
+# cdef() expects a single string declaring the C types, functions and
+# globals needed to use the shared object. It must be in valid C syntax.
+ffibuilder.cdef(
+    """
+int build(char* path);
+"""
+)
+
+ffibuilder.set_source(
+    "_hash_cffi",
+    """
+        #include "swh/perfecthash/hash.h"
+    """,
+    sources=["swh/perfecthash/hash.c"],
+    include_dirs=["."],
+    libraries=["cmph"],
+)  # library name, for the linker
+
+if __name__ == "__main__":
+    ffibuilder.compile(verbose=True)
diff --git a/swh/perfecthash/hash.h b/swh/perfecthash/hash.h
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/hash.h
@@ -0,0 +1,12 @@
+#ifndef SWH_PERFECTHASH_HASH_H
+#define SWH_PERFECTHASH_HASH_H
+
+#include <cmph.h>
+
+/*
+ * Entry point exposed to Python through cffi (declared in build.py).
+ * Returns 0 on success.
+ */
+int build(char *path);
+
+#endif /* SWH_PERFECTHASH_HASH_H */
diff --git a/swh/perfecthash/hash.c b/swh/perfecthash/hash.c
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/hash.c
@@ -0,0 +1,258 @@
+#include "swh/perfecthash/hash.h"
+
+/*
+ * Placeholder entry point: the implementation below is still disabled, so
+ * this reports success without looking at `path`.
+ */
+int build(char *path) {
+    (void)path;
+    return 0;
+}
+
+#if 0
+/*
+ * Work in progress, excluded from the build. Remove the placeholder build()
+ * above when enabling this section.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <cmph.h>
+
+/* Assumes keys are raw SHA256 digests (32 bytes) -- TODO confirm. */
+#define SHARD_SHA256_LEN 32
+/* header_save() writes the header at the very start of the file. */
+#define SHARD_OFFSET_HEADER 0
+#define SHARD_SIZE_HEADER sizeof(shard_header_t)
+
+typedef struct {
+    size_t offset_hash;
+    size_t count;
+} shard_header_t;
+
+/* Read the header at the current position of fd; 0 on success, -1 on error. */
+int header_init(shard_header_t *shard, int fd) {
+    ssize_t got = read(fd, shard, sizeof(*shard));
+    if (got < 0) {
+        perror("read");
+        return -1;
+    }
+    if ((size_t)got != sizeof(*shard)) {
+        fprintf(stderr, "read: truncated shard header\n");
+        return -1;
+    }
+    return 0;
+}
+
+/* Write the header at SHARD_OFFSET_HEADER; 0 on success, -1 on error. */
+int header_save(shard_header_t *shard, int fd) {
+    if (lseek(fd, SHARD_OFFSET_HEADER, SEEK_SET) < 0) {
+        perror("seek");
+        return -1;
+    }
+    ssize_t put = write(fd, shard, sizeof(*shard));
+    if (put < 0) {
+        perror("write");
+        return -1;
+    }
+    if ((size_t)put != sizeof(*shard)) {
+        fprintf(stderr, "write: truncated shard header\n");
+        return -1;
+    }
+    return 0;
+}
+
+typedef struct {
+    void *addr;
+    shard_header_t header;
+    size_t size;
+    size_t offset;
+} shard_t;
+
+/* Position the cursor on the first entry, right after the header. */
+void shard_rewind(shard_t *shard) {
+    shard->offset = SHARD_OFFSET_HEADER + SHARD_SIZE_HEADER;
+}
+
+/* Map the shard read-only and load its header; 0 on success, -1 on error. */
+int shard_init(shard_t *shard, int fd)
+{
+    struct stat sb;
+    if (fstat(fd, &sb) == -1) {
+        perror("fstat");
+        return -1;
+    }
+    shard->size = (size_t)sb.st_size;
+    shard->addr = mmap(NULL, shard->size, PROT_READ, MAP_PRIVATE, fd, 0);
+    /* mmap() signals failure with MAP_FAILED, not NULL. */
+    if (shard->addr == MAP_FAILED) {
+        perror("mmap");
+        return -1;
+    }
+    if (header_init(&shard->header, fd) < 0) {
+        goto fail;
+    }
+    /* The hash function is stored after the current end of the file. */
+    shard->header.offset_hash = shard->size;
+    if (header_save(&shard->header, fd) < 0) {
+        goto fail;
+    }
+    shard_rewind(shard);
+    return 0;
+
+fail:
+    munmap(shard->addr, shard->size);
+    shard->addr = NULL;
+    return -1;
+}
+
+int shard_uninit(shard_t *shard)
+{
+    return munmap(shard->addr, shard->size);
+}
+
+/*
+ * Hand out the key of the entry under the cursor and move the cursor to the
+ * next entry. Returns the key length, or -1 when the entry would run past
+ * the end of the mapping (*key is then NULL and *keylen 0).
+ */
+int shard_read(shard_t *shard, char **key, cmph_uint32 *keylen) {
+    const size_t fixed = SHARD_SHA256_LEN + sizeof(size_t);
+    size_t size;
+
+    *key = NULL;
+    *keylen = 0;
+    if (shard->offset > shard->size || shard->size - shard->offset < fixed) {
+        return -1;
+    }
+    char *entry = (char *)shard->addr + shard->offset;
+    /* memcpy: the length field is not guaranteed to be aligned. */
+    memcpy(&size, entry + SHARD_SHA256_LEN, sizeof(size));
+    if (size > shard->size - shard->offset - fixed) {
+        return -1;
+    }
+    *key = entry;
+    *keylen = (cmph_uint32)SHARD_SHA256_LEN;
+    shard->offset += fixed + size;
+    return (int)*keylen;
+}
+
+static int io_read(void *data, char **key, cmph_uint32 *keylen) {
+    shard_t *shard = (shard_t *)data;
+    return shard_read(shard, key, keylen);
+}
+
+/* Keys point into the mapping, so there is nothing to release. */
+static void io_dispose(void *data, char *key, cmph_uint32 keylen) {
+    (void)data;
+    (void)key;
+    (void)keylen;
+}
+
+static void io_rewind(void *data) {
+    shard_rewind((shard_t *)data);
+}
+
+/*
+ * Allocate a cmph key source reading from the shard open on fd.
+ * Returns NULL on failure; nothing is leaked in that case.
+ */
+cmph_io_adapter_t *io_adapter(int fd) {
+    cmph_io_adapter_t *key_source = malloc(sizeof(*key_source));
+    if (key_source == NULL) {
+        return NULL;
+    }
+    shard_t *shard = malloc(sizeof(*shard));
+    if (shard == NULL) {
+        free(key_source);
+        return NULL;
+    }
+    if (shard_init(shard, fd) < 0) {
+        free(shard);
+        free(key_source);
+        return NULL;
+    }
+
+    key_source->data = (void *)shard;
+    key_source->nkeys = (cmph_uint32)shard->header.count;
+    key_source->read = io_read;
+    key_source->dispose = io_dispose;
+    key_source->rewind = io_rewind;
+    return key_source;
+}
+
+/*
+ * Compute a perfect hash over the keys of the shard at `path` and write it
+ * at header.offset_hash. Returns 0 on success, -1 on failure.
+ */
+int build(char *path) {
+    int rc = -1;
+    cmph_io_adapter_t *source = NULL;
+    shard_t *shard = NULL;
+    cmph_config_t *config = NULL;
+    cmph_t *hash = NULL;
+    FILE *mphf_fd = NULL;
+
+    /* shard_init() rewrites the header through fd, so it must be writable. */
+    int fd = open(path, O_RDWR);
+    if (fd < 0) {
+        perror("open");
+        return -1;
+    }
+    source = io_adapter(fd);
+    if (source == NULL) {
+        goto out_close;
+    }
+    shard = (shard_t *)source->data;
+    config = cmph_config_new(source);
+    if (config == NULL) {
+        goto out_source;
+    }
+    cmph_config_set_algo(config, CMPH_CHD_PH);
+    // cmph_config_set_keys_per_bin
+    // cmph_config_set_b
+    hash = cmph_new(config);
+    cmph_config_destroy(config);
+    if (hash == NULL) {
+        goto out_source;
+    }
+    /* "r+b" rather than "a": append mode ignores the position set by fseek. */
+    mphf_fd = fopen(path, "r+b");
+    if (mphf_fd == NULL) {
+        perror("fopen");
+        goto out_hash;
+    }
+    if (fseek(mphf_fd, (long)shard->header.offset_hash, SEEK_SET) != 0) {
+        perror("fseek");
+    } else if (cmph_dump(hash, mphf_fd) != 0) {
+        rc = 0;
+    }
+    if (fclose(mphf_fd) != 0) {
+        perror("fclose");
+        rc = -1;
+    }
+
+out_hash:
+    cmph_destroy(hash);
+out_source:
+    shard_uninit(shard);
+    free(shard);
+    free(source);
+out_close:
+    close(fd);
+    return rc;
+}
+
+/* Not implemented yet: always returns NULL. */
+cmph_t *load(char *path) {
+    (void)path;
+    return NULL;
+}
+
+#endif
diff --git a/swh/perfecthash/tests/test_hash.py b/swh/perfecthash/tests/test_hash.py
new file mode 100644
--- /dev/null
+++ b/swh/perfecthash/tests/test_hash.py
@@ -0,0 +1,5 @@
+from _hash_cffi import lib
+
+
+def test_build():
+    assert lib.build(b"path") == 0