diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..1271b63
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,5 @@
+include Makefile
+include requirements.txt
+include requirements-swh.txt
+include version.txt
+include README.md
diff --git a/es_config/elasticsearch.keystore b/es_config/elasticsearch.keystore
new file mode 100644
index 0000000..463724a
Binary files /dev/null and b/es_config/elasticsearch.keystore differ
diff --git a/es_config/elasticsearch.yml b/es_config/elasticsearch.yml
new file mode 100644
index 0000000..df04d7c
--- /dev/null
+++ b/es_config/elasticsearch.yml
@@ -0,0 +1,89 @@
+# ======================== Elasticsearch Configuration =========================
+#
+# NOTE: Elasticsearch comes with reasonable defaults for most settings.
+#       Before you set out to tweak and tune the configuration, make sure you
+#       understand what you are trying to accomplish and the consequences.
+#
+# The primary way of configuring a node is via this file. This template lists
+# the most important settings you may want to configure for a production cluster.
+#
+# Please consult the documentation for further information on configuration options:
+# https://www.elastic.co/guide/en/elasticsearch/reference/index.html
+#
+# ---------------------------------- Cluster -----------------------------------
+#
+# Use a descriptive name for your cluster:
+#
+#cluster.name: my-application
+#
+# ------------------------------------ Node ------------------------------------
+#
+# Use a descriptive name for the node:
+#
+#node.name: node-1
+node.name: node-1
+#
+# Add custom attributes to the node:
+#
+#node.attr.rack: r1
+#
+# ----------------------------------- Paths ------------------------------------
+#
+# Path to directory where to store the data (separate multiple locations by comma):
+#
+path.data: /tmp/elasticsearch
+#
+# Path to log files:
+#
+path.logs: /tmp/elasticsearch
+#
+# ----------------------------------- Memory -----------------------------------
+#
+# Lock the memory on startup:
+#
+#bootstrap.memory_lock: true
+#
+# Make sure that the heap size is set to about half the memory available
+# on the system and that the owner of the process is allowed to use this
+# limit.
+#
+# Elasticsearch performs poorly when the system is swapping the memory.
+#
+# ---------------------------------- Network -----------------------------------
+#
+# Set the bind address to a specific IP (IPv4 or IPv6):
+#
+#network.host: 192.168.0.1
+#
+# Set a custom port for HTTP:
+#
+#http.port: 9200
+#
+# For more information, consult the network module documentation.
+#
+# --------------------------------- Discovery ----------------------------------
+#
+# Pass an initial list of hosts to perform discovery when this node is started:
+# The default list of hosts is ["127.0.0.1", "[::1]"]
+#
+#discovery.seed_hosts: ["host1", "host2"]
+#
+# Bootstrap the cluster using an initial set of master-eligible nodes:
+#
+#cluster.initial_master_nodes: ["node-1", "node-2"]
+#
+# For more information, consult the discovery and cluster formation module documentation.
+#
+# ---------------------------------- Gateway -----------------------------------
+#
+# Block initial recovery after a full cluster restart until N nodes are started:
+#
+#gateway.recover_after_nodes: 3
+#
+# For more information, consult the gateway module documentation.
+#
+# ---------------------------------- Various -----------------------------------
+#
+# Require explicit names when deleting indices:
+#
+#action.destructive_requires_name: true
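Note: the template above only overrides node.name, path.data and path.logs; everything else, including the HTTP port, keeps its default. A minimal sanity check against a node started with this file, as a sketch assuming the default 127.0.0.1:9200 and using the elasticsearch client already pulled in by requirements.txt below:

    from elasticsearch import Elasticsearch

    client = Elasticsearch(['127.0.0.1:9200'])
    assert client.ping()                    # node is up and answering HTTP
    nodes = client.nodes.info()['nodes']    # per-node settings, paths, ports
    assert any(n['name'] == 'node-1' for n in nodes.values())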
diff --git a/es_config/jvm.options b/es_config/jvm.options
new file mode 100644
index 0000000..d16eba3
--- /dev/null
+++ b/es_config/jvm.options
@@ -0,0 +1,106 @@
+## JVM configuration
+
+################################################################
+## IMPORTANT: JVM heap size
+################################################################
+##
+## You should always set the min and max JVM heap
+## size to the same value. For example, to set
+## the heap to 4 GB, set:
+##
+## -Xms4g
+## -Xmx4g
+##
+## See https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html
+## for more information
+##
+################################################################
+
+# Xms represents the initial size of total heap space
+# Xmx represents the maximum size of total heap space
+
+-Xms1g
+-Xmx1g
+
+################################################################
+## Expert settings
+################################################################
+##
+## All settings below this section are considered
+## expert settings. Don't tamper with them unless
+## you understand what you are doing
+##
+################################################################
+
+## GC configuration
+-XX:+UseConcMarkSweepGC
+-XX:CMSInitiatingOccupancyFraction=75
+-XX:+UseCMSInitiatingOccupancyOnly
+
+## G1GC Configuration
+# NOTE: G1GC is only supported on JDK version 10 or later.
+# To use G1GC uncomment the lines below.
+# 10-:-XX:-UseConcMarkSweepGC
+# 10-:-XX:-UseCMSInitiatingOccupancyOnly
+# 10-:-XX:+UseG1GC
+# 10-:-XX:InitiatingHeapOccupancyPercent=75
+
+## DNS cache policy
+# cache ttl in seconds for positive DNS lookups noting that this overrides the
+# JDK security property networkaddress.cache.ttl; set to -1 to cache forever
+-Des.networkaddress.cache.ttl=60
+# cache ttl in seconds for negative DNS lookups noting that this overrides the
+# JDK security property networkaddress.cache.negative.ttl; set to -1 to cache
+# forever
+-Des.networkaddress.cache.negative.ttl=10
+
+## optimizations
+
+# pre-touch memory pages used by the JVM during initialization
+-XX:+AlwaysPreTouch
+
+## basic
+
+# explicitly set the stack size
+-Xss1m
+
+# set to headless, just in case
+-Djava.awt.headless=true
+
+# ensure UTF-8 encoding by default (e.g. filenames)
+-Dfile.encoding=UTF-8
+
+# use our provided JNA always versus the system one
+-Djna.nosys=true
+
+# turn off a JDK optimization that throws away stack traces for common
+# exceptions because stack traces are important for debugging
+-XX:-OmitStackTraceInFastThrow
+
+# flags to configure Netty
+-Dio.netty.noUnsafe=true
+-Dio.netty.noKeySetOptimization=true
+-Dio.netty.recycler.maxCapacityPerThread=0
+
+# log4j 2
+-Dlog4j.shutdownHookEnabled=false
+-Dlog4j2.disable.jmx=true
+
+-Djava.io.tmpdir=${ES_TMPDIR}
+
+## heap dumps
+
+# generate a heap dump when an allocation from the Java heap fails
+# heap dumps are created in the working directory of the JVM
+-XX:+HeapDumpOnOutOfMemoryError
+
+# specify an alternative path for heap dumps; ensure the directory exists and
+# has sufficient space
+-XX:HeapDumpPath=/var/lib/elasticsearch
+
+# specify an alternative path for JVM fatal error logs
+-XX:ErrorFile=/var/log/elasticsearch/hs_err_pid%p.log
+
+# due to internationalization enhancements in JDK 9 Elasticsearch needs to set the provider to COMPAT otherwise
+# time/date parsing will break in an incompatible way for some date patterns and locales
+9-:-Djava.locale.providers=COMPAT
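The heap guidance in the header (same -Xms/-Xmx, about half of physical memory) can be illustrated with a rough, Linux-only sketch; the 31 GB cap follows Elasticsearch's published advice to stay below the compressed-oops threshold and is not part of this patch:

    import os

    # total physical memory in bytes (POSIX sysconf, Linux-only)
    total = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')

    # half of RAM in whole GiB, clamped to [1, 31]
    heap_gb = max(1, min(31, total // (2 * 1024 ** 3)))
    print('-Xms{0}g\n-Xmx{0}g'.format(heap_gb))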
diff --git a/es_config/log4j2.properties b/es_config/log4j2.properties
new file mode 100644
index 0000000..511b66c
--- /dev/null
+++ b/es_config/log4j2.properties
@@ -0,0 +1,260 @@
+status = error
+
+# log action execution errors for easier debugging
+logger.action.name = org.elasticsearch.action
+logger.action.level = debug
+
+appender.console.type = Console
+appender.console.name = console
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
+
+######## Server JSON ############################
+appender.rolling.type = RollingFile
+appender.rolling.name = rolling
+appender.rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_server.json
+appender.rolling.layout.type = ESJsonLayout
+appender.rolling.layout.type_name = server
+
+appender.rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}-%i.json.gz
+appender.rolling.policies.type = Policies
+appender.rolling.policies.time.type = TimeBasedTriggeringPolicy
+appender.rolling.policies.time.interval = 1
+appender.rolling.policies.time.modulate = true
+appender.rolling.policies.size.type = SizeBasedTriggeringPolicy
+appender.rolling.policies.size.size = 128MB
+appender.rolling.strategy.type = DefaultRolloverStrategy
+appender.rolling.strategy.fileIndex = nomax
+appender.rolling.strategy.action.type = Delete
+appender.rolling.strategy.action.basepath = ${sys:es.logs.base_path}
+appender.rolling.strategy.action.condition.type = IfFileName
+appender.rolling.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
+appender.rolling.strategy.action.condition.nested_condition.type = IfAccumulatedFileSize
+appender.rolling.strategy.action.condition.nested_condition.exceeds = 2GB
+################################################
+######## Server - old style pattern ###########
+appender.rolling_old.type = RollingFile
+appender.rolling_old.name = rolling_old
+appender.rolling_old.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}.log
+appender.rolling_old.layout.type = PatternLayout
+appender.rolling_old.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
+
+appender.rolling_old.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}-%d{yyyy-MM-dd}-%i.log.gz
+appender.rolling_old.policies.type = Policies
+appender.rolling_old.policies.time.type = TimeBasedTriggeringPolicy
+appender.rolling_old.policies.time.interval = 1
+appender.rolling_old.policies.time.modulate = true
+appender.rolling_old.policies.size.type = SizeBasedTriggeringPolicy
+appender.rolling_old.policies.size.size = 128MB
+appender.rolling_old.strategy.type = DefaultRolloverStrategy
+appender.rolling_old.strategy.fileIndex = nomax
+appender.rolling_old.strategy.action.type = Delete
+appender.rolling_old.strategy.action.basepath = ${sys:es.logs.base_path}
+appender.rolling_old.strategy.action.condition.type = IfFileName
+appender.rolling_old.strategy.action.condition.glob = ${sys:es.logs.cluster_name}-*
+appender.rolling_old.strategy.action.condition.nested_condition.type = IfAccumulatedFileSize
+appender.rolling_old.strategy.action.condition.nested_condition.exceeds = 2GB
+################################################
+
+rootLogger.level = info
+rootLogger.appenderRef.console.ref = console
+rootLogger.appenderRef.rolling.ref = rolling
+rootLogger.appenderRef.rolling_old.ref = rolling_old
+
+######## Deprecation JSON #######################
+appender.deprecation_rolling.type = RollingFile
+appender.deprecation_rolling.name = deprecation_rolling
+appender.deprecation_rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_deprecation.json
+appender.deprecation_rolling.layout.type = ESJsonLayout
+appender.deprecation_rolling.layout.type_name = deprecation
+
+appender.deprecation_rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_deprecation-%i.json.gz
+appender.deprecation_rolling.policies.type = Policies
+appender.deprecation_rolling.policies.size.type = SizeBasedTriggeringPolicy
+appender.deprecation_rolling.policies.size.size = 1GB
+appender.deprecation_rolling.strategy.type = DefaultRolloverStrategy
+appender.deprecation_rolling.strategy.max = 4
+#################################################
+######## Deprecation - old style pattern #######
+appender.deprecation_rolling_old.type = RollingFile
+appender.deprecation_rolling_old.name = deprecation_rolling_old
+appender.deprecation_rolling_old.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_deprecation.log
+appender.deprecation_rolling_old.layout.type = PatternLayout
+appender.deprecation_rolling_old.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
+
+appender.deprecation_rolling_old.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _deprecation-%i.log.gz
+appender.deprecation_rolling_old.policies.type = Policies
+appender.deprecation_rolling_old.policies.size.type = SizeBasedTriggeringPolicy
+appender.deprecation_rolling_old.policies.size.size = 1GB
+appender.deprecation_rolling_old.strategy.type = DefaultRolloverStrategy
+appender.deprecation_rolling_old.strategy.max = 4
+#################################################
+logger.deprecation.name = org.elasticsearch.deprecation
+logger.deprecation.level = warn
+logger.deprecation.appenderRef.deprecation_rolling.ref = deprecation_rolling
+logger.deprecation.appenderRef.deprecation_rolling_old.ref = deprecation_rolling_old
+logger.deprecation.additivity = false
+
+######## Search slowlog JSON ####################
+appender.index_search_slowlog_rolling.type = RollingFile
+appender.index_search_slowlog_rolling.name = index_search_slowlog_rolling
+appender.index_search_slowlog_rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs\
+  .cluster_name}_index_search_slowlog.json
+appender.index_search_slowlog_rolling.layout.type = ESJsonLayout
+appender.index_search_slowlog_rolling.layout.type_name = index_search_slowlog
+
+appender.index_search_slowlog_rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs\
+  .cluster_name}_index_search_slowlog-%i.json.gz
+appender.index_search_slowlog_rolling.policies.type = Policies
+appender.index_search_slowlog_rolling.policies.size.type = SizeBasedTriggeringPolicy
+appender.index_search_slowlog_rolling.policies.size.size = 1GB
+appender.index_search_slowlog_rolling.strategy.type = DefaultRolloverStrategy
+appender.index_search_slowlog_rolling.strategy.max = 4
+#################################################
+######## Search slowlog - old style pattern ####
+appender.index_search_slowlog_rolling_old.type = RollingFile
+appender.index_search_slowlog_rolling_old.name = index_search_slowlog_rolling_old
+appender.index_search_slowlog_rolling_old.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_search_slowlog.log
+appender.index_search_slowlog_rolling_old.layout.type = PatternLayout
+appender.index_search_slowlog_rolling_old.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
+
+appender.index_search_slowlog_rolling_old.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_search_slowlog-%i.log.gz
+appender.index_search_slowlog_rolling_old.policies.type = Policies
+appender.index_search_slowlog_rolling_old.policies.size.type = SizeBasedTriggeringPolicy
+appender.index_search_slowlog_rolling_old.policies.size.size = 1GB
+appender.index_search_slowlog_rolling_old.strategy.type = DefaultRolloverStrategy
+appender.index_search_slowlog_rolling_old.strategy.max = 4
+#################################################
+logger.index_search_slowlog_rolling.name = index.search.slowlog
+logger.index_search_slowlog_rolling.level = trace
+logger.index_search_slowlog_rolling.appenderRef.index_search_slowlog_rolling.ref = index_search_slowlog_rolling
+logger.index_search_slowlog_rolling.appenderRef.index_search_slowlog_rolling_old.ref = index_search_slowlog_rolling_old
+logger.index_search_slowlog_rolling.additivity = false
+
+######## Indexing slowlog JSON ##################
+appender.index_indexing_slowlog_rolling.type = RollingFile
+appender.index_indexing_slowlog_rolling.name = index_indexing_slowlog_rolling
+appender.index_indexing_slowlog_rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_indexing_slowlog.json
+appender.index_indexing_slowlog_rolling.layout.type = ESJsonLayout
+appender.index_indexing_slowlog_rolling.layout.type_name = index_indexing_slowlog
+
+appender.index_indexing_slowlog_rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_indexing_slowlog-%i.json.gz
+appender.index_indexing_slowlog_rolling.policies.type = Policies
+appender.index_indexing_slowlog_rolling.policies.size.type = SizeBasedTriggeringPolicy
+appender.index_indexing_slowlog_rolling.policies.size.size = 1GB
+appender.index_indexing_slowlog_rolling.strategy.type = DefaultRolloverStrategy
+appender.index_indexing_slowlog_rolling.strategy.max = 4
+#################################################
+######## Indexing slowlog - old style pattern ##
+appender.index_indexing_slowlog_rolling_old.type = RollingFile
+appender.index_indexing_slowlog_rolling_old.name = index_indexing_slowlog_rolling_old
+appender.index_indexing_slowlog_rolling_old.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_indexing_slowlog.log
+appender.index_indexing_slowlog_rolling_old.layout.type = PatternLayout
+appender.index_indexing_slowlog_rolling_old.layout.pattern = [%d{ISO8601}][%-5p][%-25c{1.}] [%node_name]%marker %m%n
+
+appender.index_indexing_slowlog_rolling_old.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}\
+  _index_indexing_slowlog-%i.log.gz
+appender.index_indexing_slowlog_rolling_old.policies.type = Policies
+appender.index_indexing_slowlog_rolling_old.policies.size.type = SizeBasedTriggeringPolicy
+appender.index_indexing_slowlog_rolling_old.policies.size.size = 1GB
+appender.index_indexing_slowlog_rolling_old.strategy.type = DefaultRolloverStrategy
+appender.index_indexing_slowlog_rolling_old.strategy.max = 4
+#################################################
+
+logger.index_indexing_slowlog.name = index.indexing.slowlog.index
+logger.index_indexing_slowlog.level = trace
+logger.index_indexing_slowlog.appenderRef.index_indexing_slowlog_rolling.ref = index_indexing_slowlog_rolling
+logger.index_indexing_slowlog.appenderRef.index_indexing_slowlog_rolling_old.ref = index_indexing_slowlog_rolling_old
+logger.index_indexing_slowlog.additivity = false
+
+
+appender.audit_rolling.type = RollingFile
+appender.audit_rolling.name = audit_rolling
+appender.audit_rolling.fileName = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_audit.json
+appender.audit_rolling.layout.type = PatternLayout
+appender.audit_rolling.layout.pattern = {\
+                "type":"audit", \
+                "timestamp":"%d{yyyy-MM-dd'T'HH:mm:ss,SSSZ}"\
+                %varsNotEmpty{, "node.name":"%enc{%map{node.name}}{JSON}"}\
+                %varsNotEmpty{, "node.id":"%enc{%map{node.id}}{JSON}"}\
+                %varsNotEmpty{, "host.name":"%enc{%map{host.name}}{JSON}"}\
+                %varsNotEmpty{, "host.ip":"%enc{%map{host.ip}}{JSON}"}\
+                %varsNotEmpty{, "event.type":"%enc{%map{event.type}}{JSON}"}\
+                %varsNotEmpty{, "event.action":"%enc{%map{event.action}}{JSON}"}\
+                %varsNotEmpty{, "user.name":"%enc{%map{user.name}}{JSON}"}\
+                %varsNotEmpty{, "user.run_by.name":"%enc{%map{user.run_by.name}}{JSON}"}\
+                %varsNotEmpty{, "user.run_as.name":"%enc{%map{user.run_as.name}}{JSON}"}\
+                %varsNotEmpty{, "user.realm":"%enc{%map{user.realm}}{JSON}"}\
+                %varsNotEmpty{, "user.run_by.realm":"%enc{%map{user.run_by.realm}}{JSON}"}\
+                %varsNotEmpty{, "user.run_as.realm":"%enc{%map{user.run_as.realm}}{JSON}"}\
+                %varsNotEmpty{, "user.roles":%map{user.roles}}\
+                %varsNotEmpty{, "origin.type":"%enc{%map{origin.type}}{JSON}"}\
+                %varsNotEmpty{, "origin.address":"%enc{%map{origin.address}}{JSON}"}\
+                %varsNotEmpty{, "realm":"%enc{%map{realm}}{JSON}"}\
+                %varsNotEmpty{, "url.path":"%enc{%map{url.path}}{JSON}"}\
+                %varsNotEmpty{, "url.query":"%enc{%map{url.query}}{JSON}"}\
+                %varsNotEmpty{, "request.method":"%enc{%map{request.method}}{JSON}"}\
+                %varsNotEmpty{, "request.body":"%enc{%map{request.body}}{JSON}"}\
+                %varsNotEmpty{, "request.id":"%enc{%map{request.id}}{JSON}"}\
+                %varsNotEmpty{, "action":"%enc{%map{action}}{JSON}"}\
+                %varsNotEmpty{, "request.name":"%enc{%map{request.name}}{JSON}"}\
+                %varsNotEmpty{, "indices":%map{indices}}\
+                %varsNotEmpty{, "opaque_id":"%enc{%map{opaque_id}}{JSON}"}\
+                %varsNotEmpty{, "x_forwarded_for":"%enc{%map{x_forwarded_for}}{JSON}"}\
+                %varsNotEmpty{, "transport.profile":"%enc{%map{transport.profile}}{JSON}"}\
+                %varsNotEmpty{, "rule":"%enc{%map{rule}}{JSON}"}\
+                %varsNotEmpty{, "event.category":"%enc{%map{event.category}}{JSON}"}\
+                }%n
+# "node.name" node name from the `elasticsearch.yml` settings
+# "node.id" node id which should not change between cluster restarts
+# "host.name" unresolved hostname of the local node
+# "host.ip" the local bound ip (i.e. the ip listening for connections)
+# "event.type" a received REST request is translated into one or more transport requests. This indicates which processing layer generated the event "rest" or "transport" (internal)
+# "event.action" the name of the audited event, e.g. "authentication_failed", "access_granted", "run_as_granted", etc.
+# "user.name" the subject name as authenticated by a realm
+# "user.run_by.name" the original authenticated subject name that is impersonating another one.
+# "user.run_as.name" if this "event.action" is of a run_as type, this is the subject name to be impersonated as.
+# "user.realm" the name of the realm that authenticated "user.name"
+# "user.run_by.realm" the realm name of the impersonating subject ("user.run_by.name")
+# "user.run_as.realm" if this "event.action" is of a run_as type, this is the realm name the impersonated user is looked up from
+# "user.roles" the roles array of the user; these are the roles that are granting privileges
+# "origin.type" it is "rest" if the event is originating (is in relation to) a REST request; possible other values are "transport" and "ip_filter"
+# "origin.address" the remote address and port of the first network hop, i.e. a REST proxy or another cluster node
+# "realm" name of a realm that has generated an "authentication_failed" or an "authentication_successful"; the subject is not yet authenticated
+# "url.path" the URI component between the port and the query string; it is percent (URL) encoded
+# "url.query" the URI component after the path and before the fragment; it is percent (URL) encoded
+# "request.method" the method of the HTTP request, i.e. one of GET, POST, PUT, DELETE, OPTIONS, HEAD, PATCH, TRACE, CONNECT
+# "request.body" the content of the request body entity, JSON escaped
+# "request.id" a synthetic identifier for the incoming request, this is unique per incoming request, and consistent across all audit events generated by that request
+# "action" an action is the most granular operation that is authorized and this identifies it in a namespaced way (internal)
+# "request.name" if the event is in connection to a transport message this is the name of the request class, similar to how rest requests are identified by the url path (internal)
+# "indices" the array of indices that the "action" is acting upon
+# "opaque_id" opaque value conveyed by the "X-Opaque-Id" request header
+# "x_forwarded_for" the addresses from the "X-Forwarded-For" request header, as a verbatim string value (not an array)
+# "transport.profile" name of the transport profile in case this is a "connection_granted" or "connection_denied" event
+# "rule" name of the applied rule if the "origin.type" is "ip_filter"
+# "event.category" fixed value "elasticsearch-audit"
+
+appender.audit_rolling.filePattern = ${sys:es.logs.base_path}${sys:file.separator}${sys:es.logs.cluster_name}_audit-%d{yyyy-MM-dd}.json
+appender.audit_rolling.policies.type = Policies
+appender.audit_rolling.policies.time.type = TimeBasedTriggeringPolicy
+appender.audit_rolling.policies.time.interval = 1
+appender.audit_rolling.policies.time.modulate = true
+
+logger.xpack_security_audit_logfile.name = org.elasticsearch.xpack.security.audit.logfile.LoggingAuditTrail
+logger.xpack_security_audit_logfile.level = info
+logger.xpack_security_audit_logfile.appenderRef.audit_rolling.ref = audit_rolling
+logger.xpack_security_audit_logfile.additivity = false
+
+logger.xmlsig.name = org.apache.xml.security.signature.XMLSignature
+logger.xmlsig.level = error
+logger.samlxml_decrypt.name = org.opensaml.xmlsec.encryption.support.Decrypter
+logger.samlxml_decrypt.level = fatal
+logger.saml2_decrypt.name = org.opensaml.saml.saml2.encryption.Decrypter
+logger.saml2_decrypt.level = fatal
diff --git a/jvm.options b/jvm.options
new file mode 100644
index 0000000..e69de29
diff --git a/log4j2.properties b/log4j2.properties
new file mode 100644
index 0000000..e69de29
diff --git a/requirements-swh.txt b/requirements-swh.txt
new file mode 100644
index 0000000..ae53050
--- /dev/null
+++ b/requirements-swh.txt
@@ -0,0 +1,4 @@
+# Add here internal Software Heritage dependencies, one per line.
+swh.core
+swh.journal
+swh.model
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 0000000..f03976e
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,2 @@
+pytest
+pytest-elasticsearch
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fff18ad
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+# Add here external Python modules dependencies, one per line. Module names
+# should match https://pypi.python.org/pypi names. For the full spec of
+# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
+vcversioner
+click
+elasticsearch>=7.0.0,<8.0.0
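The elasticsearch line above pins the client to the 7.x series. To double-check a candidate version against that range, the packaging library (not a dependency of this patch) evaluates the same specifier syntax pip does:

    from packaging.specifiers import SpecifierSet

    spec = SpecifierSet('>=7.0.0,<8.0.0')
    assert '7.1.0' in spec       # any 7.x release satisfies the pin
    assert '8.0.0' not in spec   # 8.x is excluded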
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..a66caa1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python3
+# Copyright (C) 2015-2018 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from setuptools import setup, find_packages
+
+from os import path
+from io import open
+
+here = path.abspath(path.dirname(__file__))
+
+# Get the long description from the README file
+with open(path.join(here, 'README.md'), encoding='utf-8') as f:
+    long_description = f.read()
+
+
+def parse_requirements(name=None):
+    if name:
+        reqf = 'requirements-%s.txt' % name
+    else:
+        reqf = 'requirements.txt'
+
+    requirements = []
+    if not path.exists(reqf):
+        return requirements
+
+    with open(reqf) as f:
+        for line in f.readlines():
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            requirements.append(line)
+    return requirements
+
+
+setup(
+    name='swh.search',
+    description='Software Heritage search service',
+    long_description=long_description,
+    long_description_content_type='text/markdown',
+    author='Software Heritage developers',
+    author_email='swh-devel@inria.fr',
+    url='https://forge.softwareheritage.org/diffusion/DSEA',
+    packages=find_packages(),  # package's modules
+    install_requires=parse_requirements() + parse_requirements('swh'),
+    tests_require=parse_requirements('test'),
+    entry_points='''
+        [swh.cli.subcommands]
+        search=swh.search.cli:search
+    ''',
+    setup_requires=['vcversioner'],
+    extras_require={'testing': parse_requirements('test')},
+    vcversioner={},
+    include_package_data=True,
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+        "Operating System :: OS Independent",
+        "Development Status :: 3 - Alpha",
+    ],
+    project_urls={
+        'Bug Reports': 'https://forge.softwareheritage.org/maniphest',
+        'Funding': 'https://www.softwareheritage.org/donate',
+        'Source': 'https://forge.softwareheritage.org/source/swh-search',
+    },
+)
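parse_requirements() above simply returns the non-blank, non-comment lines of the matching requirements file, so with the files added in this diff it behaves roughly like this (illustrative session, run from the repository root):

    >>> parse_requirements()           # reads requirements.txt
    ['vcversioner', 'click', 'elasticsearch>=7.0.0,<8.0.0']
    >>> parse_requirements('swh')      # reads requirements-swh.txt
    ['swh.core', 'swh.journal', 'swh.model']
    >>> parse_requirements('missing')  # absent file: empty list, not an error
    []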
diff --git a/swh/__init__.py b/swh/__init__.py
new file mode 100644
index 0000000..69e3be5
--- /dev/null
+++ b/swh/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/swh/search/__init__.py b/swh/search/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/search/cli.py b/swh/search/cli.py
new file mode 100644
index 0000000..69778b5
--- /dev/null
+++ b/swh/search/cli.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import click
+
+from swh.core.cli import CONTEXT_SETTINGS
+
+
+@click.group(name='search', context_settings=CONTEXT_SETTINGS)
+@click.pass_context
+def search(ctx):
+    '''Software Heritage Search tools.'''
+    pass
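The group above becomes the `swh search` subcommand through the swh.cli.subcommands entry point declared in setup.py. It can also be exercised directly with click's test runner; a sketch assuming swh.core is installed:

    from click.testing import CliRunner

    from swh.search.cli import search

    runner = CliRunner()
    result = runner.invoke(search, ['--help'])
    assert 'Software Heritage Search tools.' in result.output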
diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py
new file mode 100644
index 0000000..cf34c72
--- /dev/null
+++ b/swh/search/elasticsearch.py
@@ -0,0 +1,145 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import base64
+from typing import Iterable, Dict, List, Iterator
+
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import bulk, scan
+import msgpack
+
+from swh.model import model
+from swh.model.identifiers import origin_identifier
+
+
+class ElasticSearch:
+    def __init__(self, hosts: List[str]):
+        self._backend = Elasticsearch(hosts=hosts)
+
+    def check(self):
+        return self._backend.ping()
+
+    def initialize(self) -> None:
+        self._backend.indices.create(
+            index='origin',
+            body={
+                'mappings': {
+                    'properties': {
+                        'url': {
+                            'type': 'text',
+                            # TODO: consider removing fielddata when
+                            # swh-storage allows querying by hash, so the
+                            # full URL does not have to be stored in ES'
+                            # memory. See:
+                            # https://www.elastic.co/guide/en/elasticsearch/reference/current/fielddata.html#before-enabling-fielddata
+                            'fielddata': True,
+                            'analyzer': 'simple',
+                            'fields': {
+                                'as_you_type': {
+                                    'type': 'search_as_you_type',
+                                    'analyzer': 'simple',
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        )
+
+    def origin_add(self, origins: Iterable[model.Origin]) -> None:
+        origins = (origin.to_dict() for origin in origins)
+        actions = [
+            {
+                '_id': origin_identifier(origin),
+                '_index': 'origin',
+                '_source': origin,
+            }
+            for origin in origins
+        ]
+        bulk(self._backend, actions, index='origin', refresh='wait_for')
+
+    def origin_dump(self) -> Iterator[model.Origin]:
+        results = list(scan(self._backend, index='*'))
+        for hit in results:
+            yield self._backend.termvectors(
+                index='origin', id=hit['_id'],
+                fields=['url', 'url.as_you_type', 'url.as_you_type._2gram',
+                        'url.as_you_type._3gram', 'url._2gram', 'url._3gram'])
+
+    def origin_search(
+            self, url_substring: str, cursor: str = None,
+            count: int = 50) -> Dict[str, object]:
+        """Searches for origins matching the `url_substring`.
+
+        Args:
+            url_substring (str): Part of the URL to search for
+            cursor (str): opaque value used for pagination
+            count (int): number of results to return
+
+        Returns:
+            a dictionary with keys:
+            * `cursor`:
+              opaque value used for fetching more results. `None` if there
+              are no more results.
+            * `results`:
+              list of dictionaries with key:
+              * `url`: URL of a matching origin
+        """
+        body = {
+            'query': {
+                'multi_match': {
+                    'query': url_substring,
+                    'type': 'bool_prefix',
+                    'fields': [
+                        'url.as_you_type',
+                        'url.as_you_type._2gram',
+                        'url.as_you_type._3gram',
+                    ]
+                }
+            },
+            'size': count,
+            'sort': [
+                {'_score': 'desc'},
+                {'url': 'asc'},
+            ]
+        }
+        if cursor:
+            cursor = msgpack.loads(base64.b64decode(cursor), raw=False)
+            body['search_after'] = [cursor['score'], cursor['url']]
+
+        res = self._backend.search(
+            index='origin',
+            body=body,
+            size=count,
+        )
+
+        hits = res['hits']['hits']
+
+        if len(hits) == count:
+            last_hit = hits[-1]
+            next_cursor = {
+                'score': last_hit['_score'],
+                'url': last_hit['_source']['url'],
+            }
+            next_cursor = base64.b64encode(msgpack.dumps(next_cursor))
+        else:
+            next_cursor = None
+
+        return {
+            'cursor': next_cursor,
+            'results': [
+                {'url': hit['_source']['url']} for hit in hits
+            ]
+        }
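End to end, the class above is used as follows; a sketch assuming a node reachable on 127.0.0.1:9200 and a purely illustrative origin URL:

    from swh.model.model import Origin
    from swh.search.elasticsearch import ElasticSearch

    search = ElasticSearch(['127.0.0.1:9200'])
    search.initialize()      # creates the 'origin' index and its mapping
    search.origin_add([Origin(url='https://example.com/repo.git', type=None)])

    page = search.origin_search('example')
    print(page['results'])   # [{'url': 'https://example.com/repo.git'}]
    print(page['cursor'])    # None, since there are fewer hits than `count`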
diff --git a/swh/search/tests/__init__.py b/swh/search/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swh/search/tests/conftest.py b/swh/search/tests/conftest.py
new file mode 100644
index 0000000..c37cef7
--- /dev/null
+++ b/swh/search/tests/conftest.py
@@ -0,0 +1,108 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import socket
+import subprocess
+import time
+
+import elasticsearch
+import pytest
+
+
+def free_port():
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.bind(('127.0.0.1', 0))
+    port = sock.getsockname()[1]
+    sock.close()
+    return port
+
+
+def wait_for_peer(addr, port):
+    while True:
+        try:
+            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            sock.connect((addr, port))
+        except ConnectionRefusedError:
+            time.sleep(0.1)
+        else:
+            sock.close()
+            break
+
+
+CONFIG_TEMPLATE = '''
+node.name: node-1
+path.data: {data}
+path.logs: {logs}
+network.host: 127.0.0.1
+http.port: {http_port}
+transport.port: {transport_port}
+'''
+
+
+def _run_elasticsearch(conf_dir, data_dir, logs_dir, http_port,
+                       transport_port):
+    es_home = '/usr/share/elasticsearch'
+
+    with open(conf_dir + '/elasticsearch.yml', 'w') as fd:
+        fd.write(CONFIG_TEMPLATE.format(
+            data=data_dir,
+            logs=logs_dir,
+            http_port=http_port,
+            transport_port=transport_port))
+
+    with open(conf_dir + '/log4j2.properties', 'w') as fd:
+        pass
+
+    cmd = [
+        '/usr/share/elasticsearch/jdk/bin/java',
+        '-Des.path.home={}'.format(es_home),
+        '-Des.path.conf={}'.format(conf_dir),
+        '-Des.bundled_jdk=true',
+        '-Dlog4j2.disable.jmx=true',
+        '-cp', '{}/lib/*'.format(es_home),
+        'org.elasticsearch.bootstrap.Elasticsearch',
+    ]
+
+    host = '127.0.0.1:{}'.format(http_port)
+
+    with open(logs_dir + '/output.txt', 'w') as fd:
+        p = subprocess.Popen(cmd, stdout=fd, stderr=fd)
+
+    wait_for_peer('127.0.0.1', http_port)
+
+    client = elasticsearch.Elasticsearch([host])
+    assert client.ping()
+
+    return p
+
+
+@pytest.fixture(scope='session')
+def elasticsearch_session(tmpdir_factory):
+    tmpdir = tmpdir_factory.mktemp('elasticsearch')
+    es_conf = tmpdir.mkdir('conf')
+
+    http_port = free_port()
+    transport_port = free_port()
+
+    p = _run_elasticsearch(
+        conf_dir=str(es_conf),
+        data_dir=str(tmpdir.mkdir('data')),
+        logs_dir=str(tmpdir.mkdir('logs')),
+        http_port=http_port,
+        transport_port=transport_port,
+    )
+
+    yield '127.0.0.1:{}'.format(http_port)
+
+    # Check ES didn't stop on its own during the test session
+    assert p.poll() is None
+
+    p.kill()
+    p.wait()
+
+
+@pytest.fixture(scope='function')
+def elasticsearch_host(elasticsearch_session):
+    client = elasticsearch.Elasticsearch([elasticsearch_session])
+    client.indices.delete(index='*')
+    yield elasticsearch_session
diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
new file mode 100644
index 0000000..f23d78f
--- /dev/null
+++ b/swh/search/tests/test_search.py
@@ -0,0 +1,37 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.model.model import Origin
+from swh.search.elasticsearch import ElasticSearch
+
+
+def test_origin_url_unique_substring(elasticsearch_host):
+    search = ElasticSearch([elasticsearch_host])
+    search.initialize()
+    search.origin_add([
+        Origin(url='http://foobar.baz', type=None),
+        Origin(url='http://barbaz.qux', type=None),
+    ])
+    search.origin_dump()
+
+    results = search.origin_search('foobar')
+    assert results == {'cursor': None,
+                       'results': [{'url': 'http://foobar.baz'}]}
+
+    results = search.origin_search('barb')
+    assert results == {'cursor': None,
+                       'results': [{'url': 'http://barbaz.qux'}]}
+
+    # 'bar' is part of 'foobar', but is not the beginning of it
+    results = search.origin_search('bar')
+    assert results == {'cursor': None,
+                       'results': [{'url': 'http://barbaz.qux'}]}
+
+    results = search.origin_search('barbaz')
+    assert results == {'cursor': None,
+                       'results': [{'url': 'http://barbaz.qux'}]}
+
+    results = search.origin_search('qux')
+    assert results == {'cursor': None,
+                       'results': [{'url': 'http://barbaz.qux'}]}
diff --git a/tox.ini b/tox.ini
index 335f4ed..5b56c67 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,23 +1,24 @@
 [tox]
 envlist=check-manifest,flake8,py3
 
 [testenv:py3]
 deps =
   .[testing]
   pytest-cov
+  https://github.com/ClearcodeHQ/pytest-elasticsearch/tarball/master#egg=pytest-elasticsearch
 commands =
   pytest --cov=swh --cov-branch {posargs}
 
 [testenv:flake8]
 skip_install = true
 deps =
   flake8
 commands =
   {envpython} -m flake8
 
 [testenv:check-manifest]
 skip_install = true
 deps =
   check-manifest
 commands =
   {envpython} -m check_manifest {toxinidir}
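A natural follow-up test, sketched here rather than part of this patch, would exercise the pagination cursor by forcing count=1 and walking pages until origin_search returns a None cursor:

    def test_origin_search_pagination(elasticsearch_host):
        search = ElasticSearch([elasticsearch_host])
        search.initialize()
        search.origin_add([Origin(url='http://example.com/%d' % i, type=None)
                           for i in range(3)])

        urls = []
        cursor = None
        while True:
            page = search.origin_search('example', cursor=cursor, count=1)
            urls.extend(r['url'] for r in page['results'])
            cursor = page['cursor']
            if cursor is None:
                break

        assert sorted(urls) == ['http://example.com/%d' % i for i in range(3)]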