Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123758
D6796.id24859.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
62 KB
Subscribers
None
D6796.id24859.diff
View Options
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@
version.txt
.tox
.mypy_cache/
+winery-test-environment/context
+
+
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@
include requirements*.txt
include version.txt
recursive-include swh py.typed
+exclude winery-test-environment
diff --git a/docs/index.rst b/docs/index.rst
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,4 +13,5 @@
:maxdepth: 2
cli
+ winery
/apidoc/swh.objstorage
diff --git a/docs/winery.rst b/docs/winery.rst
new file mode 100644
--- /dev/null
+++ b/docs/winery.rst
@@ -0,0 +1,42 @@
+.. _swh-objstorage-winery:
+
+Winery backend
+==============
+
+The Winery backend implements the `Ceph based object storage architecture <https://wiki.softwareheritage.org/wiki/A_practical_approach_to_efficiently_store_100_billions_small_objects_in_Ceph>`__.
+
+Benchmarks
+----------
+
+Setup via Fed4Fire
+~~~~~~~~~~~~~~~~~~
+
+* https://www.fed4fire.eu/
+
+Create a base rspec specification.
+
+* /opt/jFed/jFed-Experimenter
+* In the General Tab
+* Create an experiment (New)
+* Add one Physical Node by dragging it
+* Right click on the node and choose "Configure Node"
+* Select testbed: Grid 5000
+* Node => Specific hardware type: dahu-grenoble
+* Disk image => Bullseye base
+* Save under sample.rspec
+* Manually edit to duplicate the nodes
+
+Run the experiment.
+
+* /opt/jFed/jFed-Experimenter
+* In the General Tab
+* Open Local and load winery-test-environment/fed4fire.rspec
+* Edit ceph1 node to check if the Specific hardware type is dahu-grenoble
+* Click on Topology Viewer
+* Run
+* Give a unique name to the experiment
+* Start experiment
+* Once the provisioning is complete (Testing connectivity to resources on Grid5000) click "Export As"
+* Choose "Export Configuration Management Settings"
+* Save under /tmp/test.zip
+* ( cd winery-test-environment ; ./fed4fire.sh /tmp/test.zip )
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -19,3 +19,12 @@
[mypy-requests_toolbelt.*]
ignore_missing_imports = True
+
+[mypy-psycopg2.*]
+ignore_missing_imports = True
+
+[mypy-swh.perfecthash.*]
+ignore_missing_imports = True
+
+[mypy-sh.*]
+ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,3 @@
swh.core[http] >= 0.3
swh.model >= 0.0.27
+swh.perfecthash
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,7 +1,11 @@
apache-libcloud
azure-storage-blob >= 12.0, != 12.9.0 # version 12.9.0 breaks mypy https://github.com/Azure/azure-sdk-for-python/pull/20891
pytest
+pytest-asyncio
+pytest-mock
requests_mock[fixture] >= 1.9
requests_toolbelt
types-pyyaml
types-requests
+pytest-postgresql < 4.0.0 # version 4.0 depends on psycopg 3. https://github.com/ClearcodeHQ/pytest-postgresql/blob/main/CHANGES.rst#400
+
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,8 @@
aiohttp >= 3
click
requests
+psycopg2
+sh
# optional dependencies
# apache-libcloud
diff --git a/swh/objstorage/backends/winery/__init__.py b/swh/objstorage/backends/winery/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/__init__.py
@@ -0,0 +1 @@
+from .objstorage import WineryObjStorage # noqa: F401
diff --git a/swh/objstorage/backends/winery/bench.py b/swh/objstorage/backends/winery/bench.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/bench.py
@@ -0,0 +1,145 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import asyncio
+import concurrent.futures
+import logging
+import os
+import random
+import time
+
+from swh.objstorage.factory import get_objstorage
+
+LOGGER = logging.getLogger(__name__)
+
+
+def work(kind, args):
+ return Worker(args).run(kind)
+
+
+class Worker(object):
+ def __init__(self, args):
+ self.args = args
+
+ def run(self, kind):
+ getattr(self, kind)()
+ return kind
+
+ def ro(self):
+ self.winery = get_objstorage(
+ cls="winery",
+ readonly=True,
+ base_dsn=self.args["base_dsn"],
+ shard_dsn=self.args["shard_dsn"],
+ shard_max_size=self.args["shard_max_size"],
+ )
+ with self.winery.base.db.cursor() as c:
+ while True:
+ c.execute(
+ "SELECT signature FROM signature2shard WHERE inflight = FALSE "
+ "ORDER BY random() LIMIT %s",
+ (self.args["ro_worker_max_request"],),
+ )
+ if c.rowcount > 0:
+ break
+ LOGGER.info(f"Worker(ro, {os.getpid()}): empty, waiting")
+ time.sleep(1)
+ LOGGER.info(f"Worker(ro, {os.getpid()}): requesting {c.rowcount} objects")
+ start = time.time()
+ for row in c:
+ obj_id = row[0].tobytes()
+ assert self.winery.get(obj_id) is not None
+ elapsed = time.time() - start
+ LOGGER.info(f"Worker(ro, {os.getpid()}): finished ({elapsed:.2f}s)")
+
+ def payloads_define(self):
+ self.payloads = [
+ 3 * 1024,
+ 3 * 1024,
+ 3 * 1024,
+ 3 * 1024,
+ 3 * 1024,
+ 10 * 1024,
+ 13 * 1024,
+ 16 * 1024,
+ 70 * 1024,
+ 80 * 1024,
+ ]
+
+ def rw(self):
+ self.winery = get_objstorage(
+ cls="winery",
+ base_dsn=self.args["base_dsn"],
+ shard_dsn=self.args["shard_dsn"],
+ shard_max_size=self.args["shard_max_size"],
+ )
+ self.payloads_define()
+ random_content = open("/dev/urandom", "rb")
+ LOGGER.info(f"Worker(rw, {os.getpid()}): start")
+ start = time.time()
+ count = 0
+ while len(self.winery.packers) == 0:
+ content = random_content.read(random.choice(self.payloads))
+ self.winery.add(content=content)
+ count += 1
+ LOGGER.info(f"Worker(rw, {os.getpid()}): packing {count} objects")
+ packer = self.winery.packers[0]
+ packer.join()
+ assert packer.exitcode == 0
+ elapsed = time.time() - start
+ LOGGER.info(f"Worker(rw, {os.getpid()}): finished ({elapsed:.2f}s)")
+
+
+class Bench(object):
+ def __init__(self, args):
+ self.args = args
+
+ def timer_start(self):
+ self.start = time.time()
+
+ def timeout(self):
+ return time.time() - self.start > self.args["duration"]
+
+ async def run(self):
+ self.timer_start()
+
+ loop = asyncio.get_running_loop()
+
+ workers_count = self.args["rw_workers"] + self.args["ro_workers"]
+ with concurrent.futures.ProcessPoolExecutor(
+ max_workers=workers_count
+ ) as executor:
+
+ LOGGER.info("Bench.run: running")
+
+ self.count = 0
+ workers = set()
+
+ def create_worker(kind):
+ self.count += 1
+ LOGGER.info(f"Bench.run: launched {kind} worker number {self.count}")
+ return loop.run_in_executor(executor, work, kind, self.args)
+
+ for kind in ["rw"] * self.args["rw_workers"] + ["ro"] * self.args[
+ "ro_workers"
+ ]:
+ workers.add(create_worker(kind))
+
+ while len(workers) > 0:
+ LOGGER.info(f"Bench.run: waiting for {len(workers)} workers")
+ current = workers
+ done, pending = await asyncio.wait(
+ current, return_when=asyncio.FIRST_COMPLETED
+ )
+ workers = pending
+ for task in done:
+ kind = task.result()
+ LOGGER.info(f"Bench.run: worker {kind} complete")
+ if not self.timeout():
+ workers.add(create_worker(kind))
+
+ LOGGER.info("Bench.run: finished")
+
+ return self.count
diff --git a/swh/objstorage/backends/winery/database.py b/swh/objstorage/backends/winery/database.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/database.py
@@ -0,0 +1,84 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import time
+
+import psycopg2
+
+LOGGER = logging.getLogger(__name__)
+
+
+class Database:
+ def __init__(self, dsn):
+ self.dsn = dsn
+
+ def create_database(self, database):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ f"SELECT datname FROM pg_catalog.pg_database WHERE datname = '{database}'"
+ )
+ if c.rowcount == 0:
+ try:
+ c.execute(f"CREATE DATABASE {database}")
+ except psycopg2.errors.UniqueViolation:
+ # someone else created the database, it is fine
+ pass
+ c.close()
+
+ def drop_database(self, database):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ # https://wiki.postgresql.org/wiki/Psycopg2_Tutorial
+ # If you want to drop the database you would need to
+ # change the isolation level of the database.
+ db.set_isolation_level(0)
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ "SELECT pg_terminate_backend(pg_stat_activity.pid)"
+ "FROM pg_stat_activity "
+ "WHERE pg_stat_activity.datname = %s;",
+ (database,),
+ )
+ #
+ # Dropping the database may fail because the server takes time
+ # to notice a connection was dropped and/or a named cursor is
+ # in the process of being deleted. It can happen here or even
+ # when deleting all database with the psql cli
+ # and there are no bench process active.
+ #
+ # ERROR: database "i606428a5a6274d1ab09eecc4d019fef7" is being
+ # accessed by other users DETAIL: There is 1 other session
+ # using the database.
+ #
+ # See:
+ # https://stackoverflow.com/questions/5108876/kill-a-postgresql-session-connection
+ #
+ # https://www.postgresql.org/docs/current/sql-dropdatabase.html
+ #
+ # WITH (FORCE) added in postgresql 13 but may also fail because the
+ # named cursor may not be handled as a client.
+ #
+ for i in range(60):
+ try:
+ c.execute(f"DROP DATABASE IF EXISTS {database}")
+ break
+ except psycopg2.errors.ObjectInUse:
+ LOGGER.warning(f"{database} database drop fails, waiting")
+ time.sleep(10)
+ continue
+ raise Exception("database drop fails {database}")
+ c.close()
+
+ def list_databases(self):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ with db.cursor() as c:
+ c.execute(
+ "SELECT datname FROM pg_database "
+ "WHERE datistemplate = false and datname != 'postgres'"
+ )
+ return [r[0] for r in c.fetchall()]
diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -0,0 +1,133 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+from multiprocessing import Process
+
+from swh.objstorage import exc
+from swh.objstorage.objstorage import ObjStorage, compute_hash
+
+from .roshard import ROShard
+from .rwshard import RWShard
+from .sharedbase import SharedBase
+
+LOGGER = logging.getLogger(__name__)
+
+
+def pack(shard, **kwargs):
+ return Packer(shard, **kwargs).run()
+
+
+class Packer:
+ def __init__(self, shard, **kwargs):
+ self.args = kwargs
+ self.shard = shard
+ self.init()
+
+ def init(self):
+ self.rw = RWShard(self.shard, **self.args)
+ self.ro = ROShard(self.shard, **self.args)
+
+ def uninit(self):
+ del self.ro
+ self.rw.uninit()
+
+ def run(self):
+ shard = self.ro.create(self.rw.count())
+ for obj_id, content in self.rw.all():
+ shard.write(obj_id, content)
+ shard.save()
+ base = SharedBase(**self.args)
+ base.shard_packing_ends(self.shard)
+ base.uninit()
+ self.rw.uninit()
+ self.rw.drop()
+ return True
+
+
+class WineryObjStorage(ObjStorage):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.args = kwargs
+ self.packers = []
+ self.init()
+
+ def init(self):
+ self.base = SharedBase(**self.args)
+ if not self.args.get("readonly"):
+ self.shard = RWShard(self.base.whoami, **self.args)
+
+ def uninit(self):
+ if not self.args.get("readonly"):
+ self.shard.uninit()
+ self.base.uninit()
+
+ def roshard(self, name):
+ shard = ROShard(name, **self.args)
+ shard.load()
+ return shard
+
+ def get(self, obj_id):
+ shard_info = self.base.get(obj_id)
+ if shard_info is None:
+ raise exc.ObjNotFoundError(obj_id)
+ name, readonly = shard_info
+ if readonly:
+ shard = self.roshard(name)
+ content = shard.get(obj_id)
+ del shard
+ else:
+ shard = RWShard(name, **self.args)
+ content = shard.get(obj_id)
+ if content is None:
+ raise exc.ObjNotFoundError(obj_id)
+ return content
+
+ def check_config(self, *, check_write):
+ return True
+
+ def __contains__(self, obj_id):
+ return self.base.contains(obj_id)
+
+ def add(self, content, obj_id=None, check_presence=True):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+
+ if check_presence and obj_id in self:
+ return obj_id
+
+ shard = self.base.add_phase_1(obj_id)
+ if shard != self.base.id:
+ # this object is the responsibility of another shard
+ return obj_id
+
+ self.shard.add(obj_id, content)
+ self.base.add_phase_2(obj_id)
+
+ if self.shard.is_full():
+ self.pack()
+
+ return obj_id
+
+ def check(self, obj_id):
+ # load all shards readonly == NULL and not locked (i.e. packer
+ # was interrupted for whatever reason) run pack for each of them
+ pass
+
+ def delete(self, obj_id):
+ raise PermissionError("Delete is not allowed.")
+
+ def pack(self):
+ self.base.shard_packing_starts()
+ p = Process(target=pack, args=(self.shard.name,), kwargs=self.args)
+ self.uninit()
+ p.start()
+ self.packers.append(p)
+ self.init()
+
+ def __del__(self):
+ for p in self.packers:
+ p.kill()
+ p.join()
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -0,0 +1,119 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+import sh
+
+from swh.perfecthash import Shard
+
+LOGGER = logging.getLogger(__name__)
+
+
+class Pool(object):
+ name = "shards"
+
+ def __init__(self, **kwargs):
+ self.args = kwargs
+
+ def init(self):
+ self.rbd = sh.sudo.bake("rbd", f"--pool={self.name}")
+ self.ceph = sh.sudo.bake("ceph")
+ self.image_size = self.args["shard_max_size"] * 2
+
+ def uninit(self):
+ pass
+
+ def image_list(self):
+ try:
+ self.rbd.ls()
+ except sh.ErrorReturnCode_2 as e:
+ if "No such file or directory" in e.args[0]:
+ return []
+ else:
+ raise
+ return [image.strip() for image in self.rbd.ls()]
+
+ def image_path(self, image):
+ return f"/dev/rbd/{self.name}/{image}"
+
+ def image_create(self, image):
+ LOGGER.info(f"rdb --pool {self.name} create --size={self.image_size} {image}")
+ self.rbd.create(
+ f"--size={self.image_size}", f"--data-pool={self.name}-data", image
+ )
+ self.rbd.feature.disable(
+ f"{self.name}/{image}", "object-map", "fast-diff", "deep-flatten"
+ )
+ self.image_map(image, "rw")
+
+ def image_map(self, image, options):
+ self.rbd.device("map", "-o", options, image)
+ sh.sudo("chmod", "777", self.image_path(image))
+
+ def image_remap_ro(self, image):
+ self.image_unmap(image)
+ self.image_map(image, "ro")
+
+ def image_unmap(self, image):
+ self.rbd.device.unmap(f"{self.name}/{image}", _ok_code=(0, 22))
+
+ def image_delete(self, image):
+ self.image_unmap(image)
+ LOGGER.info(f"rdb --pool {self.name} remove {image}")
+ self.rbd.remove(image)
+
+ def images_clobber(self):
+ for image in self.image_list():
+ image = image.strip()
+ self.image_unmap(image)
+
+ def clobber(self):
+ self.images_clobber()
+ self.pool_clobber()
+
+ def pool_clobber(self):
+ LOGGER.info(f"ceph osd pool delete {self.name}")
+ self.ceph.osd.pool.delete(self.name, self.name, "--yes-i-really-really-mean-it")
+ data = f"{self.name}-data"
+ LOGGER.info(f"ceph osd pool delete {data}")
+ self.ceph.osd.pool.delete(data, data, "--yes-i-really-really-mean-it")
+
+ def pool_create(self):
+ data = f"{self.name}-data"
+ LOGGER.info(f"ceph osd pool create {data}")
+ self.ceph.osd(
+ "erasure-code-profile",
+ "set",
+ "--force",
+ data,
+ "k=4",
+ "m=2",
+ "crush-failure-domain=host",
+ )
+ self.ceph.osd.pool.create(data, "200", "erasure", data)
+ self.ceph.osd.pool.set(data, "allow_ec_overwrites", "true")
+ self.ceph.osd.pool.set(data, "pg_autoscale_mode", "off")
+ LOGGER.info(f"ceph osd pool create {self.name}")
+ self.ceph.osd.pool.create(self.name)
+
+
+class ROShard:
+ def __init__(self, name, **kwargs):
+ self.pool = Pool(shard_max_size=kwargs["shard_max_size"])
+ self.pool.init()
+ self.name = name
+
+ def create(self, count):
+ self.pool.image_create(self.name)
+ self.shard = Shard(self.pool.image_path(self.name))
+ return self.shard.create(count)
+
+ def load(self):
+ self.shard = Shard(self.pool.image_path(self.name))
+ return self.shard.load() == self.shard
+
+ def get(self, key):
+ return self.shard.lookup(key)
diff --git a/swh/objstorage/backends/winery/rwshard.py b/swh/objstorage/backends/winery/rwshard.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/rwshard.py
@@ -0,0 +1,87 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+import psycopg2
+
+from .database import Database
+
+
+class RWShard(Database):
+ def __init__(self, name, **kwargs):
+ super().__init__(kwargs["shard_dsn"])
+ self._name = name
+ self.create_database(self.name)
+ self.db = self.create_table(f"{self.dsn}/{self.name}")
+ self.size = self.total_size()
+ self.limit = kwargs["shard_max_size"]
+
+ def uninit(self):
+ if hasattr(self, "db"):
+ self.db.close()
+ del self.db
+
+ @property
+ def name(self):
+ return self._name
+
+ def is_full(self):
+ return self.size > self.limit
+
+ def drop(self):
+ self.drop_database(self.name)
+
+ def create_table(self, dsn):
+ db = psycopg2.connect(dsn)
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS objects("
+ "key BYTEA PRIMARY KEY, "
+ "content BYTEA "
+ ")"
+ )
+ c.close()
+ return db
+
+ def total_size(self):
+ with self.db.cursor() as c:
+ c.execute("SELECT SUM(LENGTH(content)) FROM objects")
+ size = c.fetchone()[0]
+ if size is None:
+ return 0
+ else:
+ return size
+
+ def add(self, obj_id, content):
+ try:
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO objects (key, content) VALUES (%s, %s)",
+ (obj_id, content),
+ )
+ self.db.commit()
+ self.size += len(content)
+ except psycopg2.errors.UniqueViolation:
+ pass
+
+ def get(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute("SELECT content FROM objects WHERE key = %s", (obj_id,))
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()[0].tobytes()
+
+ def all(self):
+ with self.db.cursor() as c:
+ c.execute("SELECT key,content FROM objects")
+ for row in c:
+ yield row[0].tobytes(), row[1].tobytes()
+
+ def count(self):
+ with self.db.cursor() as c:
+ c.execute("SELECT COUNT(*) FROM objects")
+ return c.fetchone()[0]
diff --git a/swh/objstorage/backends/winery/sharedbase.py b/swh/objstorage/backends/winery/sharedbase.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/sharedbase.py
@@ -0,0 +1,188 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import uuid
+
+import psycopg2
+
+from .database import Database
+
+
+class SharedBase(Database):
+ def __init__(self, **kwargs):
+ super().__init__(kwargs["base_dsn"])
+ database = "sharedbase"
+ self.create_database(database)
+ self.db = self.create_table(f"{self.dsn}/{database}")
+ self._whoami = None
+
+ def uninit(self):
+ self.db.close()
+ del self.db
+
+ def create_table(self, dsn):
+ db = psycopg2.connect(dsn)
+ db.autocommit = True
+ c = db.cursor()
+ lock = 314116
+ c.execute("SELECT pg_advisory_lock(%s)", (lock,))
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS shards("
+ "id SERIAL PRIMARY KEY, "
+ "readonly BOOLEAN NOT NULL, "
+ "packing BOOLEAN NOT NULL, "
+ "name CHAR(32) NOT NULL "
+ ")"
+ )
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS signature2shard("
+ "signature BYTEA PRIMARY KEY, "
+ "inflight BOOLEAN NOT NULL, "
+ "shard INTEGER NOT NULL"
+ ")"
+ )
+ c.close()
+ db.close() # so the pg_advisory_lock is released
+ db = psycopg2.connect(dsn)
+ db.autocommit = True
+ return db
+
+ @property
+ def whoami(self):
+ self.set_whoami()
+ return self._whoami
+
+ @property
+ def id(self):
+ self.set_whoami()
+ return self._whoami_id
+
+ def set_whoami(self):
+ if self._whoami is not None:
+ return
+
+ while True:
+ self._whoami, self._whoami_id = self.lock_a_shard()
+ if self._whoami is not None:
+ return self._whoami
+ self.create_shard()
+
+ def lock_a_shard(self):
+ with self.db.cursor() as c:
+ c.execute(
+ "SELECT name FROM shards WHERE readonly = FALSE and packing = FALSE "
+ "LIMIT 1 FOR UPDATE SKIP LOCKED"
+ )
+ if c.rowcount == 0:
+ return None, None
+ name = c.fetchone()[0]
+ return self.lock_shard(name)
+
+ def lock_shard(self, name):
+ self.whoami_lock = self.db.cursor()
+ try:
+ self.whoami_lock.execute(
+ "SELECT name, id FROM shards "
+ "WHERE readonly = FALSE AND packing = FALSE AND name = %s "
+ "FOR UPDATE NOWAIT",
+ (name,),
+ )
+ return self.whoami_lock.fetchone()
+ except psycopg2.Error:
+ return None
+
+ def unlock_shard(self):
+ del self.whoami_lock
+
+ def create_shard(self):
+ name = uuid.uuid4().hex
+ #
+ # ensure the first character is not a number so it can be used as a
+ # database name.
+ #
+ name = "i" + name[1:]
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO shards (name, readonly, packing) "
+ "VALUES (%s, FALSE, FALSE)",
+ (name,),
+ )
+ self.db.commit()
+
+ def shard_packing_starts(self):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE shards SET packing = TRUE WHERE name = %s", (self.whoami,)
+ )
+ self.unlock_shard()
+
+ def shard_packing_ends(self, name):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE shards SET readonly = TRUE, packing = FALSE " "WHERE name = %s",
+ (name,),
+ )
+
+ def get_shard_info(self, id):
+ with self.db.cursor() as c:
+ c.execute("SELECT name, readonly FROM shards WHERE id = %s", (id,))
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()
+
+ def get_shard_info_by_name(self, name):
+ with self.db.cursor() as c:
+ c.execute("SELECT readonly, packing FROM shards WHERE name = %s", (name,))
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()
+
+ def list_shards(self):
+ with self.db.cursor() as c:
+ c.execute("SELECT name, readonly, packing FROM shards")
+ for row in c:
+ yield row[0], row[1], row[2]
+
+ def contains(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute(
+ "SELECT shard FROM signature2shard WHERE "
+ "signature = %s AND inflight = FALSE",
+ (obj_id,),
+ )
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()[0]
+
+ def get(self, obj_id):
+ id = self.contains(obj_id)
+ if id is None:
+ return None
+ return self.get_shard_info(id)
+
+ def add_phase_1(self, obj_id):
+ try:
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO signature2shard (signature, shard, inflight) "
+ "VALUES (%s, %s, TRUE)",
+ (obj_id, self.id),
+ )
+ self.db.commit()
+ return self.id
+ except psycopg2.errors.UniqueViolation:
+ return self.contains(obj_id)
+
+ def add_phase_2(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE signature2shard SET inflight = FALSE "
+ "WHERE signature = %s AND shard = %s",
+ (obj_id, self.id),
+ )
+ self.db.commit()
diff --git a/swh/objstorage/factory.py b/swh/objstorage/factory.py
--- a/swh/objstorage/factory.py
+++ b/swh/objstorage/factory.py
@@ -13,6 +13,7 @@
from swh.objstorage.backends.noop import NoopObjStorage
from swh.objstorage.backends.pathslicing import PathSlicingObjStorage
from swh.objstorage.backends.seaweedfs import SeaweedFilerObjStorage
+from swh.objstorage.backends.winery import WineryObjStorage
from swh.objstorage.multiplexer import MultiplexerObjStorage, StripingObjStorage
from swh.objstorage.multiplexer.filter import add_filters
from swh.objstorage.objstorage import ID_HASH_LENGTH, ObjStorage # noqa
@@ -27,6 +28,7 @@
"seaweedfs": SeaweedFilerObjStorage,
"random": RandomGeneratorObjStorage,
"http": HTTPReadOnlyObjStorage,
+ "winery": WineryObjStorage,
"noop": NoopObjStorage,
}
diff --git a/swh/objstorage/tests/conftest.py b/swh/objstorage/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/conftest.py
@@ -0,0 +1,35 @@
+def pytest_configure(config):
+ config.addinivalue_line("markers", "shard_max_size: winery backend")
+
+
+def pytest_addoption(parser):
+ parser.addoption(
+ "--winery-bench-rw-workers",
+ type=int,
+ help="Number of Read/Write workers",
+ default=1,
+ )
+ parser.addoption(
+ "--winery-bench-ro-workers",
+ type=int,
+ help="Number of Readonly workers",
+ default=1,
+ )
+ parser.addoption(
+ "--winery-bench-duration",
+ type=int,
+ help="Duration of the benchmarks in seconds",
+ default=1,
+ )
+ parser.addoption(
+ "--winery-shard-max-size",
+ type=int,
+ help="Size of the shard in bytes",
+ default=10 * 1024 * 1024,
+ )
+ parser.addoption(
+ "--winery-bench-ro-worker-max-request",
+ type=int,
+ help="Number of requests a ro worker performs",
+ default=1,
+ )
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -0,0 +1,221 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import time
+
+import pytest
+import sh
+
+from swh.objstorage import exc
+from swh.objstorage.backends.winery.bench import Bench, work
+from swh.objstorage.backends.winery.database import Database
+from swh.objstorage.backends.winery.objstorage import Packer, pack
+from swh.objstorage.backends.winery.roshard import Pool
+from swh.objstorage.factory import get_objstorage
+
+
+@pytest.fixture
+def needs_ceph():
+ try:
+ sh.ceph("--version")
+ except sh.CommandNotFound:
+ pytest.skip("the ceph CLI was not found")
+
+
+@pytest.fixture
+def ceph_pool(needs_ceph):
+ pool = Pool(shard_max_size=10 * 1024 * 1024)
+ pool.init()
+ pool.clobber()
+ pool.pool_create()
+
+ yield pool
+
+ pool.images_clobber()
+ pool.clobber()
+
+
+@pytest.fixture
+def winery(request, postgresql):
+ marker = request.node.get_closest_marker("shard_max_size")
+ if marker is None:
+ shard_max_size = 1024
+ else:
+ shard_max_size = marker.args[0]
+ dsn = (
+ f"postgres://{postgresql.info.user}"
+ f":@{postgresql.info.host}:{postgresql.info.port}"
+ )
+ winery = get_objstorage(
+ cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=shard_max_size
+ )
+ yield winery
+ winery.uninit()
+ #
+ # pytest-postgresql will not remove databases that it did not
+ # create between tests (only at the very end).
+ #
+ d = Database(dsn)
+ for database in d.list_databases():
+ if database != postgresql.info.dbname and database != "tests_tmpl":
+ d.drop_database(database)
+
+
+def test_winery_sharedbase(winery):
+ base = winery.base
+ shard1 = base.whoami
+ assert shard1 is not None
+ assert shard1 == base.whoami
+
+ id1 = base.id
+ assert id1 is not None
+ assert id1 == base.id
+
+
+def test_winery_add_get(winery):
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ obj_id = winery.add(content=content)
+ assert obj_id.hex() == "0c8c841f7d9fd4874d841506d3ffc16808b1d579"
+ assert winery.add(content=content, obj_id=obj_id) == obj_id
+ assert winery.add(content=content, obj_id=obj_id, check_presence=False) == obj_id
+ assert winery.base.whoami == shard
+ assert winery.get(obj_id) == content
+ with pytest.raises(exc.ObjNotFoundError):
+ winery.get(b"unknown")
+ winery.shard.drop()
+
+
+@pytest.mark.shard_max_size(1)
+def test_winery_add_and_pack(winery, mocker):
+ mocker.patch("swh.objstorage.backends.winery.objstorage.pack", return_value=True)
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ obj_id = winery.add(content=content)
+ assert obj_id.hex() == "0c8c841f7d9fd4874d841506d3ffc16808b1d579"
+ assert winery.base.whoami != shard
+ assert len(winery.packers) == 1
+ packer = winery.packers[0]
+ packer.join()
+ assert packer.exitcode == 0
+
+
+def test_winery_delete(winery):
+ with pytest.raises(PermissionError):
+ winery.delete(None)
+
+
+def test_winery_get_shard_info(winery):
+ assert winery.base.get_shard_info(1234) is None
+ assert winery.base.get_shard_info_by_name("nothing") is None
+
+
+@pytest.mark.shard_max_size(10 * 1024 * 1024)
+def test_winery_packer(winery, ceph_pool):
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ winery.add(content=content)
+ winery.base.shard_packing_starts()
+ packer = Packer(shard, **winery.args)
+ try:
+ assert packer.run() is True
+ finally:
+ packer.uninit()
+
+ readonly, packing = winery.base.get_shard_info_by_name(shard)
+ assert readonly is True
+ assert packing is False
+
+
+@pytest.mark.shard_max_size(10 * 1024 * 1024)
+def test_winery_get_object(winery, ceph_pool):
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ obj_id = winery.add(content=content)
+ winery.base.shard_packing_starts()
+ assert pack(shard, **winery.args) is True
+ winery.base.get_shard_info_by_name(shard)
+ assert winery.get(obj_id) == content
+
+
+def test_winery_ceph_pool(needs_ceph):
+ name = "IMAGE"
+ pool = Pool(shard_max_size=10 * 1024 * 1024)
+ pool.init()
+ pool.clobber()
+ pool.pool_create()
+ pool.image_create(name)
+ p = pool.image_path(name)
+ assert p.endswith(name)
+ something = "SOMETHING"
+ open(p, "w").write(something)
+ assert open(p).read(len(something)) == something
+ assert pool.image_list() == [name]
+ pool.image_remap_ro(name)
+ pool.images_clobber()
+ assert pool.image_list() == [name]
+ pool.clobber()
+ assert pool.image_list() == []
+
+
+@pytest.mark.shard_max_size(10 * 1024 * 1024)
+def test_winery_bench_work(winery, ceph_pool, tmpdir):
+ #
+ # rw worker creates a shard
+ #
+ whoami = winery.base.whoami
+ shards_info = list(winery.base.list_shards())
+ assert len(shards_info) == 1
+ shard, readonly, packing = shards_info[0]
+ assert (readonly, packing) == (False, False)
+ winery.args["dir"] = str(tmpdir)
+ assert work("rw", winery.args) == "rw"
+ shards_info = {
+ name: (readonly, packing)
+ for name, readonly, packing in winery.base.list_shards()
+ }
+ assert len(shards_info) == 2
+ assert shards_info[whoami] == (True, False)
+ #
+ # ro worker reads a shard
+ #
+ winery.args["ro_worker_max_request"] = 1
+ assert work("ro", winery.args) == "ro"
+
+
+@pytest.mark.asyncio
+async def test_winery_bench_real(pytestconfig, postgresql, ceph_pool):
+ dsn = (
+ f"postgres://{postgresql.info.user}"
+ f":@{postgresql.info.host}:{postgresql.info.port}"
+ )
+ kwargs = {
+ "rw_workers": pytestconfig.getoption("--winery-bench-rw-workers"),
+ "ro_workers": pytestconfig.getoption("--winery-bench-ro-workers"),
+ "shard_max_size": pytestconfig.getoption("--winery-shard-max-size"),
+ "ro_worker_max_request": pytestconfig.getoption(
+ "--winery-bench-ro-worker-max-request"
+ ),
+ "duration": pytestconfig.getoption("--winery-bench-duration"),
+ "base_dsn": dsn,
+ "shard_dsn": dsn,
+ }
+ assert await Bench(kwargs).run() == kwargs["rw_workers"] + kwargs["ro_workers"]
+
+
+@pytest.mark.asyncio
+async def test_winery_bench_fake(pytestconfig, mocker):
+ kwargs = {
+ "rw_workers": pytestconfig.getoption("--winery-bench-rw-workers"),
+ "ro_workers": pytestconfig.getoption("--winery-bench-ro-workers"),
+ "duration": pytestconfig.getoption("--winery-bench-duration"),
+ }
+
+ def run(kind):
+ time.sleep(kwargs["duration"] * 2)
+ return kind
+
+ mocker.patch("swh.objstorage.backends.winery.bench.Worker.run", side_effect=run)
+ assert await Bench(kwargs).run() == kwargs["rw_workers"] + kwargs["ro_workers"]
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist=flake8,py3,mypy
+envlist=flake8,py3,winery,mypy
[testenv]
extras =
@@ -10,7 +10,12 @@
commands =
pytest --cov={envsitepackagesdir}/swh/objstorage \
{envsitepackagesdir}/swh/objstorage \
- --cov-branch {posargs}
+ --cov-branch --cov-report term-missing {posargs}
+
+[testenv:winery]
+allowlist_externals = bash
+commands =
+ bash {toxinidir}/winery-test-environment/remote-tox.sh {posargs}
[testenv:black]
skip_install = true
diff --git a/winery-test-environment/README.md b/winery-test-environment/README.md
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/README.md
@@ -0,0 +1,51 @@
+The purpose of these instructions is to run `tox -e py3` in an
+environment that has access to a ceph cluster. It enables tests that
+would otherwise be skipped and increases code coverage.
+
+The environment is composed of eight machines named ceph1 to ceph8.
+
+# Installation
+
+* pip install -r requirements.txt
+* ansible-galaxy install geerlingguy.docker
+
+# Create the machines
+
+## libvirt
+
+* ensure virsh is available
+* ./build-vms.sh
+
+If the internet connection is slow it may take a while before the OSDs show up
+because they require downloading large docker images.
+
+## grid5000
+
+* follow the instructions at https://docs.softwareheritage.org/devel/swh-objstorage/winery.html
+* fed4fire.sh test7.zip
+
+# Install the machines
+
+* ansible-playbook -i inventory context/setup.yml ceph.yml bootstrap.yml osd.yml tests.yml
+
+# Run the tests
+
+It copies the content of the repository and runs "ssh ceph1 tox -e py3"
+
+* tox -e winery
+
+# Login into a machine
+
+For each host found in context/ssh-config
+
+* ssh -i context/cluster_key -F context/ssh-config ceph1
+
+# Destroy
+
+## libvirt
+
+* ./build-vms.sh stop $(seq 1 8)
+
+## grid5000
+
+It will expire on its own
diff --git a/winery-test-environment/ansible.cfg b/winery-test-environment/ansible.cfg
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/ansible.cfg
@@ -0,0 +1,7 @@
+[defaults]
+private_key_file = ./context/cluster_key
+host_key_checking = false
+
+[ssh_connection]
+ssh_args = -F context/ssh-config
+scp_if_ssh = True
diff --git a/winery-test-environment/bootstrap.yml b/winery-test-environment/bootstrap.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/bootstrap.yml
@@ -0,0 +1,31 @@
+- hosts: mon
+ gather_facts: no
+ become: true
+
+ tasks:
+
+ - name: scp context/ceph_key.*
+ copy:
+ src: "context/{{ item }}"
+ dest: "{{ item }}"
+ loop:
+ - ceph_key
+ - ceph_key.pub
+
+ - name: cephadm bootstrap
+ shell: |
+ set -ex
+ cephadm bootstrap --mon-ip {{ hostvars[groups['mon'][0]]['ansible_default_ipv4']['address'] }}
+ cephadm shell ceph cephadm clear-key
+ ceph config-key set mgr/cephadm/ssh_identity_key -i ceph_key
+ ceph config-key set mgr/cephadm/ssh_identity_pub -i ceph_key.pub
+ ceph orch apply osd --all-available-devices
+ args:
+ creates: /etc/ceph/ceph.pub
+
+ - name: cephadm shell ceph mgr fail
+ shell: |
+ set -ex
+ ceph config set mon mon_allow_pool_delete true
+ # does not work for some reason: must be done manually
+ cephadm shell ceph mgr fail # required for mgr/cephadm/ssh_identity* to be refreshed
diff --git a/winery-test-environment/build-vms.sh b/winery-test-environment/build-vms.sh
new file mode 100755
--- /dev/null
+++ b/winery-test-environment/build-vms.sh
@@ -0,0 +1,125 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+function ssh_key() {
+ if ! test -f cluster_key; then
+ ssh-keygen -f cluster_key -N '' -t rsa
+ fi
+}
+
+function stop() {
+ local ids="$@"
+
+ for id in $ids ; do
+ virsh destroy ceph$id
+ virsh undefine ceph$id
+ rm -f ceph$id.qcow2
+ rm -f disk$id*.img
+ done
+ virsh net-destroy ceph
+ virsh net-undefine ceph
+}
+
+function start() {
+ local ids="$@"
+
+ ssh_key
+ > ssh-config
+
+ if ! test -f debian-11.qcow2 ; then
+ sudo virt-builder debian-11 --output debian-11.qcow2 --size 10G --format qcow2 --install sudo --run-command 'dpkg-reconfigure --frontend=noninteractive openssh-server' --run-command 'useradd -s /bin/bash -m debian || true ; echo "debian ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-debian' --ssh-inject debian:file:cluster_key.pub --edit '/etc/network/interfaces: s/ens2/enp1s0/'
+ fi
+
+ if ! virsh net-list --name | grep ceph ; then
+ cat > ceph-net.xml <<EOF
+ <network>
+ <name>ceph</name>
+ <forward mode='nat'/>
+ <bridge name='virbrceph' stp='on' delay='0'/>
+ <ip address='10.11.12.1' netmask='255.255.255.0'>
+ <dhcp>
+ <range start='10.11.12.100' end='10.11.12.200'/>
+ <host mac='52:54:00:00:00:01' name='ceph1' ip='10.11.12.211'/>
+ <host mac='52:54:00:00:00:02' name='ceph2' ip='10.11.12.212'/>
+ <host mac='52:54:00:00:00:03' name='ceph3' ip='10.11.12.213'/>
+ <host mac='52:54:00:00:00:04' name='ceph4' ip='10.11.12.214'/>
+ <host mac='52:54:00:00:00:05' name='ceph5' ip='10.11.12.215'/>
+ <host mac='52:54:00:00:00:06' name='ceph6' ip='10.11.12.216'/>
+ <host mac='52:54:00:00:00:07' name='ceph7' ip='10.11.12.217'/>
+ <host mac='52:54:00:00:00:08' name='ceph8' ip='10.11.12.218'/>
+ <host mac='52:54:00:00:00:09' name='ceph9' ip='10.11.12.219'/>
+ </dhcp>
+ </ip>
+ </network>
+EOF
+ virsh net-define ceph-net.xml
+ virsh net-start ceph
+ fi
+
+
+ for id in $ids ; do
+ virsh destroy ceph$id
+ virsh undefine ceph$id
+ rm -f ceph$id.qcow2
+ cp --sparse=always debian-11.qcow2 ceph$id.qcow2
+ sudo virt-sysprep -a ceph$id.qcow2 --enable customize --hostname ceph$id
+ virt-install --network network=ceph,mac=52:54:00:00:00:0$id --boot hd --name ceph$id --memory 2048 --vcpus 1 --cpu host --disk path=$(pwd)/ceph$id.qcow2,bus=virtio,format=qcow2 --os-type=linux --os-variant=debian10 --graphics none --noautoconsole
+ case $id in
+ 1)
+ ;;
+ 2)
+ virsh detach-device ceph$id $BASE_DIR/rng.xml --live
+ for drive in b c ; do
+ #
+ # Without the sleep it fails with:
+ #
+ # error: Failed to attach disk
+ # error: internal error: No more available PCI slots
+ #
+ sleep 10
+ rm -f disk$id$drive.img
+ qemu-img create -f raw disk$id$drive.img 20G
+ sudo chown libvirt-qemu disk$id$drive.img
+ virsh attach-disk ceph$id --source $(pwd)/disk$id$drive.img --target vd$drive --persistent
+ done
+ ;;
+ *)
+ rm -f disk$id.img
+ qemu-img create -f raw disk$id.img 20G
+ sudo chown libvirt-qemu disk$id.img
+ virsh attach-disk ceph$id --source $(pwd)/disk$id.img --target vdb --persistent
+ ;;
+ esac
+ cat >> ssh-config <<EOF
+Host ceph$id
+ HostName 10.11.12.21$id
+ Port 22
+ User debian
+ IdentityFile $(pwd)/cluster_key
+ ForwardAgent yes
+ TCPKeepAlive yes
+ Compression no
+ CheckHostIP no
+ StrictHostKeyChecking no
+EOF
+ done
+}
+
+function restart() {
+ local ids="$@"
+ stop $ids
+ start $ids
+}
+
+BASE_DIR=$(pwd)
+mkdir -p context
+ln -sf $(pwd)/libvirt.yml context/setup.yml
+cd context
+
+if test "$1" ; then
+ "$@"
+else
+ restart 1 2 3 5 4 6 7 8
+fi
diff --git a/winery-test-environment/ceph.yml b/winery-test-environment/ceph.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/ceph.yml
@@ -0,0 +1,93 @@
+#
+# notes to install a client
+# https://docs.ceph.com/en/latest/cephadm/client-setup/
+# ceph config generate-minimal-conf > /etc/ceph/ceph.conf
+# ceph auth get-or-create client.admin > /etc/ceph/ceph.keyring
+#
+- hosts: localhost
+ gather_facts: false
+
+ pre_tasks:
+
+ - name: keygen ceph_key
+ shell: |
+ mkdir -p context
+ ssh-keygen -f context/ceph_key -N '' -t rsa
+ args:
+ creates: context/ceph_key
+
+- hosts: all
+ become: true
+
+ pre_tasks:
+
+ - name: mkdir /root/.ssh
+ file:
+ path: /root/.ssh
+ state: directory
+ mode: 0700
+
+ - name: touch /root/.ssh/authorized_keys
+ file:
+ path: /root/.ssh/authorized_keys
+ state: touch
+
+ - name: add context/ceph_key.pub to /root/.ssh/authorized_keys
+ lineinfile:
+ path: /root/.ssh/authorized_keys
+ line: "{{ lookup('file', 'context/ceph_key.pub') }}"
+
+ - name: apt install
+ apt:
+ name:
+ - htop
+ - iotop
+ - iftop
+ - iperf
+
+- hosts: ceph
+ become: true
+
+ pre_tasks:
+
+ - name: apt install lvm2 curl gnupg2
+ apt:
+ name:
+ - lvm2
+ - curl
+ - gnupg2
+
+ - name: apt-key https://download.ceph.com/keys/release.asc
+ apt_key:
+ url: https://download.ceph.com/keys/release.asc
+
+ - name: add repository
+ apt_repository:
+ repo: "deb https://download.ceph.com/debian-pacific/ bullseye main"
+ filename: ceph
+
+ - name: apt install cephadm ceph-common
+ apt:
+ name:
+ - cephadm
+ - ceph-common
+
+ roles:
+ - geerlingguy.docker
+
+- hosts: all
+ become: true
+ # so that lineinfile does not race against itself
+ serial: 1
+
+ tasks:
+
+ - name: "add {{ inventory_hostname }} to /etc/hosts"
+ lineinfile:
+ path: /etc/hosts
+ line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ inventory_hostname }}"
+ delegate_to: ceph1
+
+ - name: set hostname
+ hostname:
+ name: "{{ inventory_hostname }}"
diff --git a/winery-test-environment/fed4fire.rspec b/winery-test-environment/fed4fire.rspec
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/fed4fire.rspec
@@ -0,0 +1,59 @@
+<?xml version='1.0'?>
+<rspec xmlns="http://www.geni.net/resources/rspec/3" type="request" generated_by="jFed RSpec Editor" generated="2021-12-12T13:02:49.068+01:00" xmlns:emulab="http://www.protogeni.net/resources/rspec/ext/emulab/1" xmlns:delay="http://www.protogeni.net/resources/rspec/ext/delay/1" xmlns:jfed-command="http://jfed.iminds.be/rspec/ext/jfed-command/1" xmlns:client="http://www.protogeni.net/resources/rspec/ext/client/1" xmlns:jfed-ssh-keys="http://jfed.iminds.be/rspec/ext/jfed-ssh-keys/1" xmlns:jfed="http://jfed.iminds.be/rspec/ext/jfed/1" xmlns:sharedvlan="http://www.protogeni.net/resources/rspec/ext/shared-vlan/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.geni.net/resources/rspec/3 http://www.geni.net/resources/rspec/3/request.xsd ">
+ <node client_id="ceph1" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="58.0" y="93.0"/>
+ </node>
+ <node client_id="ceph2" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="158.0" y="93.0"/>
+ </node>
+ <node client_id="ceph3" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="258.0" y="93.0"/>
+ </node>
+ <node client_id="ceph4" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="358.0" y="93.0"/>
+ </node>
+ <node client_id="ceph5" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="458.0" y="93.0"/>
+ </node>
+ <node client_id="ceph6" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="58.0" y="193.0"/>
+ </node>
+ <node client_id="ceph7" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="158.0" y="193.0"/>
+ </node>
+ <node client_id="ceph8" exclusive="true" component_manager_id="urn:publicid:IDN+am.grid5000.fr+authority+am">
+ <sliver_type name="raw-pc">
+ <disk_image name="urn:publicid:IDN+am.grid5000.fr+image+kadeploy3:debian11-x64-base"/>
+ </sliver_type>
+ <hardware_type name="dahu-grenoble"/>
+ <location xmlns="http://jfed.iminds.be/rspec/ext/jfed/1" x="258.0" y="193.0"/>
+ </node>
+</rspec>
\ No newline at end of file
diff --git a/winery-test-environment/fed4fire.sh b/winery-test-environment/fed4fire.sh
new file mode 100755
--- /dev/null
+++ b/winery-test-environment/fed4fire.sh
@@ -0,0 +1,34 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+set -e
+
+function context() {
+ local fed4fire=$1
+
+ if ! test "$fed4fire" ; then
+ return
+ fi
+
+ rm -fr ./context/fed4fire
+ mkdir -p ./context/fed4fire
+ cp $fed4fire ./context/fed4fire/fed4fire.zip
+ local here=$(pwd)
+ (
+ cd ./context/fed4fire
+ unzip fed4fire.zip
+ sed -i \
+ -e 's|IdentityFile ./id_rsa$|IdentityFile '"${here}"'/context/cluster_key|' \
+ -e "s|-F ssh-config|-F ${here}/context/ssh-config|" \
+ ssh-config
+ cp ssh-config ..
+ mv id_rsa ../cluster_key
+ mv id_rsa.pub ../cluster_key.pub
+ )
+}
+
+ln -sf $(pwd)/grid5000.yml context/setup.yml
+
+context "$@"
diff --git a/winery-test-environment/grid5000.yml b/winery-test-environment/grid5000.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/grid5000.yml
@@ -0,0 +1,79 @@
+# https://www.grid5000.fr/w/Docker#Using_docker-cache.grid5000.fr
+
+- hosts: mon
+ gather_facts: no
+ become: true
+
+ tasks:
+
+ - name: Add the user 'debian'
+ user:
+ name: debian
+ group: debian
+
+ - name: Allow 'debian' group to have passwordless sudo
+ lineinfile:
+ dest: /etc/sudoers
+ state: present
+ regexp: '^%debian'
+ line: '%debian ALL=(ALL) NOPASSWD: ALL'
+ validate: visudo -cf %s
+
+ - name: mkdir /home/debian/.ssh
+ file:
+ path: /home/debian/.ssh
+ state: directory
+ mode: 0700
+ owner: debian
+ group: debian
+
+
+ - name: copy authorized_keys to /home/debian
+ shell: |
+ cp /root/.ssh/authorized_keys /home/debian/.ssh/authorized_keys
+ chown debian:debian /home/debian/.ssh/authorized_keys
+ chmod 0600 /home/debian/.ssh/authorized_keys
+
+- hosts: osd
+ become: true
+
+ tasks:
+
+ # do that before lvm gets a chance to investigate and get the wrong idea
+ # about /dev/sdc on grid5000 because there surely will be leftovers from
+ # whoever used the machine last
+ - name: zap /dev/sdc
+ shell: |
+ dd if=/dev/zero of=/dev/sdc count=100 bs=1024k
+
+- hosts: all
+ become: true
+
+ pre_tasks:
+
+ - name: mkdir /etc/docker
+ file:
+ path: /etc/docker
+ state: directory
+ mode: 755
+
+ roles:
+ - geerlingguy.docker
+
+ tasks:
+
+ - name: docker cache
+ copy:
+ content: |
+ {
+ "registry-mirrors": [
+ "http://docker-cache.grid5000.fr"
+ ],
+ "bip": "192.168.42.1/24"
+ }
+ dest: /etc/docker/daemon.json
+
+ - name: systemctl restart docker
+ service:
+ name: docker
+ state: restarted
diff --git a/winery-test-environment/inventory/group_vars/all/rw.yml b/winery-test-environment/inventory/group_vars/all/rw.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/inventory/group_vars/all/rw.yml
@@ -0,0 +1,5 @@
+---
+rw_disk1: /dev/vdb
+rw_disk2: /dev/vdc
+postgres_shared_buffers: 512MB
+postgres_effective_cache_size: 1GB
diff --git a/winery-test-environment/inventory/groups.yml b/winery-test-environment/inventory/groups.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/inventory/groups.yml
@@ -0,0 +1,13 @@
+---
+ceph:
+ children:
+ mon:
+ osd:
+
+mon:
+ hosts:
+ ceph1:
+
+rw:
+ hosts:
+ ceph2:
diff --git a/winery-test-environment/inventory/hosts.yml b/winery-test-environment/inventory/hosts.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/inventory/hosts.yml
@@ -0,0 +1,10 @@
+all:
+ hosts:
+ ceph1: {ansible_host: ceph1, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph2: {ansible_host: ceph2, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph3: {ansible_host: ceph3, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph4: {ansible_host: ceph4, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph5: {ansible_host: ceph5, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph6: {ansible_host: ceph6, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph7: {ansible_host: ceph7, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
+ ceph8: {ansible_host: ceph8, ansible_port: '22', ansible_python_interpreter: '/usr/bin/python3'}
diff --git a/winery-test-environment/inventory/osd.yml b/winery-test-environment/inventory/osd.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/inventory/osd.yml
@@ -0,0 +1,9 @@
+---
+osd:
+ hosts:
+ ceph3:
+ ceph4:
+ ceph5:
+ ceph6:
+ ceph7:
+ ceph8:
diff --git a/winery-test-environment/libvirt.yml b/winery-test-environment/libvirt.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/libvirt.yml
@@ -0,0 +1,5 @@
+# libvirt specific actions
+
+- hosts: mon
+ gather_facts: no
+ become: true
diff --git a/winery-test-environment/osd.yml b/winery-test-environment/osd.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/osd.yml
@@ -0,0 +1,40 @@
+---
+- hosts: osd
+ gather_facts: no
+ become: true
+
+ tasks:
+
+ - name: add host
+ shell: |
+ ceph orch host add {{ inventory_hostname }}
+ delegate_to: ceph1
+
+- hosts: osd
+ gather_facts: no
+ become: true
+
+ tasks:
+
+ - name: wait for host
+ shell: |
+ ceph orch host ls | grep '^{{ inventory_hostname }} '
+ delegate_to: ceph1
+ register: host
+ until: host is success
+ retries: 30
+ delay: 5
+
+- hosts: osd
+ gather_facts: no
+ become: true
+
+ tasks:
+
+ # the desired side effect here is twofold
+ # * device zap blocks until the osd daemon is ready on the target host
+ # * on grid5000 /dev/sdc needs to be zapped before it can be used
+ - name: zap /dev/sdc
+ shell: |
+ ceph orch device zap {{ inventory_hostname }} /dev/sdc --force || true
+ delegate_to: ceph1
diff --git a/winery-test-environment/remote-tox.sh b/winery-test-environment/remote-tox.sh
new file mode 100755
--- /dev/null
+++ b/winery-test-environment/remote-tox.sh
@@ -0,0 +1,31 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+set -ex
+
+DIR=winery-test-environment
+SSH="ssh -i ${DIR}/context/cluster_key -F ${DIR}/context/ssh-config"
+
+function sanity_check() {
+ if ! test -f ${DIR}/context/cluster_key ; then
+ echo "${DIR}/context/cluster_key does not exist"
+ echo "check ${DIR}/README.md for instructions."
+ return 1
+ fi
+}
+
+function copy() {
+ RSYNC_RSH="$SSH" rsync -av --exclude=.mypy_cache --exclude=.coverage --exclude=.eggs --exclude=swh.objstorage.egg-info --exclude=winery-test-environment/context --exclude=.tox --exclude='*~' --exclude=__pycache__ --exclude='*.py[co]' $(git rev-parse --show-toplevel)/ debian@ceph1:/home/debian/swh-objstorage/
+}
+
+function run() {
+ sanity_check || return 1
+
+ copy || return 1
+
+ $SSH -t debian@ceph1 bash -c "'cd swh-objstorage ; ../venv/bin/tox -e py3 -- -k test_winery $*'" || return 1
+}
+
+run "$@"
diff --git a/winery-test-environment/requirements.txt b/winery-test-environment/requirements.txt
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/requirements.txt
@@ -0,0 +1 @@
+ansible
diff --git a/winery-test-environment/rng.xml b/winery-test-environment/rng.xml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/rng.xml
@@ -0,0 +1,5 @@
+ <rng model='virtio'>
+ <backend model='random'>/dev/urandom</backend>
+ <alias name='rng0'/>
+ <address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/>
+ </rng>
diff --git a/winery-test-environment/rw.yml b/winery-test-environment/rw.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/rw.yml
@@ -0,0 +1,110 @@
+---
+- name: install and configure Read Write Storage
+ hosts: rw
+ become: true
+
+ pre_tasks:
+
+ - name: zap attached disks
+ shell: |
+ for disk in {{ rw_disk1 }} {{ rw_disk2 }} ; do
+ dd if=/dev/zero of=$disk count=100 bs=1024k
+ done
+ touch /etc/zapped.done
+ args:
+ creates: /etc/zapped.done
+
+ - name: apt install lvm2
+ apt:
+ name:
+ - lvm2
+
+ - name: vgcreate pg
+ lvg:
+ vg: pg
+ pvs: "{{ rw_disk1 }},{{ rw_disk2 }}"
+
+ - name: lvcreate pg
+ lvol:
+ vg: pg
+ lv: pg
+ size: +100%FREE
+
+ - name: mkfs /dev/mapper/pg-pg
+ filesystem:
+ fstype: ext4
+# force: yes
+ dev: /dev/mapper/pg-pg
+
+ - name: mkdir /var/lib/postgresql
+ file:
+ path: /var/lib/postgresql
+ state: directory
+ mode: 755
+
+ - name: mount /var/lib/postgresql
+ mount:
+ path: /var/lib/postgresql
+ src: /dev/mapper/pg-pg
+ fstype: ext4
+ state: mounted
+
+ - name: apt install postgres
+ apt:
+ name:
+ - postgresql
+ - postgresql-contrib
+ - libpq-dev
+ - python3-psycopg2
+ - acl
+
+ - name: postgresql.conf max_connections = 1000
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '^max_connections'
+ line: "max_connections = 1000"
+
+ #
+ # https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server
+ #
+ - name: postgresql.conf shared_buffers
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '^shared_buffers'
+ # 1/4 RAM
+ line: "shared_buffers = {{ postgres_shared_buffers }}"
+
+ - name: postgresql.conf effective_cache_size
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '.*effective_cache_size'
+ # 1/2 RAM
+ line: "effective_cache_size = {{ postgres_effective_cache_size }}"
+
+ - name: postgresql.conf random_page_cost
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '.*random_page_cost'
+ line: "random_page_cost = 2.0"
+
+ - name: listen on *
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ line: "listen_addresses = '*'"
+
+ - name: allow all connections
+ lineinfile:
+ path: /etc/postgresql/13/main/pg_hba.conf
+ line: "host all all 0.0.0.0/0 trust"
+
+ - name: systemctl restart postgresql
+ service:
+ name: postgresql
+ state: restarted
+
+ - name: pg user testuser/testpassword
+ postgresql_user:
+ name: testuser
+ password: testpassword
+ role_attr_flags: SUPERUSER
+ become_user: postgres
diff --git a/winery-test-environment/tests.yml b/winery-test-environment/tests.yml
new file mode 100644
--- /dev/null
+++ b/winery-test-environment/tests.yml
@@ -0,0 +1,32 @@
+- name: install test environment
+ gather_facts: no
+ hosts: mon
+
+ pre_tasks:
+
+ - name: apt install
+ apt:
+ name:
+ - emacs-nox
+ - gcc
+ - libcap-dev
+ - libcmph-dev
+ - libpq-dev
+ - postgresql-client-common
+ - postgresql-13
+ - python3-pip
+ - python3-rbd
+ - rsync
+ - tmux
+ - virtualenv
+ become: true
+
+ - name: configure venv
+ shell: |
+ virtualenv venv
+ venv/bin/pip3 install tox
+ args:
+ creates: venv
+ chdir: /home/debian
+ become: true
+ become_user: debian
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 12:03 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3228206
Attached To
D6796: winery: basic implementation of the backend
Event Timeline
Log In to Comment