D6796: winery: basic implementation of the backend
File F9311873: D6796.id24733.diff (38 KB)
diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,6 @@
version.txt
.tox
.mypy_cache/
+ceph_key*
+*.qcow2
+*.img
diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -2,3 +2,4 @@
include requirements*.txt
include version.txt
recursive-include swh py.typed
+exclude swh/objstorage/tests/winery
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -19,3 +19,12 @@
[mypy-requests_toolbelt.*]
ignore_missing_imports = True
+
+[mypy-psycopg2.*]
+ignore_missing_imports = True
+
+[mypy-swh.perfecthash.*]
+ignore_missing_imports = True
+
+[mypy-sh.*]
+ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,3 @@
swh.core[http] >= 0.3
swh.model >= 0.0.27
+swh.perfecthash
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -5,3 +5,5 @@
requests_toolbelt
types-pyyaml
types-requests
+pytest-postgresql < 4.0.0 # version 4.0 depends on psycopg 3. https://github.com/ClearcodeHQ/pytest-postgresql/blob/main/CHANGES.rst#400
+
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,8 @@
aiohttp >= 3
click
requests
+psycopg2
+sh
# optional dependencies
# apache-libcloud
diff --git a/swh/objstorage/backends/winery/__init__.py b/swh/objstorage/backends/winery/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/__init__.py
@@ -0,0 +1 @@
+from .objstorage import WineryObjStorage # noqa: F401
diff --git a/swh/objstorage/backends/winery/database.py b/swh/objstorage/backends/winery/database.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/database.py
@@ -0,0 +1,75 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import time
+
+import psycopg2
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.ERROR)
+
+
+class Database:
+ def __init__(self, dsn):
+ self.dsn = dsn
+
+ def create_database(self, database):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ f"SELECT datname FROM pg_catalog.pg_database WHERE datname = '{database}'"
+ )
+ if c.rowcount == 0:
+ c.execute(f"CREATE DATABASE {database}")
+ c.close()
+
+ def drop_database(self, database):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ # https://wiki.postgresql.org/wiki/Psycopg2_Tutorial
+ # If you want to drop the database you would need to
+ # change the isolation level of the database.
+ db.set_isolation_level(0)
+ db.autocommit = True
+ c = db.cursor()
+ #
+ # Dropping the database may fail because the server takes time
+ # to notice a connection was dropped and/or a named cursor is
+ # in the process of being deleted. It can happen here or even
+ # when deleting all databases with the psql CLI
+ # while no bench process is active.
+ #
+ # ERROR: database "i606428a5a6274d1ab09eecc4d019fef7" is being
+ # accessed by other users DETAIL: There is 1 other session
+ # using the database.
+ #
+ # See:
+ # https://stackoverflow.com/questions/5108876/kill-a-postgresql-session-connection
+ #
+ # https://www.postgresql.org/docs/current/sql-dropdatabase.html
+ #
+ # WITH (FORCE) added in postgresql 13 but may also fail because the
+ # named cursor may not be handled as a client.
+ #
+        for i in range(60):
+            try:
+                c.execute(f"DROP DATABASE IF EXISTS {database}")
+                c.close()
+                return
+            except psycopg2.errors.ObjectInUse:
+                LOGGER.warning(f"{database} database drop fails, waiting")
+                time.sleep(10)
+                continue
+        raise Exception(f"database drop fails {database}")
+
+ def list_databases(self):
+ db = psycopg2.connect(f"{self.dsn}/postgres")
+ with db.cursor() as c:
+ c.execute(
+ "SELECT datname FROM pg_database "
+ "WHERE datistemplate = false and datname != 'postgres'"
+ )
+ return [r[0] for r in c.fetchall()]
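
(Annotation, not part of the diff: a short sketch of how this helper is driven by the test fixture further down; the DSN is a placeholder.)

    from swh.objstorage.backends.winery.database import Database

    d = Database("postgres://testuser:@localhost:5432")  # placeholder DSN
    d.create_database("i0123")            # no-op when the database already exists
    assert "i0123" in d.list_databases()  # template and postgres databases are filtered out
    d.drop_database("i0123")              # retries while other sessions still hold the database
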
diff --git a/swh/objstorage/backends/winery/objstorage.py b/swh/objstorage/backends/winery/objstorage.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/objstorage.py
@@ -0,0 +1,98 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import concurrent.futures
+import logging
+
+from swh.objstorage import exc
+from swh.objstorage.objstorage import ObjStorage, compute_hash
+
+from .roshard import ROShard
+from .rwshard import RWShard
+from .sharedbase import SharedBase
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.ERROR)
+
+
+def pack(shard, **kwargs):
+ return Packer(shard, **kwargs).run()
+
+
+class Packer:
+ def __init__(self, shard, **kwargs):
+ self.args = kwargs
+ self.base = SharedBase(**self.args)
+ self.rw = RWShard(shard, **self.args)
+ self.ro = ROShard(shard, **self.args)
+
+ def run(self):
+ return True
+
+
+class WineryObjStorage(ObjStorage):
+ def __init__(self, **kwargs):
+ super().__init__(**kwargs)
+ self.args = kwargs
+ self.executor = concurrent.futures.ProcessPoolExecutor(max_workers=1)
+ self.packers = []
+ self.init()
+
+ def init(self):
+ self.base = SharedBase(**self.args)
+ self.shard = RWShard(self.base.whoami, **self.args)
+
+ def check_config(self, *, check_write):
+ return True
+
+ def __contains__(self, obj_id):
+ return self.base.contains(obj_id)
+
+ def add(self, content, obj_id=None, check_presence=True):
+ if obj_id is None:
+ obj_id = compute_hash(content)
+
+ if check_presence and obj_id in self:
+ return obj_id
+
+ shard = self.base.add_phase_1(obj_id)
+ if shard != self.base.id:
+ # this object is the responsibility of another shard
+ return obj_id
+
+ self.shard.add(obj_id, content)
+ self.base.add_phase_2(obj_id)
+
+ if self.shard.is_full():
+ self.pack()
+
+ return obj_id
+
+ def get(self, obj_id):
+ shard_info = self.base.get(obj_id)
+ if shard_info is None:
+ raise exc.ObjNotFoundError(obj_id)
+ name, readonly = shard_info
+ if readonly:
+ pass # ROShard
+ else:
+ shard = RWShard(name, **self.args)
+ content = shard.get(obj_id)
+ if content is None:
+ raise exc.ObjNotFoundError(obj_id)
+ return content
+
+ def check(self, obj_id):
+ # load all shards readonly == NULL and not locked (i.e. packer
+ # was interrupted for whatever reason) run pack for each of them
+ pass
+
+ def delete(self, obj_id):
+ raise PermissionError("Delete is not allowed.")
+
+ def pack(self):
+ self.base.shard_packing_starts()
+ self.packers.append(self.executor.submit(pack, self.shard.name, **self.args))
+ self.init()
diff --git a/swh/objstorage/backends/winery/roshard.py b/swh/objstorage/backends/winery/roshard.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/roshard.py
@@ -0,0 +1,106 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+
+import sh
+
+from swh.perfecthash import Shard
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.ERROR)
+
+
+class Pool(object):
+ name = "shards"
+
+ def __init__(self, **kwargs):
+ self.args = kwargs
+
+ def init(self):
+ self.rbd = sh.sudo.bake("rbd", f"--pool={self.name}")
+ self.ceph = sh.sudo.bake("ceph")
+ self.image_size = self.args["shard_max_size"] * 2
+
+ def uninit(self):
+ pass
+
+    def image_list(self):
+        try:
+            images = self.rbd.ls()
+        except sh.ErrorReturnCode_2 as e:
+            if "No such file or directory" in e.args[0]:
+                return []
+            else:
+                raise
+        return [image.strip() for image in images]
+
+ def image_path(self, image):
+ return f"/dev/rbd/{self.name}/{image}"
+
+ def image_create(self, image):
+ LOGGER.info(f"rbd --pool {self.name} create --size={self.image_size} {image}")
+ self.rbd.create(
+ f"--size={self.image_size}", f"--data-pool={self.name}-data", image
+ )
+ self.rbd.feature.disable(
+ f"{self.name}/{image}", "object-map", "fast-diff", "deep-flatten"
+ )
+ self.image_map(image, "rw")
+
+ def image_map(self, image, options):
+ self.rbd.device("map", "-o", options, image)
+ sh.sudo("chmod", "777", self.image_path(image))
+
+ def image_remap_ro(self, image):
+ self.image_unmap(image)
+ self.image_map(image, "ro")
+
+ def image_unmap(self, image):
+ self.rbd.device.unmap(f"{self.name}/{image}", _ok_code=(0, 22))
+
+ def image_delete(self, image):
+ self.image_unmap(image)
+ LOGGER.info(f"rbd --pool {self.name} remove {image}")
+ self.rbd.remove(image)
+
+ def images_clobber(self):
+ for image in self.image_list():
+ image = image.strip()
+ self.image_unmap(image)
+
+ def clobber(self):
+ self.images_clobber()
+ self.pool_clobber()
+
+ def pool_clobber(self):
+ LOGGER.info(f"ceph osd pool delete {self.name}")
+ self.ceph.osd.pool.delete(self.name, self.name, "--yes-i-really-really-mean-it")
+ data = f"{self.name}-data"
+ LOGGER.info(f"ceph osd pool delete {data}")
+ self.ceph.osd.pool.delete(data, data, "--yes-i-really-really-mean-it")
+
+ def pool_create(self):
+ data = f"{self.name}-data"
+ LOGGER.info(f"ceph osd pool create {data}")
+ self.ceph.osd(
+ "erasure-code-profile",
+ "set",
+ "--force",
+ data,
+ "k=4",
+ "m=2",
+ "crush-failure-domain=host",
+ )
+ self.ceph.osd.pool.create(data, "200", "erasure", data)
+ self.ceph.osd.pool.set(data, "allow_ec_overwrites", "true")
+ self.ceph.osd.pool.set(data, "pg_autoscale_mode", "off")
+ LOGGER.info(f"ceph osd pool create {self.name}")
+ self.ceph.osd.pool.create(self.name)
+
+
+class ROShard:
+ def __init__(self, name, **kwargs):
+ Shard()
diff --git a/swh/objstorage/backends/winery/rwshard.py b/swh/objstorage/backends/winery/rwshard.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/rwshard.py
@@ -0,0 +1,68 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+import psycopg2
+
+from .database import Database
+
+
+class RWShard(Database):
+ def __init__(self, name, **kwargs):
+ super().__init__(kwargs["shard_dsn"])
+ self._name = name
+ self.create_database(self.name)
+ self.db = self.create_table(f"{self.dsn}/{self.name}")
+ self.size = self.total_size()
+ self.limit = kwargs["shard_max_size"]
+
+ @property
+ def name(self):
+ return self._name
+
+ def is_full(self):
+ return self.size > self.limit
+
+ def create_table(self, dsn):
+ db = psycopg2.connect(dsn)
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS objects("
+ "key BYTEA PRIMARY KEY, "
+ "content BYTEA "
+ ")"
+ )
+ c.close()
+ return db
+
+ def total_size(self):
+ with self.db.cursor() as c:
+ c.execute("SELECT SUM(LENGTH(content)) FROM objects")
+ size = c.fetchone()[0]
+ if size is None:
+ return 0
+ else:
+ return size
+
+ def add(self, obj_id, content):
+ try:
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO objects (key, content) VALUES (%s, %s)",
+ (obj_id, content),
+ )
+ self.db.commit()
+ self.size += len(content)
+ except psycopg2.errors.UniqueViolation:
+ pass
+
+ def get(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute("SELECT content FROM objects WHERE key = %s", (obj_id,))
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()[0].tobytes()
diff --git a/swh/objstorage/backends/winery/sharedbase.py b/swh/objstorage/backends/winery/sharedbase.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/backends/winery/sharedbase.py
@@ -0,0 +1,161 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import uuid
+
+import psycopg2
+
+from .database import Database
+
+
+class SharedBase(Database):
+ def __init__(self, **kwargs):
+ super().__init__(kwargs["base_dsn"])
+ database = "sharedbase"
+ self.create_database(database)
+ self.db = self.create_table(f"{self.dsn}/{database}")
+ self._whoami = None
+
+ def create_table(self, dsn):
+ db = psycopg2.connect(dsn)
+ db.autocommit = True
+ c = db.cursor()
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS shards("
+ "id SERIAL PRIMARY KEY, "
+ "readonly BOOLEAN, "
+ "packing BOOLEAN, "
+ "name CHAR(32) NOT NULL "
+ ")"
+ )
+ c.execute(
+ "CREATE TABLE IF NOT EXISTS signature2shard("
+ "signature BYTEA PRIMARY KEY, "
+ "inflight BOOLEAN NOT NULL, "
+ "shard INTEGER NOT NULL"
+ ")"
+ )
+ c.close()
+ return db
+
+ @property
+ def whoami(self):
+ self.set_whoami()
+ return self._whoami
+
+ @property
+ def id(self):
+ self.set_whoami()
+ return self._whoami_id
+
+ def set_whoami(self):
+ if self._whoami is not None:
+ return
+
+ while True:
+ self._whoami, self._whoami_id = self.lock_a_shard()
+ if self._whoami is not None:
+ return self._whoami
+ self.create_shard()
+
+ def lock_a_shard(self):
+ with self.db.cursor() as c:
+ c.execute(
+ "SELECT name FROM shards WHERE readonly = FALSE and packing = FALSE "
+ "LIMIT 1 FOR UPDATE SKIP LOCKED"
+ )
+ if c.rowcount == 0:
+ return None, None
+ name = c.fetchone()[0]
+ return self.lock_shard(name)
+
+ def lock_shard(self, name):
+ self.whoami_lock = self.db.cursor()
+ try:
+ self.whoami_lock.execute(
+ "SELECT name, id FROM shards "
+ "WHERE readonly = FALSE AND packing = FALSE AND name = %s "
+ "FOR UPDATE NOWAIT",
+ (name,),
+ )
+ return self.whoami_lock.fetchone()
+ except Exception:
+ return None, None
+
+ def create_shard(self):
+ name = uuid.uuid4().hex
+ #
+ # ensure the first character is not a number so it can be used as a
+ # database name.
+ #
+ name = "i" + name[1:]
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO shards (name, readonly, packing) "
+ "VALUES (%s, FALSE, FALSE)",
+ (name,),
+ )
+ self.db.commit()
+
+ def shard_packing_starts(self):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE shards SET packing = TRUE WHERE name = %s", (self.whoami,)
+ )
+
+ def shard_packing_ends(self):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE shards SET readonly = TRUE, packing = FALSE " "WHERE name = %s",
+ (self.whoami,),
+ )
+
+ def get_shard_info(self, id):
+ with self.db.cursor() as c:
+ c.execute("SELECT name, readonly FROM shards WHERE id = %s", (id,))
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()
+
+ def contains(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute(
+ "SELECT shard FROM signature2shard WHERE "
+ "signature = %s AND inflight = FALSE",
+ (obj_id,),
+ )
+ if c.rowcount == 0:
+ return None
+ else:
+ return c.fetchone()[0]
+
+ def get(self, obj_id):
+ id = self.contains(obj_id)
+ if id is None:
+ return None
+ return self.get_shard_info(id)
+
+ def add_phase_1(self, obj_id):
+ try:
+ with self.db.cursor() as c:
+ c.execute(
+ "INSERT INTO signature2shard (signature, shard, inflight) "
+ "VALUES (%s, %s, TRUE)",
+ (obj_id, self.id),
+ )
+ self.db.commit()
+ return self.id
+ except psycopg2.errors.UniqueViolation:
+ return self.contains(obj_id)
+
+ def add_phase_2(self, obj_id):
+ with self.db.cursor() as c:
+ c.execute(
+ "UPDATE signature2shard SET inflight = FALSE "
+ "WHERE signature = %s AND shard = %s",
+ (obj_id, self.id),
+ )
+ self.db.commit()
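
(Annotation, not part of the diff.) The two tables above implement a simple two-phase protocol, driven by WineryObjStorage.add() earlier in this diff: a signature is first recorded as inflight against the shard that owns it, the content is written to that shard's RW database, and the inflight flag is then cleared so lookups start returning it. Roughly:

    # sketch of the calling sequence in objstorage.py (not new API)
    shard = base.add_phase_1(obj_id)   # insert signature as inflight, or return the owning shard
    if shard == base.id:               # this writer is responsible for the object
        rw_shard.add(obj_id, content)  # store the bytes in the RW shard database
        base.add_phase_2(obj_id)       # clear inflight: contains()/get() now see the object
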
diff --git a/swh/objstorage/factory.py b/swh/objstorage/factory.py
--- a/swh/objstorage/factory.py
+++ b/swh/objstorage/factory.py
@@ -13,6 +13,7 @@
from swh.objstorage.backends.noop import NoopObjStorage
from swh.objstorage.backends.pathslicing import PathSlicingObjStorage
from swh.objstorage.backends.seaweedfs import SeaweedFilerObjStorage
+from swh.objstorage.backends.winery import WineryObjStorage
from swh.objstorage.multiplexer import MultiplexerObjStorage, StripingObjStorage
from swh.objstorage.multiplexer.filter import add_filters
from swh.objstorage.objstorage import ID_HASH_LENGTH, ObjStorage # noqa
@@ -27,6 +28,7 @@
"seaweedfs": SeaweedFilerObjStorage,
"random": RandomGeneratorObjStorage,
"http": HTTPReadOnlyObjStorage,
+ "winery": WineryObjStorage,
"noop": NoopObjStorage,
}
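
(Annotation, not part of the diff.) With the registration above, the backend can be obtained through the regular factory; a minimal sketch based on the tests added later in this revision, with placeholder DSN and size values:

    from swh.objstorage.factory import get_objstorage

    dsn = "postgres://testuser:@localhost:5432"  # placeholder
    storage = get_objstorage(
        cls="winery",
        base_dsn=dsn,         # shared base: shards and signature2shard tables
        shard_dsn=dsn,        # one database per RW shard is created from this DSN
        shard_max_size=1024,  # the current shard is packed once it grows past this
    )
    obj_id = storage.add(b"SOMETHING")
    assert storage.get(obj_id) == b"SOMETHING"
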
diff --git a/swh/objstorage/tests/test_objstorage_winery.py b/swh/objstorage/tests/test_objstorage_winery.py
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/test_objstorage_winery.py
@@ -0,0 +1,120 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import concurrent.futures
+
+import pytest
+import sh
+
+from swh.objstorage import exc
+from swh.objstorage.backends.winery.database import Database
+from swh.objstorage.backends.winery.objstorage import Packer
+from swh.objstorage.backends.winery.roshard import Pool
+from swh.objstorage.factory import get_objstorage
+
+
+@pytest.fixture
+def needs_ceph():
+ try:
+ sh.ceph("--version")
+ except sh.CommandNotFound:
+ pytest.skip("the ceph CLI was not found")
+
+
+@pytest.fixture
+def dsn(postgresql):
+ dsn = (
+ f"postgres://{postgresql.info.user}"
+ f":@{postgresql.info.host}:{postgresql.info.port}"
+ )
+ yield dsn
+ # pytest-postgresql will not remove databases that it did not create
+ d = Database(dsn)
+ for database in d.list_databases():
+ if database != postgresql.info.dbname and database != "tests_tmpl":
+ d.drop_database(database)
+
+
+def test_winery_sharedbase(dsn):
+ base = get_objstorage(
+ cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=1
+ ).base
+ shard1 = base.whoami
+ assert shard1 is not None
+ assert shard1 == base.whoami
+
+ id1 = base.id
+ assert id1 is not None
+ assert id1 == base.id
+
+
+def test_winery_add_get(dsn):
+ winery = get_objstorage(
+ cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=1024
+ )
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ obj_id = winery.add(content=content)
+ assert obj_id.hex() == "0c8c841f7d9fd4874d841506d3ffc16808b1d579"
+ assert winery.add(content=content, obj_id=obj_id) == obj_id
+ assert winery.add(content=content, obj_id=obj_id, check_presence=False) == obj_id
+ assert winery.base.whoami == shard
+ assert winery.get(obj_id) == content
+ with pytest.raises(exc.ObjNotFoundError):
+ winery.get(b"unknown")
+
+
+def test_winery_add_and_pack(dsn):
+ winery = get_objstorage(cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=1)
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ obj_id = winery.add(content=content)
+ assert obj_id.hex() == "0c8c841f7d9fd4874d841506d3ffc16808b1d579"
+ assert winery.base.whoami != shard
+ assert len(winery.packers) == 1
+ done, not_done = concurrent.futures.wait(winery.packers)
+ assert len(not_done) == 0
+ assert len(done) == 1
+ packer = list(done)[0]
+ assert packer.exception(timeout=0) is None
+ assert packer.result() is True
+ winery.executor.shutdown()
+
+
+def test_winery_delete(dsn):
+ winery = get_objstorage(cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=1)
+ with pytest.raises(PermissionError):
+ winery.delete(None)
+
+
+def test_winery_packer(dsn):
+ winery = get_objstorage(
+ cls="winery", base_dsn=dsn, shard_dsn=dsn, shard_max_size=1024
+ )
+ shard = winery.base.whoami
+ content = b"SOMETHING"
+ winery.add(content=content)
+ winery.base.shard_packing_starts()
+ assert Packer(shard, **winery.args).run() is True
+
+
+def test_winery_pool(needs_ceph):
+ name = "IMAGE"
+ pool = Pool(shard_max_size=10 * 1024 * 1024)
+ pool.init()
+ pool.clobber()
+ pool.pool_create()
+ pool.image_create(name)
+ p = pool.image_path(name)
+ assert p.endswith(name)
+ something = "SOMETHING"
+ open(p, "w").write(something)
+ assert open(p).read(len(something)) == something
+ assert pool.image_list() == [name]
+ pool.image_remap_ro(name)
+ pool.images_clobber()
+ assert pool.image_list() == [name]
+ pool.clobber()
+ assert pool.image_list() == []
diff --git a/swh/objstorage/tests/winery/README.md b/swh/objstorage/tests/winery/README.md
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/README.md
@@ -0,0 +1,20 @@
+# Installation
+
+* ensure virsh is available
+* pip install -r requirements.txt
+
+# Create
+
+* ansible-galaxy install geerlingguy.docker
+* ./build-vms.sh
+* runner=debian@10.11.12.211
+* idx=debian@10.11.12.213
+* ansible-playbook -i inventory ceph.yml osd.yml rw.yml tests.yml
+
+# Running
+
+* rsync -av --exclude=.git --exclude=.mypy_cache --exclude=.coverage --exclude=.eggs --exclude=swh.objstorage.egg-info --exclude=*.img --exclude=*.qcow2 --exclude=.tox --exclude='*~' --exclude=__pycache__ --exclude='*.py[co]' $(git rev-parse --show-toplevel)/ $runner:swh-objstorage/ ; ssh -t $runner bash -c "'cd swh-objstorage ; ../venv/bin/tox -e py3 -- -k test_winery'"
+
+# Destroy
+
+* ./build-vms.sh stop $(seq 1 8)
diff --git a/swh/objstorage/tests/winery/build-vms.sh b/swh/objstorage/tests/winery/build-vms.sh
new file mode 100755
--- /dev/null
+++ b/swh/objstorage/tests/winery/build-vms.sh
@@ -0,0 +1,99 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+function stop() {
+ local ids="$@"
+
+ for id in $ids ; do
+ virsh destroy ceph$id
+ virsh undefine ceph$id
+ rm -f ceph$id.qcow2
+ done
+ virsh net-destroy ceph
+ virsh net-undefine ceph
+ rm -f ceph*.img
+}
+
+function start() {
+ local ids="$@"
+
+ if ! test -f debian-11.qcow2 ; then
+ sudo virt-builder debian-11 --output debian-11.qcow2 --size 10G --format qcow2 --install sudo --run-command 'dpkg-reconfigure --frontend=noninteractive openssh-server' --run-command 'useradd -s /bin/bash -m debian || true ; echo "debian ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-debian' --ssh-inject debian:file:$HOME/.ssh/id_rsa.pub --edit '/etc/network/interfaces: s/ens2/enp1s0/'
+ fi
+
+ if ! virsh net-list --name | grep ceph ; then
+ cat > /tmp/ceph-net.xml <<EOF
+ <network>
+ <name>ceph</name>
+ <forward mode='nat'/>
+ <bridge name='virbrceph' stp='on' delay='0'/>
+ <ip address='10.11.12.1' netmask='255.255.255.0'>
+ <dhcp>
+ <range start='10.11.12.100' end='10.11.12.200'/>
+ <host mac='52:54:00:00:00:01' name='ceph1' ip='10.11.12.211'/>
+ <host mac='52:54:00:00:00:02' name='ceph2' ip='10.11.12.212'/>
+ <host mac='52:54:00:00:00:03' name='ceph3' ip='10.11.12.213'/>
+ <host mac='52:54:00:00:00:04' name='ceph4' ip='10.11.12.214'/>
+ <host mac='52:54:00:00:00:05' name='ceph5' ip='10.11.12.215'/>
+ <host mac='52:54:00:00:00:06' name='ceph6' ip='10.11.12.216'/>
+ <host mac='52:54:00:00:00:07' name='ceph7' ip='10.11.12.217'/>
+ <host mac='52:54:00:00:00:08' name='ceph8' ip='10.11.12.218'/>
+ <host mac='52:54:00:00:00:09' name='ceph9' ip='10.11.12.219'/>
+ </dhcp>
+ </ip>
+ </network>
+EOF
+ virsh net-define /tmp/ceph-net.xml
+ virsh net-start ceph
+ fi
+
+
+ for id in $ids ; do
+ virsh destroy ceph$id
+ virsh undefine ceph$id
+ rm -f ceph$id.qcow2
+ cp --sparse=always debian-11.qcow2 ceph$id.qcow2
+ sudo virt-sysprep -a ceph$id.qcow2 --enable customize --hostname ceph$id
+ virt-install --network network=ceph,mac=52:54:00:00:00:0$id --boot hd --name ceph$id --memory 2048 --vcpus 1 --cpu host --disk path=$(pwd)/ceph$id.qcow2,bus=virtio,format=qcow2 --os-type=linux --os-variant=debian10 --graphics none --noautoconsole
+ case $id in
+ 1)
+ ;;
+ 2)
+ virsh detach-device ceph$id rng.xml --live
+ for drive in b c ; do
+ #
+ # Without the sleep it fails with:
+ #
+ # error: Failed to attach disk
+ # error: internal error: No more available PCI slots
+ #
+ sleep 10
+ rm -f disk$id$drive.img
+ qemu-img create -f raw disk$id$drive.img 20G
+ sudo chown libvirt-qemu disk$id$drive.img
+ virsh attach-disk ceph$id --source $(pwd)/disk$id$drive.img --target vd$drive --persistent
+ done
+ ;;
+ *)
+ rm -f disk$id.img
+ qemu-img create -f raw disk$id.img 20G
+ sudo chown libvirt-qemu disk$id.img
+ virsh attach-disk ceph$id --source $(pwd)/disk$id.img --target vdb --persistent
+ ;;
+ esac
+ done
+}
+
+function restart() {
+ local ids="$@"
+ stop $ids
+ start $ids
+}
+
+if test "$1" ; then
+ "$@"
+else
+ restart 1 2 3 5 4 6 7 8
+fi
diff --git a/swh/objstorage/tests/winery/ceph.yml b/swh/objstorage/tests/winery/ceph.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/ceph.yml
@@ -0,0 +1,121 @@
+#
+# notes to install a client
+# https://docs.ceph.com/en/latest/cephadm/client-setup/
+# ceph config generate-minimal-conf > /etc/ceph/ceph.conf
+# ceph auth get-or-create client.admin > /etc/ceph/ceph.keyring
+#
+- hosts: localhost
+ gather_facts: false
+
+ pre_tasks:
+
+ - name: keygen ceph_key
+ shell: |
+ ssh-keygen -f ceph_key -N '' -t rsa
+ args:
+ creates: ceph_key
+
+- hosts: all
+ become: true
+
+ pre_tasks:
+
+ - name: mkdir /root/.ssh
+ file:
+ path: /root/.ssh
+ state: directory
+ mode: "0700"
+
+ - name: touch /root/.ssh/authorized_keys
+ file:
+ path: /root/.ssh/authorized_keys
+ state: touch
+
+ - name: add ceph_key.pub to /root/.ssh/authorized_keys
+ lineinfile:
+ path: /root/.ssh/authorized_keys
+ line: "{{ lookup('file', 'ceph_key.pub') }}"
+
+ - name: apt install
+ apt:
+ name:
+ - htop
+ - iotop
+ - iftop
+ - iperf
+
+- hosts: ceph
+ become: true
+
+ pre_tasks:
+
+ - name: apt install lvm2 curl gnupg2
+ apt:
+ name:
+ - lvm2
+ - curl
+ - gnupg2
+
+ - name: apt-key https://download.ceph.com/keys/release.asc
+ apt_key:
+ url: https://download.ceph.com/keys/release.asc
+
+ - name: add repository
+ apt_repository:
+ repo: "deb https://download.ceph.com/debian-pacific/ bullseye main"
+ filename: ceph
+
+ - name: apt install cephadm ceph-common
+ apt:
+ name:
+ - cephadm
+ - ceph-common
+
+ roles:
+ - geerlingguy.docker
+
+- hosts: all
+ become: true
+
+ tasks:
+
+ - name: "add {{ inventory_hostname }} to /etc/hosts"
+ lineinfile:
+ path: /etc/hosts
+ line: "{{ hostvars[inventory_hostname]['ansible_default_ipv4']['address'] }} {{ inventory_hostname }}"
+ delegate_to: ceph1
+
+ - name: set hostname
+ hostname:
+ name: "{{ inventory_hostname }}"
+
+- hosts: mon
+ become: true
+
+ tasks:
+
+ - name: scp ceph_key.*
+ copy:
+ src: "{{ item }}"
+ dest: "{{ item }}"
+ loop:
+ - ceph_key
+ - ceph_key.pub
+
+ - name: cephadm bootstrap
+ shell: |
+ set -ex
+ cephadm bootstrap --mon-ip {{ hostvars[groups['mon'][0]]['ansible_default_ipv4']['address'] }}
+ cephadm shell ceph cephadm clear-key
+ ceph config-key set mgr/cephadm/ssh_identity_key -i ceph_key
+ ceph config-key set mgr/cephadm/ssh_identity_pub -i ceph_key.pub
+ ceph orch apply osd --all-available-devices
+ args:
+ creates: /etc/ceph/ceph.pub
+
+ - name: ceph configuration
+ shell: |
+ set -ex
+ ceph config set mon mon_allow_pool_delete true
+ # does not work for some reason: must be done manually
+ cephadm shell ceph mgr fail # required for mgr/cephadm/ssh_identity* to be refreshed
diff --git a/swh/objstorage/tests/winery/inventory/group_vars/all/rw.yml b/swh/objstorage/tests/winery/inventory/group_vars/all/rw.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/inventory/group_vars/all/rw.yml
@@ -0,0 +1,5 @@
+---
+rw_disk1: /dev/vdb
+rw_disk2: /dev/vdc
+postgres_shared_buffers: 512MB
+postgres_effective_cache_size: 1GB
diff --git a/swh/objstorage/tests/winery/inventory/groups.yml b/swh/objstorage/tests/winery/inventory/groups.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/inventory/groups.yml
@@ -0,0 +1,13 @@
+---
+ceph:
+ children:
+ mon:
+ osd:
+
+mon:
+ hosts:
+ ceph1:
+
+rw:
+ hosts:
+ ceph2:
diff --git a/swh/objstorage/tests/winery/inventory/hosts.yml b/swh/objstorage/tests/winery/inventory/hosts.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/inventory/hosts.yml
@@ -0,0 +1,10 @@
+all:
+ hosts:
+ ceph1: {ansible_host: 10.11.12.211, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph2: {ansible_host: 10.11.12.212, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph3: {ansible_host: 10.11.12.213, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph4: {ansible_host: 10.11.12.214, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph5: {ansible_host: 10.11.12.215, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph6: {ansible_host: 10.11.12.216, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph7: {ansible_host: 10.11.12.217, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
+ ceph8: {ansible_host: 10.11.12.218, ansible_port: '22', ansible_user: debian, ansible_python_interpreter: '/usr/bin/python3'}
diff --git a/swh/objstorage/tests/winery/inventory/osd.yml b/swh/objstorage/tests/winery/inventory/osd.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/inventory/osd.yml
@@ -0,0 +1,9 @@
+---
+osd:
+ hosts:
+ ceph3:
+ ceph4:
+ ceph5:
+ ceph6:
+ ceph7:
+ ceph8:
diff --git a/swh/objstorage/tests/winery/osd.yml b/swh/objstorage/tests/winery/osd.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/osd.yml
@@ -0,0 +1,38 @@
+---
+
+- hosts: osd
+ become: true
+
+ tasks:
+
+ - name: add host
+ shell: |
+ ceph orch host add {{ inventory_hostname }}
+ delegate_to: ceph1
+
+- hosts: osd
+ become: true
+
+ tasks:
+
+ - name: wait for host
+ shell: |
+ ceph orch host ls | grep '^{{ inventory_hostname }} '
+ delegate_to: ceph1
+ register: host
+ until: host is success
+ retries: 30
+ delay: 5
+
+- hosts: osd
+ become: true
+
+ tasks:
+
+ # the desired side effect here is twofold
+ # * device zap blocks until the osd daemon is ready on the target host
+ # * on grid5000 /dev/sdc needs to be zapped as well
+ - name: zap /dev/sdc
+ shell: |
+ ceph orch device zap {{ inventory_hostname }} /dev/sdc --force || true
+ delegate_to: ceph1
diff --git a/swh/objstorage/tests/winery/requirements.txt b/swh/objstorage/tests/winery/requirements.txt
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/requirements.txt
@@ -0,0 +1 @@
+ansible
diff --git a/swh/objstorage/tests/winery/rng.xml b/swh/objstorage/tests/winery/rng.xml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/rng.xml
@@ -0,0 +1,5 @@
+ <rng model='virtio'>
+ <backend model='random'>/dev/urandom</backend>
+ <alias name='rng0'/>
+ <address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/>
+ </rng>
diff --git a/swh/objstorage/tests/winery/rw.yml b/swh/objstorage/tests/winery/rw.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/rw.yml
@@ -0,0 +1,110 @@
+---
+- name: install and configure Read Write Storage
+ hosts: rw
+ become: true
+
+ pre_tasks:
+
+ - name: zap attached disks
+ shell: |
+ for disk in {{ rw_disk1 }} {{ rw_disk2 }} ; do
+ dd if=/dev/zero of=$disk count=100 bs=1024k
+ done
+ touch /etc/zapped.done
+ args:
+ creates: /etc/zapped.done
+
+ - name: apt install lvm2
+ apt:
+ name:
+ - lvm2
+
+ - name: vgcreate pg
+ lvg:
+ vg: pg
+ pvs: "{{ rw_disk1 }},{{ rw_disk2 }}"
+
+ - name: lvcreate pg
+ lvol:
+ vg: pg
+ lv: pg
+ size: +100%FREE
+
+ - name: mkfs /dev/mapper/pg-pg
+ filesystem:
+ fstype: ext4
+# force: yes
+ dev: /dev/mapper/pg-pg
+
+ - name: mkdir /var/lib/postgresql
+ file:
+ path: /var/lib/postgresql
+ state: directory
+ mode: "0755"
+
+ - name: mount /var/lib/postgresql
+ mount:
+ path: /var/lib/postgresql
+ src: /dev/mapper/pg-pg
+ fstype: ext4
+ state: mounted
+
+ - name: apt install postgres
+ apt:
+ name:
+ - postgresql
+ - postgresql-contrib
+ - libpq-dev
+ - python3-psycopg2
+ - acl
+
+ - name: postgresql.conf max_connections = 1000
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '^max_connections'
+ line: "max_connections = 1000"
+
+ #
+ # https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server
+ #
+ - name: postgresql.conf shared_buffers
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '^shared_buffers'
+ # 1/4 RAM
+ line: "shared_buffers = {{ postgres_shared_buffers }}"
+
+ - name: postgresql.conf effective_cache_size
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '.*effective_cache_size'
+ # 1/2 RAM
+ line: "effective_cache_size = {{ postgres_effective_cache_size }}"
+
+ - name: postgresql.conf random_page_cost
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ regexp: '.*random_page_cost'
+ line: "random_page_cost = 2.0"
+
+ - name: listen on *
+ lineinfile:
+ path: /etc/postgresql/13/main/postgresql.conf
+ line: "listen_addresses = '*'"
+
+ - name: allow all connections
+ lineinfile:
+ path: /etc/postgresql/13/main/pg_hba.conf
+ line: "host all all 0.0.0.0/0 trust"
+
+ - name: systemctl restart postgresql
+ service:
+ name: postgresql
+ state: restarted
+
+ - name: pg user testuser/testpassword
+ postgresql_user:
+ name: testuser
+ password: testpassword
+ role_attr_flags: SUPERUSER
+ become_user: postgres
diff --git a/swh/objstorage/tests/winery/tests.yml b/swh/objstorage/tests/winery/tests.yml
new file mode 100644
--- /dev/null
+++ b/swh/objstorage/tests/winery/tests.yml
@@ -0,0 +1,28 @@
+- name: install test environment
+ hosts: mon
+
+ pre_tasks:
+
+ - name: apt install
+ apt:
+ name:
+ - emacs-nox
+ - gcc
+ - libcap-dev
+ - libcmph-dev
+ - libpq-dev
+ - postgresql-client-common
+ - postgresql-13
+ - python3-pip
+ - python3-rbd
+ - rsync
+ - tmux
+ - virtualenv
+ become: true
+
+ - name: configure venv
+ shell: |
+ virtualenv venv
+ venv/bin/pip3 install tox
+ args:
+ creates: venv
diff --git a/tox.ini b/tox.ini
--- a/tox.ini
+++ b/tox.ini
@@ -10,7 +10,7 @@
commands =
pytest --cov={envsitepackagesdir}/swh/objstorage \
{envsitepackagesdir}/swh/objstorage \
- --cov-branch {posargs}
+ --cov-branch --cov-report term-missing {posargs}
[testenv:black]
skip_install = true