diff --git a/README.rst b/README.rst
new file mode 120000
--- /dev/null
+++ b/README.rst
@@ -0,0 +1 @@
+docs/README.rst
\ No newline at end of file
diff --git a/conftest.py b/conftest.py
new file mode 100644
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1 @@
+pytest_plugins = ["swh.storage.pytest_plugin", "swh.core.db.pytest_plugin"]
diff --git a/docs/README.rst b/docs/README.rst
--- a/docs/README.rst
+++ b/docs/README.rst
@@ -3,3 +3,37 @@
 
 Tools to periodically checks data integrity in swh-storage and swh-objstorage,
 reports errors, and (try to) fix them.
+
+This is a work in progress; some of the components described below do not
+exist yet (cassandra storage checker, objstorage checker, recovery, and reinjection)
+
+The Scrubber package is made of the following parts:
+
+
+Checking
+--------
+
+Highly parallel processes continuously read objects from a data store,
+compute checksums, and write any failure in a database, along with the data of
+the corrupt object.
+
+There is one "checker" for each datastore package: storage (postgresql and cassandra),
+journal (kafka), and objstorage.
+
+
+Recovery
+--------
+
+Then, from time to time, jobs go through the list of known corrupt objects,
+and try to recover the original objects, through various means:
+
+* Brute-forcing variations until they match their checksum
+* Recovering from another data store
+* As a last resort, recovering from known origins, if any
+
+
+Reinjection
+-----------
+
+Finally, when an original object is recovered, it is reinjected in the original
+data store, replacing the corrupt one.
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,5 @@
 # Add here internal Software Heritage dependencies, one per line.
 swh.core[http] >= 0.3  # [http] is required by swh.core.pytest_plugin
+swh.model >= 5.0.0
+swh.storage >= 1.1.0
+swh.journal >= 0.9.0
diff --git a/requirements-test.txt b/requirements-test.txt
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1 +1,4 @@
 pytest < 7.0.0  # v7.0.0 removed _pytest.tmpdir.TempdirFactory, which is used by some of the pytest plugins we use
+pytest-mock
+pyyaml
+types-pyyaml
diff --git a/swh/scrubber/__init__.py b/swh/scrubber/__init__.py
--- a/swh/scrubber/__init__.py
+++ b/swh/scrubber/__init__.py
@@ -0,0 +1,23 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .db import ScrubberDb
+
+
+def get_scrubber_db(cls: str, **kwargs) -> ScrubberDb:
+    if cls != "local":
+        raise ValueError(f"Unknown scrubber db class '{cls}', use 'local' instead.")
+
+    from .db import ScrubberDb
+
+    return ScrubberDb.connect(kwargs.pop("db"), **kwargs)
+
+
+get_datastore = get_scrubber_db
diff --git a/swh/scrubber/bar.py b/swh/scrubber/bar.py
deleted file mode 100644
--- a/swh/scrubber/bar.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (C) 2019  The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
diff --git a/swh/scrubber/check_storage.py b/swh/scrubber/check_storage.py
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/check_storage.py
@@ -0,0 +1,91 @@
+# Copyright (C) 2021-2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+"""Reads all objects in a swh-storage instance and recomputes their checksums."""
+
+import contextlib
+import dataclasses
+import logging
+from typing import Iterable, Union
+
+from swh.journal.serializers import value_to_kafka
+from swh.model.model import Directory, Release, Revision, Snapshot
+from swh.storage import backfill
+from swh.storage.interface import StorageInterface
+from swh.storage.postgresql.storage import Storage as PostgresqlStorage
+
+from .db import Datastore, ScrubberDb
+
+logger = logging.getLogger(__name__)
+
+ScrubbableObject = Union[Revision, Release, Snapshot, Directory]
+
+
+@contextlib.contextmanager
+def storage_db(storage):
+    db = storage.get_db()
+    try:
+        yield db
+    finally:
+        storage.put_db(db)
+
+
+@dataclasses.dataclass
+class StorageChecker:
+    db: ScrubberDb
+    storage: StorageInterface
+    object_type: str
+    start_object: str
+    end_object: str
+
+    _datastore = None
+
+    def datastore_info(self) -> Datastore:
+        if self._datastore is None:
+            if isinstance(self.storage, PostgresqlStorage):
+                with storage_db(self.storage) as db:
+                    self._datastore = Datastore(
+                        package="storage", cls="postgresql", instance=db.conn.dsn,
+                    )
+            else:
+                raise NotImplementedError(
+                    f"StorageChecker(storage={self.storage!r}).datastore()"
+                )
+        return self._datastore
+
+    def check_storage(self):
+        if isinstance(self.storage, PostgresqlStorage):
+            with storage_db(self.storage) as db:
+                return self._check_postgresql(db)
+        else:
+            raise NotImplementedError(
+                f"StorageChecker(storage={self.storage!r}).check_storage()"
+            )
+
+    def _check_postgresql(self, db):
+        for range_start, range_end in backfill.RANGE_GENERATORS[self.object_type](
+            self.start_object, self.end_object
+        ):
+            logger.info(
+                "Processing %s range %s to %s",
+                self.object_type,
+                backfill._format_range_bound(range_start),
+                backfill._format_range_bound(range_end),
+            )
+
+            objects = backfill.fetch(
+                db, self.object_type, start=range_start, end=range_end
+            )
+            objects = list(objects)
+
+            self.process_objects(objects)
+
+    def process_objects(self, objects: Iterable[ScrubbableObject]):
+        for object_ in objects:
+            real_id = object_.compute_hash()
+            if object_.id != real_id:
+                self.db.corrupt_object_add(
+                    self.datastore_info(), object_, value_to_kafka(object_.to_dict())
+                )
diff --git a/swh/scrubber/cli.py b/swh/scrubber/cli.py
--- a/swh/scrubber/cli.py
+++ b/swh/scrubber/cli.py
@@ -1,3 +1,10 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+
 import click
 
 from swh.core.cli import CONTEXT_SETTINGS
@@ -5,7 +12,82 @@
 
 
 @swh_cli_group.group(name="scrubber", context_settings=CONTEXT_SETTINGS)
+@click.option(
+    "--config-file",
+    "-C",
+    default=None,
+    type=click.Path(exists=True, dir_okay=False,),
+    help="Configuration file.",
+)
+@click.pass_context
+def scrubber_cli_group(ctx, config_file):
+    """main command group of the datastore scrubber
+    """
+    from swh.core import config
+
+    from . import get_scrubber_db
+
+    if not config_file:
+        config_file = os.environ.get("SWH_CONFIG_FILENAME")
+
+    if config_file:
+        if not os.path.exists(config_file):
+            raise ValueError("%s does not exist" % config_file)
+        conf = config.read(config_file)
+    else:
+        conf = {}
+
+    if "scrubber_db" not in conf:
+        ctx.fail("You must have a scrubber_db configured in your config file.")
+
+    ctx.ensure_object(dict)
+    ctx.obj["config"] = conf
+    ctx.obj["db"] = get_scrubber_db(**conf["scrubber_db"])
+
+
+@scrubber_cli_group.group(name="check")
 @click.pass_context
-def scrubber_cli_group(ctx):
-    """main command of the datastore scrubber
+def scrubber_check_cli_group(ctx):
+    """group of commands which read from data stores and report errors.
     """
+    pass
+
+
+@scrubber_check_cli_group.command(name="storage")
+@click.option(
+    "--object-type",
+    type=click.Choice(
+        # use a hardcoded list to prevent having to load the
+        # replay module at cli loading time
+        [
+            "snapshot",
+            "revision",
+            "release",
+            "directory",
+            # TODO:
+            # "raw_extrinsic_metadata",
+            # "extid",
+        ]
+    ),
+)
+@click.option("--start-object", default="0" * 40)
+@click.option("--end-object", default="f" * 40)
+@click.pass_context
+def scrubber_check_storage(ctx, object_type: str, start_object: str, end_object: str):
+    conf = ctx.obj["config"]
+    if "storage" not in conf:
+        ctx.fail("You must have a storage configured in your config file.")
+
+    from swh.storage import get_storage
+
+    from .check_storage import StorageChecker
+
+    checker = StorageChecker(
+        db=ctx.obj["db"],
+        storage=get_storage(**conf["storage"]),
+        object_type=object_type,
+        start_object=start_object,
+        end_object=end_object,
+    )
+
+    checker.check_storage()
diff --git a/swh/scrubber/db.py b/swh/scrubber/db.py
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/db.py
@@ -0,0 +1,89 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+
+import dataclasses
+import datetime
+import functools
+from typing import Iterator, Union
+
+from swh.core.db import BaseDb
+from swh.model.model import Content, Directory, Release, Revision, Snapshot
+from swh.model.swhids import CoreSWHID
+
+
+@dataclasses.dataclass(frozen=True)
+class Datastore:
+    package: str
+    cls: str
+    instance: str
+
+
+@dataclasses.dataclass(frozen=True)
+class CorruptObject:
+    id: CoreSWHID
+    datastore: Datastore
+    first_occurrence: datetime.datetime
+    object_: bytes
+
+
+class ScrubberDb(BaseDb):
+    current_version = 1
+
+    @functools.lru_cache(1000)
+    def datastore_get_or_add(self, datastore: Datastore) -> int:
+        """Creates a datastore if it does not exist, and returns its id."""
+        cur = self.cursor()
+        cur.execute(
+            """
+            INSERT INTO datastore (package, class, instance)
+            VALUES (%s, %s, %s)
+            ON CONFLICT DO NOTHING
+            RETURNING id
+            """,
+            (datastore.package, datastore.cls, datastore.instance),
+        )
+        (id_,) = cur.fetchone()
+        return id_
+
+    def corrupt_object_add(
+        self,
+        datastore: Datastore,
+        object_: Union[Content, Directory, Revision, Release, Snapshot],
+        serialized_object: bytes,
+    ) -> None:
+        datastore_id = self.datastore_get_or_add(datastore)
+        cur = self.cursor()
+        cur.execute(
+            """
+            INSERT INTO corrupt_object (id, datastore, object)
+            VALUES (%s, %s, %s)
+            ON CONFLICT DO NOTHING
+            """,
+            (str(object_.swhid()), datastore_id, serialized_object),
+        )
+
+    def corrupt_object_iter(self) -> Iterator[CorruptObject]:
+        cur = self.cursor()
+        cur.execute(
+            """
+            SELECT
+                co.id, co.first_occurrence, co.object,
+                ds.package, ds.class, ds.instance
+            FROM corrupt_object AS co
+            INNER JOIN datastore AS ds ON (ds.id=co.datastore)
+            """
+        )
+
+        for row in cur:
+            (id, first_occurrence, object_, ds_package, ds_class, ds_instance) = row
+            yield CorruptObject(
+                id=CoreSWHID.from_string(id),
+                first_occurrence=first_occurrence,
+                object_=object_,
+                datastore=Datastore(
+                    package=ds_package, cls=ds_class, instance=ds_instance
+                ),
+            )
diff --git a/swh/scrubber/sql/20-enums.sql b/swh/scrubber/sql/20-enums.sql
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/sql/20-enums.sql
@@ -0,0 +1 @@
+create type datastore_type as enum ('storage', 'journal', 'objstorage');
diff --git a/swh/scrubber/sql/30-schema.sql b/swh/scrubber/sql/30-schema.sql
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/sql/30-schema.sql
@@ -0,0 +1,28 @@
+create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
+
+create table datastore
+(
+  id                bigserial not null,
+  package           datastore_type not null,
+  class             text,
+  instance          text
+);
+
+comment on table datastore is 'Each row identifies a data store being scrubbed';
+comment on column datastore.id is 'Internal identifier of the datastore';
+comment on column datastore.package is 'Name of the component using this datastore (storage/journal/objstorage)';
+comment on column datastore.class is 'For datastores with multiple backends, name of the backend (postgresql/cassandra for storage, kafka for journal, pathslicer/azure/winery/... for objstorage)';
+comment on column datastore.instance is 'Human-readable way to uniquely identify the datastore; eg. its URL or DSN.';
+
+create table corrupt_object
+(
+  id                    swhid not null,
+  datastore             int not null,
+  first_occurrence      timestamptz not null default now(),
+  object                bytea not null
+);
+
+comment on table corrupt_object is 'Each row identifies an object that was found to be corrupt';
+comment on column corrupt_object.datastore is 'Datastore the corrupt object was found in.';
+comment on column corrupt_object.first_occurrence is 'Moment the object was found to be corrupt for the first time';
+comment on column corrupt_object.object is 'Corrupt object, as found in the datastore (possibly msgpack-encoded, using the journal''s serializer)';
diff --git a/swh/scrubber/sql/60-indexes.sql b/swh/scrubber/sql/60-indexes.sql
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/sql/60-indexes.sql
@@ -0,0 +1,13 @@
+-- datastore
+
+create unique index concurrently datastore_pkey on datastore(id);
+alter table datastore add primary key using index datastore_pkey;
+
+create unique index concurrently datastore_package_class_instance on datastore(package, class, instance);
+
+-- corrupt_object
+
+alter table corrupt_object add constraint corrupt_object_datastore_fkey foreign key (datastore) references datastore(id) not valid;
+alter table corrupt_object validate constraint corrupt_object_datastore_fkey;
+
+create unique index corrupt_object_pkey on corrupt_object(id, datastore);
diff --git a/swh/scrubber/tests/conftest.py b/swh/scrubber/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/tests/conftest.py
@@ -0,0 +1,28 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from functools import partial
+
+import pytest
+from pytest_postgresql import factories
+
+from swh.core.db.pytest_plugin import initialize_database_for_module, postgresql_fact
+from swh.scrubber.db import ScrubberDb
+
+scrubber_postgresql_proc = factories.postgresql_proc(
+    dbname="scrubber",
+    load=[partial(initialize_database_for_module, modname="scrubber", version=1)],
+)
+
+postgresql_scrubber = postgresql_fact("scrubber_postgresql_proc")
+
+
+@pytest.fixture
+def scrubber_db(postgresql_scrubber):
+    db = ScrubberDb(postgresql_scrubber)
+    with db.conn.cursor() as cur:
+        cur.execute("TRUNCATE TABLE corrupt_object")
+        cur.execute("TRUNCATE TABLE datastore CASCADE")
+    yield db
diff --git a/swh/scrubber/tests/test_cli.py b/swh/scrubber/tests/test_cli.py
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/tests/test_cli.py
@@ -0,0 +1,64 @@
+# Copyright (C) 2020-2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import copy
+import tempfile
+from unittest.mock import MagicMock
+
+from click.testing import CliRunner
+import yaml
+
+from swh.scrubber.check_storage import storage_db
+from swh.scrubber.cli import scrubber_cli_group
+
+CLI_CONFIG = {
+    "storage": {
+        "cls": "postgresql",
+        "db": "<replaced at runtime>",
+        "objstorage": {"cls": "memory"},
+    },
+    "scrubber_db": {"cls": "local", "db": "<replaced at runtime>"},
+}
+
+
+def invoke(swh_storage, scrubber_db, args):
+    runner = CliRunner()
+
+    config = copy.deepcopy(CLI_CONFIG)
+    with storage_db(swh_storage) as db:
+        config["storage"]["db"] = db.conn.dsn
+
+    config["scrubber_db"]["db"] = scrubber_db.conn.dsn
+
+    with tempfile.NamedTemporaryFile("a", suffix=".yml") as config_fd:
+        yaml.dump(config, config_fd)
+        config_fd.seek(0)
+        args = ["-C" + config_fd.name] + list(args)
+        result = runner.invoke(scrubber_cli_group, args, catch_exceptions=False)
+    return result
+
+
+def test_check_storage(swh_storage, mocker, scrubber_db):
+    storage_checker = MagicMock()
+    StorageChecker = mocker.patch(
+        "swh.scrubber.check_storage.StorageChecker", return_value=storage_checker
+    )
+    get_scrubber_db = mocker.patch(
+        "swh.scrubber.get_scrubber_db", return_value=scrubber_db
+    )
+    result = invoke(
+        swh_storage, scrubber_db, ["check", "storage", "--object-type=snapshot"]
+    )
+    assert result.exit_code == 0, result.output
+    assert result.output == ""
+
+    get_scrubber_db.assert_called_once_with(cls="local", db=scrubber_db.conn.dsn)
+    StorageChecker.assert_called_once_with(
+        db=scrubber_db,
+        storage=StorageChecker.mock_calls[0][2]["storage"],
+        object_type="snapshot",
+        start_object="0" * 40,
+        end_object="f" * 40,
+    )
diff --git a/swh/scrubber/tests/test_nothing.py b/swh/scrubber/tests/test_nothing.py
deleted file mode 100644
--- a/swh/scrubber/tests/test_nothing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def test_nothing():
-    # Placeholder; remove this when we add actual tests
-    pass
diff --git a/swh/scrubber/tests/test_storage_postgresql.py b/swh/scrubber/tests/test_storage_postgresql.py
new file mode 100644
--- /dev/null
+++ b/swh/scrubber/tests/test_storage_postgresql.py
@@ -0,0 +1,105 @@
+# Copyright (C) 2022  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import datetime
+import unittest.mock
+
+import attr
+import pytest
+
+from swh.journal.serializers import kafka_to_value
+from swh.model import swhids
+from swh.model.tests import swh_model_data
+from swh.scrubber.check_storage import StorageChecker
+from swh.storage.backfill import byte_ranges
+
+# decorator to make swh.storage.backfill use less ranges, so tests run faster
+patch_byte_ranges = unittest.mock.patch(
+    "swh.storage.backfill.byte_ranges",
+    lambda numbits, start, end: byte_ranges(numbits // 8, start, end),
+)
+
+
+@patch_byte_ranges
+def test_no_corruption(scrubber_db, swh_storage):
+    swh_storage.directory_add(swh_model_data.DIRECTORIES)
+    swh_storage.revision_add(swh_model_data.REVISIONS)
+    swh_storage.release_add(swh_model_data.RELEASES)
+    swh_storage.snapshot_add(swh_model_data.SNAPSHOTS)
+
+    for object_type in ("snapshot", "release", "revision", "directory"):
+        StorageChecker(
+            db=scrubber_db,
+            storage=swh_storage,
+            object_type=object_type,
+            start_object="00" * 20,
+            end_object="ff" * 20,
+        ).check_storage()
+
+    assert list(scrubber_db.corrupt_object_iter()) == []
+
+
+@pytest.mark.parametrize("corrupt_idx", range(len(swh_model_data.SNAPSHOTS)))
+@patch_byte_ranges
+def test_corrupt_snapshot(scrubber_db, swh_storage, corrupt_idx):
+    snapshots = list(swh_model_data.SNAPSHOTS)
+    snapshots[corrupt_idx] = attr.evolve(snapshots[corrupt_idx], id=b"\x00" * 20)
+    swh_storage.snapshot_add(snapshots)
+
+    before_date = datetime.datetime.now(tz=datetime.timezone.utc)
+    for object_type in ("snapshot", "release", "revision", "directory"):
+        StorageChecker(
+            db=scrubber_db,
+            storage=swh_storage,
+            object_type=object_type,
+            start_object="00" * 20,
+            end_object="ff" * 20,
+        ).check_storage()
+    after_date = datetime.datetime.now(tz=datetime.timezone.utc)
+
+    corrupt_objects = list(scrubber_db.corrupt_object_iter())
+    assert len(corrupt_objects) == 1
+    assert corrupt_objects[0].id == swhids.CoreSWHID.from_string(
+        "swh:1:snp:0000000000000000000000000000000000000000"
+    )
+    assert corrupt_objects[0].datastore.package == "storage"
+    assert corrupt_objects[0].datastore.cls == "postgresql"
+    assert corrupt_objects[0].datastore.instance.startswith(
+        "user=postgres password=xxx dbname=storage host="
+    )
+    assert (
+        before_date - datetime.timedelta(seconds=5)
+        <= corrupt_objects[0].first_occurrence
+        <= after_date + datetime.timedelta(seconds=5)
+    )
+    assert (
+        kafka_to_value(corrupt_objects[0].object_) == snapshots[corrupt_idx].to_dict()
+    )
+
+
+@patch_byte_ranges
+def test_corrupt_snapshots(scrubber_db, swh_storage):
+    snapshots = list(swh_model_data.SNAPSHOTS)
+    for i in (0, 1):
+        snapshots[i] = attr.evolve(snapshots[i], id=bytes([i]) * 20)
+    swh_storage.snapshot_add(snapshots)
+
+    StorageChecker(
+        db=scrubber_db,
+        storage=swh_storage,
+        object_type="snapshot",
+        start_object="00" * 20,
+        end_object="ff" * 20,
+    ).check_storage()
+
+    corrupt_objects = list(scrubber_db.corrupt_object_iter())
+    assert len(corrupt_objects) == 2
+    assert {co.id for co in corrupt_objects} == {
+        swhids.CoreSWHID.from_string(swhid)
+        for swhid in [
+            "swh:1:snp:0000000000000000000000000000000000000000",
+            "swh:1:snp:0101010101010101010101010101010101010101",
+        ]
+    }