create domain swhid as text check (value ~ '^swh:[0-9]+:.*');
create table datastore
@@ -14,6 +18,11 @@
comment on column datastore.class is 'For datastores with multiple backends, name of the backend (postgresql/cassandra for storage, kafka for journal, pathslicer/azure/winery/... for objstorage)';
comment on column datastore.instance is 'Human-readable way to uniquely identify the datastore; eg. its URL or DSN.';
+
+-------------------------------------
+-- Inventory of objects with issues
+-------------------------------------
+
create table corrupt_object
(
id swhid not null,
@@ -27,6 +36,37 @@
comment on column corrupt_object.object is 'Corrupt object, as found in the datastore (possibly msgpack-encoded, using the journal''s serializer)';
comment on column corrupt_object.first_occurrence is 'Moment the object was found to be corrupt for the first time';
+
+create table missing_object
+(
+ id swhid not null,
+ datastore int not null,
+ first_occurrence timestamptz not null default now()
+);
+
+comment on table missing_object is 'Each row identifies an object that are missing but referenced by another object (aka "holes")';
+comment on column missing_object.datastore is 'Datastore where the hole is.';
+comment on column missing_object.first_occurrence is 'Moment the object was found to be corrupt for the first time';
+
+create table missing_object_reference
+(
+ missing_id swhid not null,
+ reference_id swhid not null,
+ datastore int not null,
+ first_occurrence timestamptz not null default now()
+);
+
+comment on table missing_object_reference is 'Each row identifies an object that points to an object that does not exist (aka a "hole")';
+comment on column missing_object_reference.missing_id is 'SWHID of the missing object.';
+comment on column missing_object_reference.reference_id is 'SWHID of the object referencing the missing object.';
+comment on column missing_object_reference.datastore is 'Datastore where the referencing object is.';
+comment on column missing_object_reference.first_occurrence is 'Moment the object was found to reference a missing object';
+create unique index concurrently missing_object_reference_missing_id_reference_id_datastore on missing_object_reference(missing_id, reference_id, datastore);
+create unique index concurrently missing_object_reference_reference_id_missing_id_datastore on missing_object_reference(reference_id, missing_id, datastore);
+
+-------------------------------------
+-- Issue resolution
+-------------------------------------
+
-- object_origin
create unique index concurrently object_origin_pkey on object_origin (object_id, origin_url);