-- Discovery of metadata during a listing, a loading, a deposit or from an external_catalog of an origin
-- also provides a translation to a defined json schema using a translation tool (indexer_configuration_id)
-- One row per metadata discovery for an origin.
-- object_id is declared as the primary key inline, matching what its own
-- comment states and how revision_metadata declares its key in this file.
create table origin_metadata_history(
  origin_id                bigint      not null,
  discovery_date           timestamptz not null,
  translation_date         timestamptz,           -- nullable
  provenance_type          text        not null,  -- TODO use an enum (?)
  raw_metadata             jsonb       not null,
  translated_metadata      jsonb,                 -- nullable
  indexer_configuration_id bigint,                -- nullable
  object_id                bigserial   not null primary key -- PK object identifier
);
-- Catalog comments for origin_metadata_history.
-- The table comment describes the table as a history (its name, and the
-- origin_metadata table being the one that stores the current value).
comment on table origin_metadata_history is 'keeps the history of all metadata discovered for an origin';
comment on column origin_metadata_history.origin_id is 'the origin id for which the metadata was found';
comment on column origin_metadata_history.discovery_date is 'the date of retrieval';
comment on column origin_metadata_history.translation_date is 'the date of translation';
comment on column origin_metadata_history.provenance_type is 'lister, publisher, etc';
comment on column origin_metadata_history.raw_metadata is 'metadata in json format but with original terms';
comment on column origin_metadata_history.translated_metadata is 'metadata in defined terms in json schema';
comment on column origin_metadata_history.indexer_configuration_id is 'tool used for translation';
comment on column origin_metadata_history.object_id is 'object identifier of the metadata entry';
-- Materialized view of origin_metadata_history, storing the *current* value of
-- metadata, as last seen by SWH for a given origin.
-- NOTE(review): the first three column definitions of this table were garbled
-- (two revisions fused on the same lines) and did not parse. They are
-- reconstructed here as: a surrogate primary key, a foreign key to origin(id),
-- and the same date columns as origin_metadata_history. Confirm the intended
-- date column name(s) and nullability.
create table origin_metadata(
  id                       bigserial   not null primary key, -- PK object identifier
  origin_id                bigint      not null references origin(id),
  discovery_date           timestamptz not null,
  translation_date         timestamptz,
  provenance_type          text        not null, -- TODO use an enum (?)
  raw_metadata             jsonb       not null,
  translated_metadata      jsonb,
  indexer_configuration_id bigint
);
-- The external_metadata table keeps metadata found on an external catalog
-- libraries.io for example
-- (provenance, discovery_date) is declared as the composite primary key, as
-- the "PK" annotations on those two columns indicate.
create table external_metadata(
  provenance               text        not null, -- PK, url
  discovery_date           timestamptz not null, -- PK
  translated_date          timestamptz,
  origin_id                bigint references origin(id), -- can be null? metadata found about an origin ?
  raw_metadata             jsonb       not null,
  translated_metadata      jsonb,
  indexer_configuration_id bigint,
  primary key (provenance, discovery_date)
);
-- lists all metadata files in a revision
create table revision_metadata(
  revision_id              sha1_git primary key references revision(id),
  content_entries          bigint[], -- files identified in the revision as potentially metadata
  readme_entry             sha1,     -- IDEA: direct entry point for readme file in a revision
  licence_entry            sha1,     -- IDEA: direct entry point for LICENSE file in a revision
                                     -- redundant with content_metadata
  translated_metadata      jsonb,    -- by file? aggregation of the content_metadata?
                                     -- maybe this isn't needed as we do a content_metadata translation
                                     -- and we can aggregate on the fly the content_entries
                                     -- from the content metadata table
  indexer_configuration_id bigint references indexer_configuration(id)
                                     -- not needed if translated_metadata is deleted
);

-- NOTES:
-- translation_date is not needed because we wish to translate on the fly
-- having origin_metadata and origin_metadata_history tables is redundant and inefficient