Page MenuHomeSoftware Heritage

D219.id718.diff
No OneTemporary

D219.id718.diff

diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -272,3 +272,17 @@
alter table content_fossology_license add constraint content_fossology_license_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey;
+
+-- content_metadata: primary key on (id, indexer_configuration_id); foreign keys are added "not valid", then validated in a separate statement
+create unique index concurrently content_metadata_pkey on content_metadata(id, indexer_configuration_id);
+alter table content_metadata add primary key using index content_metadata_pkey;
+
+alter table content_metadata add constraint content_metadata_id_fkey foreign key (id) references content(sha1) not valid;
+alter table content_metadata validate constraint content_metadata_id_fkey;
+
+alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
+alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey;
+
+-- origin_metadata_history and origin_metadata
+-- TODO PK: origin_id, discovery_date
+-- TODO FK: origin_id, indexer_configuration_id
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -463,3 +463,54 @@
comment on column content_fossology_license.id is 'Raw content identifier';
comment on column content_fossology_license.license_id is 'One of the content''s license identifier';
comment on column content_fossology_license.indexer_configuration_id is 'Tool used to compute the information';
+
+
+-- The table content_metadata stores the metadata translated from files
+-- identified as potentially containing metadata, using a translation tool (indexer_configuration_id)
+create table content_metadata(
+ id sha1 not null, -- sha1 of the content file the metadata comes from
+ translated_metadata jsonb not null, -- result of the translation, in the defined format
+ indexer_configuration_id bigint not null -- tool used for the translation
+);
+
+comment on table content_metadata is 'metadata semantically translated from a content file';
+comment on column content_metadata.id is 'sha1 of content file';
+comment on column content_metadata.translated_metadata is 'result of translation with defined format';
+comment on column content_metadata.indexer_configuration_id is 'tool used for translation';
+
+
+-- Discovery of metadata during a listing or a deposit of an origin
+-- also provides a translation to a defined json schema using a translation tool (indexer_configuration_id)
+create table origin_metadata_history(
+ origin_id bigint not null, -- origin for which the metadata was found
+ discovery_date timestamptz not null, -- date of retrieval
+ translation_date timestamptz, -- date of translation; nullable
+ provenance_type text not null, -- TODO use an enum (?)
+ raw_metadata jsonb not null, -- metadata as json, with original terms
+ translated_metadata jsonb, -- metadata in defined terms; nullable
+ indexer_configuration_id bigint, -- tool used for translation; nullable
+ object_id bigserial -- short object identifier
+);
+
+-- NOTE(review): the table comment below says 'latest' although this is the history table -- confirm the wording
+comment on table origin_metadata_history is 'keeps latest metadata concerning an origin';
+comment on column origin_metadata_history.origin_id is 'the origin id for which the metadata was found';
+comment on column origin_metadata_history.discovery_date is 'the date of retrieval';
+comment on column origin_metadata_history.translation_date is 'the date of translation';
+comment on column origin_metadata_history.provenance_type is 'lister, publisher, etc';
+comment on column origin_metadata_history.raw_metadata is 'metadata in json format but with original terms';
+comment on column origin_metadata_history.translated_metadata is 'metadata in defined terms in json schema';
+comment on column origin_metadata_history.indexer_configuration_id is 'tool used for translation';
+
+
+-- Materialized view of origin_metadata_history, storing the *current* value of
+-- metadata, as last seen by SWH.
+create table origin_metadata(
+ origin_id bigint not null, -- origin for which the metadata was found
+ discovery_date timestamptz not null, -- date of retrieval
+ translation_date timestamptz, -- date of translation; nullable
+ provenance_type text not null, -- TODO use an enum (?)
+ raw_metadata jsonb not null, -- metadata as json, with original terms
+ translated_metadata jsonb, -- metadata in defined terms; nullable
+ indexer_configuration_id bigint -- tool used for translation; nullable
+);

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 6:33 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226607

Event Timeline