Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7342987
D219.id718.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D219.id718.diff
View Options
diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql
--- a/sql/swh-indexes.sql
+++ b/sql/swh-indexes.sql
@@ -272,3 +272,17 @@
alter table content_fossology_license add constraint content_fossology_license_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey;
+
+-- content_metadata: primary key on (id, indexer_configuration_id) promoted from a unique index built concurrently; each foreign key is added as "not valid" and then validated in a separate statement
+create unique index concurrently content_metadata_pkey on content_metadata(id, indexer_configuration_id);
+alter table content_metadata add primary key using index content_metadata_pkey;
+
+alter table content_metadata add constraint content_metadata_id_fkey foreign key (id) references content(sha1) not valid;
+alter table content_metadata validate constraint content_metadata_id_fkey;
+
+alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid;
+alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey;
+
+-- origin_metadata_history and origin_metadata
+-- TODO PK: origin_id, discovery_date
+-- TODO FK: origin_id, indexer_configuration_id
diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql
--- a/sql/swh-schema.sql
+++ b/sql/swh-schema.sql
@@ -463,3 +463,54 @@
comment on column content_fossology_license.id is 'Raw content identifier';
comment on column content_fossology_license.license_id is 'One of the content''s license identifier';
comment on column content_fossology_license.indexer_configuration_id is 'Tool used to compute the information';
+
+
+-- The table content_metadata provides a translation of files identified as
+-- potentially containing metadata, produced by a translation tool (indexer_configuration_id)
+create table content_metadata(
+ id sha1 not null,
+ translated_metadata jsonb not null,
+ indexer_configuration_id bigint not null
+);
+
+comment on table content_metadata is 'metadata semantically translated from a content file';
+comment on column content_metadata.id is 'sha1 of content file';
+comment on column content_metadata.translated_metadata is 'result of translation with defined format';
+comment on column content_metadata.indexer_configuration_id is 'tool used for translation';
+
+
+-- Metadata discovered during a listing or a deposit of an origin; a row may
+-- also carry a translation to a defined json schema made by a translation tool (indexer_configuration_id)
+create table origin_metadata_history(
+ origin_id bigint not null,
+ discovery_date timestamptz not null,
+ translation_date timestamptz,
+ provenance_type text not null, -- TODO use an enum (?)
+ raw_metadata jsonb not null,
+ translated_metadata jsonb,
+ indexer_configuration_id bigint,
+ object_id bigserial -- short object identifier
+);
+
+
+comment on table origin_metadata_history is 'keeps latest metadata concerning an origin';
+comment on column origin_metadata_history.origin_id is 'the origin id for which the metadata was found';
+comment on column origin_metadata_history.discovery_date is 'the date of retrieval';
+comment on column origin_metadata_history.translation_date is 'the date of translation';
+comment on column origin_metadata_history.provenance_type is 'lister, publisher, etc';
+comment on column origin_metadata_history.raw_metadata is 'metadata in json format but with original terms';
+comment on column origin_metadata_history.translated_metadata is 'metadata in defined terms in json schema';
+comment on column origin_metadata_history.indexer_configuration_id is 'tool used for translation';
+
+
+-- Plain table used as a materialized copy of origin_metadata_history, storing
+-- the *current* value of metadata, as last seen by SWH.
+create table origin_metadata(
+ origin_id bigint not null,
+ discovery_date timestamptz not null,
+ translation_date timestamptz,
+ provenance_type text not null, -- TODO use an enum (?)
+ raw_metadata jsonb not null,
+ translated_metadata jsonb,
+ indexer_configuration_id bigint
+);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:33 PM (7 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3226607
Attached To
D219: Added content_metadata logic to the storage
Event Timeline
Log In to Comment