diff --git a/sql/swh-indexes.sql b/sql/swh-indexes.sql index 5dcd1c73..34e532e7 100644 --- a/sql/swh-indexes.sql +++ b/sql/swh-indexes.sql @@ -1,301 +1,301 @@ -- content create unique index concurrently content_pkey on content(sha1); create unique index concurrently on content(sha1_git); create index concurrently on content(sha256); create index concurrently on content(blake2s256); create index concurrently on content(ctime); -- TODO use a BRIN index here (postgres >= 9.5) -create index concurrently on content(object_id); +create unique index concurrently on content(object_id); alter table content add primary key using index content_pkey; -- entity_history create unique index concurrently entity_history_pkey on entity_history(id); create index concurrently on entity_history(uuid); create index concurrently on entity_history(name); alter table entity_history add primary key using index entity_history_pkey; -- entity create unique index concurrently entity_pkey on entity(uuid); create index concurrently on entity(name); create index concurrently on entity using gin(lister_metadata jsonb_path_ops); alter table entity add primary key using index entity_pkey; alter table entity add constraint entity_parent_fkey foreign key (parent) references entity(uuid) deferrable initially deferred not valid; alter table entity validate constraint entity_parent_fkey; alter table entity add constraint entity_last_id_fkey foreign key (last_id) references entity_history(id) not valid; alter table entity validate constraint entity_last_id_fkey; -- entity_equivalence create unique index concurrently entity_equivalence_pkey on entity_equivalence(entity1, entity2); alter table entity_equivalence add primary key using index entity_equivalence_pkey; alter table entity_equivalence add constraint "entity_equivalence_entity1_fkey" foreign key (entity1) references entity(uuid) not valid; alter table entity_equivalence validate constraint entity_equivalence_entity1_fkey; alter table entity_equivalence add constraint "entity_equivalence_entity2_fkey" foreign key (entity2) references entity(uuid) not valid; alter table entity_equivalence validate constraint entity_equivalence_entity2_fkey; alter table entity_equivalence add constraint "order_entities" check (entity1 < entity2) not valid; alter table entity_equivalence validate constraint order_entities; -- listable_entity create unique index concurrently listable_entity_pkey on listable_entity(uuid); alter table listable_entity add primary key using index listable_entity_pkey; alter table listable_entity add constraint listable_entity_uuid_fkey foreign key (uuid) references entity(uuid) not valid; alter table listable_entity validate constraint listable_entity_uuid_fkey; -- list_history create unique index concurrently list_history_pkey on list_history(id); alter table list_history add primary key using index list_history_pkey; alter table list_history add constraint list_history_entity_fkey foreign key (entity) references listable_entity(uuid) not valid; alter table list_history validate constraint list_history_entity_fkey; -- origin create unique index concurrently origin_pkey on origin(id); alter table origin add primary key using index origin_pkey; create index concurrently on origin(type, url); alter table origin add constraint origin_lister_fkey foreign key (lister) references listable_entity(uuid) not valid; alter table origin validate constraint origin_lister_fkey; alter table origin add constraint origin_project_fkey foreign key (project) references entity(uuid) not valid; alter table origin validate constraint origin_project_fkey; -- skipped_content alter table skipped_content add constraint skipped_content_sha1_sha1_git_sha256_key unique (sha1, sha1_git, sha256); -create unique index concurrently on skipped_content(sha1); -create unique index concurrently on skipped_content(sha1_git); +create index concurrently on skipped_content(sha1); +create index concurrently on skipped_content(sha1_git); create index concurrently on skipped_content(sha256); create index concurrently on skipped_content(blake2s256); -create index concurrently on skipped_content(object_id); +create unique index concurrently on skipped_content(object_id); alter table skipped_content add constraint skipped_content_origin_fkey foreign key (origin) references origin(id) not valid; alter table skipped_content validate constraint skipped_content_origin_fkey; -- fetch_history create unique index concurrently fetch_history_pkey on fetch_history(id); alter table fetch_history add primary key using index fetch_history_pkey; alter table fetch_history add constraint fetch_history_origin_fkey foreign key (origin) references origin(id) not valid; alter table fetch_history validate constraint fetch_history_origin_fkey; -- directory create unique index concurrently directory_pkey on directory(id); alter table directory add primary key using index directory_pkey; create index concurrently on directory using gin (dir_entries); create index concurrently on directory using gin (file_entries); create index concurrently on directory using gin (rev_entries); -create index concurrently on directory(object_id); +create unique index concurrently on directory(object_id); -- directory_entry_dir create unique index concurrently directory_entry_dir_pkey on directory_entry_dir(id); alter table directory_entry_dir add primary key using index directory_entry_dir_pkey; create unique index concurrently on directory_entry_dir(target, name, perms); -- directory_entry_file create unique index concurrently directory_entry_file_pkey on directory_entry_file(id); alter table directory_entry_file add primary key using index directory_entry_file_pkey; create unique index concurrently on directory_entry_file(target, name, perms); -- directory_entry_rev create unique index concurrently directory_entry_rev_pkey on directory_entry_rev(id); alter table directory_entry_rev add primary key using index directory_entry_rev_pkey; create unique index concurrently on directory_entry_rev(target, name, perms); -- person create unique index concurrently person_pkey on person(id); alter table person add primary key using index person_pkey; create unique index concurrently on person(fullname); create index concurrently on person(name); create index concurrently on person(email); -- revision create unique index concurrently revision_pkey on revision(id); alter table revision add primary key using index revision_pkey; alter table revision add constraint revision_author_fkey foreign key (author) references person(id) not valid; alter table revision validate constraint revision_author_fkey; alter table revision add constraint revision_committer_fkey foreign key (committer) references person(id) not valid; alter table revision validate constraint revision_committer_fkey; create index concurrently on revision(directory); -create index concurrently on revision(object_id); +create unique index concurrently on revision(object_id); -- revision_history create unique index concurrently revision_history_pkey on revision_history(id, parent_rank); alter table revision_history add primary key using index revision_history_pkey; create index concurrently on revision_history(parent_id); alter table revision_history add constraint revision_history_id_fkey foreign key (id) references revision(id) not valid; alter table revision_history validate constraint revision_history_id_fkey; -- origin_visit create unique index concurrently origin_visit_pkey on origin_visit(origin, visit); alter table origin_visit add primary key using index origin_visit_pkey; create index concurrently on origin_visit(date); alter table origin_visit add constraint origin_visit_origin_fkey foreign key (origin) references origin(id) not valid; alter table origin_visit validate constraint origin_visit_origin_fkey; -- occurrence_history create unique index concurrently occurrence_history_pkey on occurrence_history(object_id); alter table occurrence_history add primary key using index occurrence_history_pkey; create index concurrently on occurrence_history(target, target_type); create index concurrently on occurrence_history(origin, branch); create unique index concurrently on occurrence_history(origin, branch, target, target_type); alter table occurrence_history add constraint occurrence_history_origin_fkey foreign key (origin) references origin(id) not valid; alter table occurrence_history validate constraint occurrence_history_origin_fkey; -- occurrence create unique index concurrently occurrence_pkey on occurrence(origin, branch); alter table occurrence add primary key using index occurrence_pkey; alter table occurrence add constraint occurrence_origin_fkey foreign key (origin) references origin(id) not valid; alter table occurrence validate constraint occurrence_origin_fkey; -- release create unique index concurrently release_pkey on release(id); alter table release add primary key using index release_pkey; create index concurrently on release(target, target_type); -create index concurrently on release(object_id); +create unique index concurrently on release(object_id); alter table release add constraint release_author_fkey foreign key (author) references person(id) not valid; alter table release validate constraint release_author_fkey; -- indexer_configuration create unique index concurrently indexer_configuration_pkey on indexer_configuration(id); alter table indexer_configuration add primary key using index indexer_configuration_pkey; create unique index on indexer_configuration(tool_name, tool_version, tool_configuration); -- content_mimetype create unique index concurrently content_mimetype_pkey on content_mimetype(id, indexer_configuration_id); alter table content_mimetype add primary key using index content_mimetype_pkey; alter table content_mimetype add constraint content_mimetype_id_fkey foreign key (id) references content(sha1) not valid; alter table content_mimetype validate constraint content_mimetype_id_fkey; alter table content_mimetype add constraint content_mimetype_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table content_mimetype validate constraint content_mimetype_indexer_configuration_id_fkey; -- content_language create unique index concurrently content_language_pkey on content_language(id, indexer_configuration_id); alter table content_language add primary key using index content_language_pkey; alter table content_language add constraint content_language_id_fkey foreign key (id) references content(sha1) not valid; alter table content_language validate constraint content_language_id_fkey; alter table content_language add constraint content_language_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table content_language validate constraint content_language_indexer_configuration_id_fkey; -- content_ctags create index concurrently on content_ctags(id); create index concurrently on content_ctags(hash_sha1(name)); create unique index concurrently on content_ctags(id, hash_sha1(name), kind, line, lang, indexer_configuration_id); alter table content_ctags add constraint content_ctags_id_fkey foreign key (id) references content(sha1) not valid; alter table content_ctags validate constraint content_ctags_id_fkey; alter table content_ctags add constraint content_ctags_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table content_ctags validate constraint content_ctags_indexer_configuration_id_fkey; -- fossology_license create unique index concurrently fossology_license_pkey on fossology_license(id); alter table fossology_license add primary key using index fossology_license_pkey; create unique index on fossology_license(name); -- content_fossology_license create unique index concurrently content_fossology_license_pkey on content_fossology_license(id, license_id, indexer_configuration_id); alter table content_fossology_license add primary key using index content_fossology_license_pkey; alter table content_fossology_license add constraint content_fossology_license_id_fkey foreign key (id) references content(sha1) not valid; alter table content_fossology_license validate constraint content_fossology_license_id_fkey; alter table content_fossology_license add constraint content_fossology_license_license_id_fkey foreign key (license_id) references fossology_license(id) not valid; alter table content_fossology_license validate constraint content_fossology_license_license_id_fkey; alter table content_fossology_license add constraint content_fossology_license_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table content_fossology_license validate constraint content_fossology_license_indexer_configuration_id_fkey; -- content_metadata create unique index concurrently content_metadata_pkey on content_metadata(id, indexer_configuration_id); alter table content_metadata add primary key using index content_metadata_pkey; alter table content_metadata add constraint content_metadata_id_fkey foreign key (id) references content(sha1) not valid; alter table content_metadata validate constraint content_metadata_id_fkey; alter table content_metadata add constraint content_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table content_metadata validate constraint content_metadata_indexer_configuration_id_fkey; -- revision_metadata create unique index concurrently revision_metadata_pkey on revision_metadata(id, indexer_configuration_id); alter table revision_metadata add primary key using index revision_metadata_pkey; alter table revision_metadata add constraint revision_metadata_id_fkey foreign key (id) references revision(id) not valid; alter table revision_metadata validate constraint revision_metadata_id_fkey; alter table revision_metadata add constraint revision_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table revision_metadata validate constraint revision_metadata_indexer_configuration_id_fkey; -- metadata_provider create unique index concurrently metadata_provider_pkey on metadata_provider(id); alter table metadata_provider add primary key using index metadata_provider_pkey; create index concurrently on metadata_provider(provider_name, provider_url); -- origin_metadata create unique index concurrently origin_metadata_pkey on origin_metadata(id); alter table origin_metadata add primary key using index origin_metadata_pkey; create index concurrently on origin_metadata(origin_id, provider_id, tool_id); alter table origin_metadata add constraint origin_metadata_origin_fkey foreign key (origin_id) references origin(id) not valid; alter table origin_metadata validate constraint origin_metadata_origin_fkey; alter table origin_metadata add constraint origin_metadata_provider_fkey foreign key (provider_id) references metadata_provider(id) not valid; alter table origin_metadata validate constraint origin_metadata_provider_fkey; alter table origin_metadata add constraint origin_metadata_tool_fkey foreign key (tool_id) references indexer_configuration(id) not valid; alter table origin_metadata validate constraint origin_metadata_tool_fkey; -- origin_metadata_translation create unique index concurrently origin_metadata_translation_pkey on origin_metadata_translation(id, indexer_configuration_id); alter table origin_metadata_translation add primary key using index origin_metadata_translation_pkey; alter table origin_metadata_translation add constraint origin_metadata_indexer_configuration_id_fkey foreign key (indexer_configuration_id) references indexer_configuration(id) not valid; alter table origin_metadata_translation validate constraint origin_metadata_indexer_configuration_id_fkey; -- object_counts create unique index concurrently object_counts_pkey on object_counts(object_type); alter table object_counts add primary key using index object_counts_pkey; diff --git a/sql/swh-schema.sql b/sql/swh-schema.sql index 10ab22d6..ace56e2c 100644 --- a/sql/swh-schema.sql +++ b/sql/swh-schema.sql @@ -1,513 +1,513 @@ --- --- Software Heritage Data Model --- -- drop schema if exists swh cascade; -- create schema swh; -- set search_path to swh; create table dbversion ( version int primary key, release timestamptz, description text ); insert into dbversion(version, release, description) - values(111, now(), 'Work In Progress'); + values(112, now(), 'Work In Progress'); -- a SHA1 checksum (not necessarily originating from Git) create domain sha1 as bytea check (length(value) = 20); -- a Git object ID, i.e., a SHA1 checksum create domain sha1_git as bytea check (length(value) = 20); -- a SHA256 checksum create domain sha256 as bytea check (length(value) = 32); -- a blake2 checksum create domain blake2s256 as bytea check (length(value) = 32); -- UNIX path (absolute, relative, individual path component, etc.) create domain unix_path as bytea; -- a set of UNIX-like access permissions, as manipulated by, e.g., chmod create domain file_perms as int; -- Checksums about actual file content. Note that the content itself is not -- stored in the DB, but on external (key-value) storage. A single checksum is -- used as key there, but the other can be used to verify that we do not inject -- content collisions not knowingly. create table content ( sha1 sha1 not null, sha1_git sha1_git not null, sha256 sha256 not null, blake2s256 blake2s256, length bigint not null, ctime timestamptz not null default now(), -- creation time, i.e. time of (first) injection into the storage status content_status not null default 'visible', object_id bigserial ); -- Entities constitute a typed hierarchy of organization, hosting -- facilities, groups, people and software projects. -- -- Examples of entities: Software Heritage, Debian, GNU, GitHub, -- Apache, The Linux Foundation, the Debian Python Modules Team, the -- torvalds GitHub user, the torvalds/linux GitHub project. -- -- The data model is hierarchical (via the parent attribute) and might -- store sub-branches of existing entities. The key feature of an -- entity is might be *listed* (if it is available in listable_entity) -- to retrieve information about its content, i.e: sub-entities, -- projects, origins. -- The history of entities. Allows us to keep historical metadata -- about entities. The temporal invariant is the uuid. Root -- organization uuids are manually generated (and available in -- swh-data.sql). -- -- For generated entities (generated = true), we can provide -- generation_metadata to allow listers to retrieve the uuids of previous -- iterations of the entity. -- -- Inactive entities that have been active in the past (active = -- false) should register the timestamp at which we saw them -- deactivate, in a new entry of entity_history. create table entity_history ( id bigserial not null, uuid uuid, parent uuid, -- should reference entity_history(uuid) name text not null, type entity_type not null, description text, homepage text, active boolean not null, -- whether the entity was seen on the last listing generated boolean not null, -- whether this entity has been generated by a lister lister_metadata jsonb, -- lister-specific metadata, used for queries metadata jsonb, validity timestamptz[] -- timestamps at which we have seen this entity ); -- The entity table provides a view of the latest information on a -- given entity. It is updated via a trigger on entity_history. create table entity ( uuid uuid not null, parent uuid, name text not null, type entity_type not null, description text, homepage text, active boolean not null, -- whether the entity was seen on the last listing generated boolean not null, -- whether this entity has been generated by a lister lister_metadata jsonb, -- lister-specific metadata, used for queries metadata jsonb, last_seen timestamptz, -- last listing time or disappearance time for active=false last_id bigint -- last listing id ); -- Register the equivalence between two entities. Allows sideways -- navigation in the entity table create table entity_equivalence ( entity1 uuid, entity2 uuid ); -- Register a lister for a specific entity. create table listable_entity ( uuid uuid, enabled boolean not null default true, -- do we list this entity automatically? list_engine text, -- crawler to be used to list entity's content list_url text, -- root URL to start the listing list_params jsonb, -- org-specific listing parameter latest_list timestamptz -- last time the entity's content has been listed ); -- Log of all entity listings (i.e., entity crawling) that have been -- done in the past, or are still ongoing. create table list_history ( id bigserial not null, date timestamptz not null, status boolean, -- true if and only if the listing has been successful result jsonb, -- more detailed return value, depending on status stdout text, stderr text, duration interval, -- fetch duration of NULL if still ongoing entity uuid ); -- An origin is a place, identified by an URL, where software can be found. We -- support different kinds of origins, e.g., git and other VCS repositories, -- web pages that list tarballs URLs (e.g., http://www.kernel.org), indirect -- tarball URLs (e.g., http://www.example.org/latest.tar.gz), etc. The key -- feature of an origin is that it can be *fetched* (wget, git clone, svn -- checkout, etc.) to retrieve all the contained software. create table origin ( id bigserial not null, type text, -- TODO use an enum here (?) url text not null, lister uuid, project uuid ); -- Content we have seen but skipped for some reason. This table is -- separate from the content table as we might not have the sha1 -- checksum of that data (for instance when we inject git -- repositories, objects that are too big will be skipped here, and we -- will only know their sha1_git). 'reason' contains the reason the -- content was skipped. origin is a nullable column allowing to find -- out which origin contains that skipped content. create table skipped_content ( sha1 sha1, sha1_git sha1_git, sha256 sha256, blake2s256 blake2s256, length bigint not null, ctime timestamptz not null default now(), status content_status not null default 'absent', reason text not null, origin bigint, object_id bigserial ); -- Log of all origin fetches (i.e., origin crawling) that have been done in the -- past, or are still ongoing. Similar to list_history, but for origins. create table fetch_history ( id bigserial, origin bigint, date timestamptz not null, status boolean, -- true if and only if the fetch has been successful result jsonb, -- more detailed returned values, times, etc... stdout text, stderr text, -- null when status is true, filled otherwise duration interval -- fetch duration of NULL if still ongoing ); -- A file-system directory. A directory is a list of directory entries (see -- tables: directory_entry_{dir,file}). -- -- To list the contents of a directory: -- 1. list the contained directory_entry_dir using array dir_entries -- 2. list the contained directory_entry_file using array file_entries -- 3. list the contained directory_entry_rev using array rev_entries -- 4. UNION -- -- Synonyms/mappings: -- * git: tree create table directory ( id sha1_git, dir_entries bigint[], -- sub-directories, reference directory_entry_dir file_entries bigint[], -- contained files, reference directory_entry_file rev_entries bigint[], -- mounted revisions, reference directory_entry_rev object_id bigserial -- short object identifier ); -- A directory entry pointing to a sub-directory. create table directory_entry_dir ( id bigserial, target sha1_git, -- id of target directory name unix_path, -- path name, relative to containing dir perms file_perms -- unix-like permissions ); -- A directory entry pointing to a file. create table directory_entry_file ( id bigserial, target sha1_git, -- id of target file name unix_path, -- path name, relative to containing dir perms file_perms -- unix-like permissions ); -- A directory entry pointing to a revision. create table directory_entry_rev ( id bigserial, target sha1_git, -- id of target revision name unix_path, -- path name, relative to containing dir perms file_perms -- unix-like permissions ); create table person ( id bigserial, name bytea, -- advisory: not null if we managed to parse a name email bytea, -- advisory: not null if we managed to parse an email fullname bytea not null -- freeform specification; what is actually used in the checksums -- will usually be of the form 'name ' ); -- A snapshot of a software project at a specific point in time. -- -- Synonyms/mappings: -- * git / subversion / etc: commit -- * tarball: a specific tarball -- -- Revisions are organized as DAGs. Each revision points to 0, 1, or more (in -- case of merges) parent revisions. Each revision points to a directory, i.e., -- a file-system tree containing files and directories. create table revision ( id sha1_git, date timestamptz, date_offset smallint, committer_date timestamptz, committer_date_offset smallint, type revision_type not null, directory sha1_git, -- file-system tree message bytea, author bigint, committer bigint, synthetic boolean not null default false, -- true if synthetic (cf. swh-loader-tar) metadata jsonb, -- extra metadata (tarball checksums, extra commit information, etc...) object_id bigserial, date_neg_utc_offset boolean, committer_date_neg_utc_offset boolean ); -- either this table or the sha1_git[] column on the revision table create table revision_history ( id sha1_git, parent_id sha1_git, parent_rank int not null default 0 -- parent position in merge commits, 0-based ); -- The timestamps at which Software Heritage has made a visit of the given origin. create table origin_visit ( origin bigint not null, visit bigint not null, date timestamptz not null, status origin_visit_status not null, metadata jsonb ); comment on column origin_visit.origin is 'Visited origin'; comment on column origin_visit.visit is 'Visit number the visit occurred for that origin'; comment on column origin_visit.date is 'Visit date for that origin'; comment on column origin_visit.status is 'Visit status for that origin'; comment on column origin_visit.metadata is 'Metadata associated with the visit'; -- The content of software origins is indexed starting from top-level pointers -- called "branches". Every time we fetch some origin we store in this table -- where the branches pointed to at fetch time. -- -- Synonyms/mappings: -- * git: ref (in the "git update-ref" sense) create table occurrence_history ( origin bigint not null, branch bytea not null, -- e.g., b"master" (for VCS), or b"sid" (for Debian) target sha1_git not null, -- ref target, e.g., commit id target_type object_type not null, -- ref target type visits bigint[] not null, -- the visits where that occurrence was valid. References -- origin_visit(visit), where o_h.origin = origin_visit.origin. object_id bigserial not null -- short object identifier ); -- Materialized view of occurrence_history, storing the *current* value of each -- branch, as last seen by SWH. create table occurrence ( origin bigint, branch bytea not null, target sha1_git not null, target_type object_type not null ); -- A "memorable" point in the development history of a project. -- -- Synonyms/mappings: -- * git: tag (of the annotated kind, otherwise they are just references) -- * tarball: the release version number create table release ( id sha1_git not null, target sha1_git, date timestamptz, date_offset smallint, name bytea, comment bytea, author bigint, synthetic boolean not null default false, -- true if synthetic (cf. swh-loader-tar) object_id bigserial, target_type object_type not null, date_neg_utc_offset boolean ); -- Computing metadata on sha1's contents create table indexer_configuration ( id serial not null, tool_name text not null, tool_version text not null, tool_configuration jsonb ); comment on table indexer_configuration is 'Indexer''s configuration version'; comment on column indexer_configuration.id is 'Tool identifier'; comment on column indexer_configuration.tool_version is 'Tool name'; comment on column indexer_configuration.tool_version is 'Tool version'; comment on column indexer_configuration.tool_configuration is 'Tool configuration: command line, flags, etc...'; -- Properties (mimetype, encoding, etc...) create table content_mimetype ( id sha1 not null, mimetype bytea not null, encoding bytea not null, indexer_configuration_id bigint not null ); comment on table content_mimetype is 'Metadata associated to a raw content'; comment on column content_mimetype.mimetype is 'Raw content Mimetype'; comment on column content_mimetype.encoding is 'Raw content encoding'; comment on column content_mimetype.indexer_configuration_id is 'Tool used to compute the information'; -- Language metadata create table content_language ( id sha1 not null, lang languages not null, indexer_configuration_id bigint not null ); comment on table content_language is 'Language information on a raw content'; comment on column content_language.lang is 'Language information'; comment on column content_language.indexer_configuration_id is 'Tool used to compute the information'; -- ctags information per content create table content_ctags ( id sha1 not null, name text not null, kind text not null, line bigint not null, lang ctags_languages not null, indexer_configuration_id bigint not null ); comment on table content_ctags is 'Ctags information on a raw content'; comment on column content_ctags.id is 'Content identifier'; comment on column content_ctags.name is 'Symbol name'; comment on column content_ctags.kind is 'Symbol kind (function, class, variable, const...)'; comment on column content_ctags.line is 'Symbol line'; comment on column content_ctags.lang is 'Language information for that content'; comment on column content_ctags.indexer_configuration_id is 'Tool used to compute the information'; create table fossology_license( id smallserial, name text not null ); comment on table fossology_license is 'Possible license recognized by license indexer'; comment on column fossology_license.id is 'License identifier'; comment on column fossology_license.name is 'License name'; create table content_fossology_license ( id sha1 not null, license_id smallserial not null, indexer_configuration_id bigint not null ); comment on table content_fossology_license is 'license associated to a raw content'; comment on column content_fossology_license.id is 'Raw content identifier'; comment on column content_fossology_license.license_id is 'One of the content''s license identifier'; comment on column content_fossology_license.indexer_configuration_id is 'Tool used to compute the information'; -- The table content_metadata provides a translation to files -- identified as potentially containning metadata with a translation tool (indexer_configuration_id) create table content_metadata( id sha1 not null, translated_metadata jsonb not null, indexer_configuration_id bigint not null ); comment on table content_metadata is 'metadata semantically translated from a content file'; comment on column content_metadata.id is 'sha1 of content file'; comment on column content_metadata.translated_metadata is 'result of translation with defined format'; comment on column content_metadata.indexer_configuration_id is 'tool used for translation'; -- The table revision_metadata provides a minimal set of intrinsic metadata -- detected with the detection tool (indexer_configuration_id) and aggregated -- from the content_metadata translation. create table revision_metadata( id sha1_git not null, translated_metadata jsonb not null, indexer_configuration_id bigint not null ); comment on table revision_metadata is 'metadata semantically detected and translated in a revision'; comment on column revision_metadata.id is 'sha1_git of revision'; comment on column revision_metadata.translated_metadata is 'result of detection and translation with defined format'; comment on column revision_metadata.indexer_configuration_id is 'tool used for detection'; create table metadata_provider ( id serial not null, provider_name text not null, provider_type text not null, provider_url text, metadata jsonb ); comment on table metadata_provider is 'Metadata provider information'; comment on column metadata_provider.id is 'Provider''s identifier'; comment on column metadata_provider.provider_name is 'Provider''s name'; comment on column metadata_provider.provider_url is 'Provider''s url'; comment on column metadata_provider.metadata is 'Other metadata about provider'; -- Discovery of metadata during a listing, loading, deposit or external_catalog of an origin -- also provides a translation to a defined json schema using a translation tool (indexer_configuration_id) create table origin_metadata( id bigserial not null, -- PK object identifier origin_id bigint not null, -- references origin(id) discovery_date timestamptz not null, -- when it was extracted provider_id bigint not null, -- ex: 'hal', 'lister-github', 'loader-github' tool_id bigint not null, metadata jsonb not null ); comment on table origin_metadata is 'keeps all metadata found concerning an origin'; comment on column origin_metadata.id is 'the origin_metadata object''s id'; comment on column origin_metadata.origin_id is 'the origin id for which the metadata was found'; comment on column origin_metadata.discovery_date is 'the date of retrieval'; comment on column origin_metadata.provider_id is 'the metadata provider: github, openhub, deposit, etc.'; comment on column origin_metadata.tool_id is 'the tool used for extracting metadata: lister-github, etc.'; comment on column origin_metadata.metadata is 'metadata in json format but with original terms'; create table origin_metadata_translation( id bigserial not null, -- PK origin_metadata identifier result jsonb, indexer_configuration_id bigint ); comment on table origin_metadata_translation is 'keeps translated for an origin_metadata entry'; comment on column origin_metadata_translation.id is 'the entry id in origin_metadata'; comment on column origin_metadata_translation.result is 'translated_metadata result after translation with tool'; comment on column origin_metadata_translation.indexer_configuration_id is 'tool used for translation'; -- Keep a cache of object counts create table object_counts ( object_type text, value bigint, last_update timestamptz ); diff --git a/sql/upgrades/112.sql b/sql/upgrades/112.sql new file mode 100644 index 00000000..057117b5 --- /dev/null +++ b/sql/upgrades/112.sql @@ -0,0 +1,50 @@ +-- SWH DB schema upgrade +-- from_version: 111 +-- to_version: 112 +-- description: make unique indexes unique and vice versa. + +insert into dbversion(version, release, description) + values(112, now(), 'Work In Progress'); + +ALTER INDEX content_object_id_idx rename to content_object_id_idx_2; + +ALTER INDEX directory_object_id_idx rename to directory_object_id_idx_2; + +ALTER INDEX release_object_id_idx rename to release_object_id_idx_2; + +ALTER INDEX revision_object_id_idx rename to revision_object_id_idx_2; + +ALTER INDEX skipped_content_object_id_idx rename to skipped_content_object_id_idx_2; + +ALTER INDEX skipped_content_sha1_git_idx rename to skipped_content_sha1_git_idx_2; + +ALTER INDEX skipped_content_sha1_idx rename to skipped_content_sha1_idx_2; + +CREATE UNIQUE INDEX content_object_id_idx ON content USING btree (object_id); + +CREATE UNIQUE INDEX directory_object_id_idx ON directory USING btree (object_id); + +CREATE UNIQUE INDEX release_object_id_idx ON "release" USING btree (object_id); + +CREATE UNIQUE INDEX revision_object_id_idx ON revision USING btree (object_id); + +CREATE UNIQUE INDEX skipped_content_object_id_idx ON skipped_content USING btree (object_id); + +CREATE INDEX skipped_content_sha1_git_idx ON skipped_content USING btree (sha1_git); + +CREATE INDEX skipped_content_sha1_idx ON skipped_content USING btree (sha1); + +DROP INDEX content_object_id_idx_2; + +DROP INDEX directory_object_id_idx_2; + +DROP INDEX release_object_id_idx_2; + +DROP INDEX revision_object_id_idx_2; + +DROP INDEX skipped_content_object_id_idx_2; + +DROP INDEX skipped_content_sha1_git_idx_2; + +DROP INDEX skipped_content_sha1_idx_2; +