Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/sql/30-swh-schema.sql
Show All 11 Lines | |||||
comment on table dbversion is 'Details of current db version'; | comment on table dbversion is 'Details of current db version'; | ||||
comment on column dbversion.version is 'SQL schema version'; | comment on column dbversion.version is 'SQL schema version'; | ||||
comment on column dbversion.release is 'Version deployment timestamp'; | comment on column dbversion.release is 'Version deployment timestamp'; | ||||
comment on column dbversion.description is 'Release description'; | comment on column dbversion.description is 'Release description'; | ||||
-- latest schema version | -- latest schema version | ||||
insert into dbversion(version, release, description) | insert into dbversion(version, release, description) | ||||
values(156, now(), 'Work In Progress'); | values(157, now(), 'Work In Progress'); | ||||
-- a SHA1 checksum | -- a SHA1 checksum | ||||
create domain sha1 as bytea check (length(value) = 20); | create domain sha1 as bytea check (length(value) = 20); | ||||
-- a Git object ID, i.e., a Git-style salted SHA1 checksum | -- a Git object ID, i.e., a Git-style salted SHA1 checksum | ||||
create domain sha1_git as bytea check (length(value) = 20); | create domain sha1_git as bytea check (length(value) = 20); | ||||
-- a SHA256 checksum | -- a SHA256 checksum | ||||
create domain sha256 as bytea check (length(value) = 32); | create domain sha256 as bytea check (length(value) = 32); | ||||
-- a blake2 checksum | -- a blake2 checksum | ||||
create domain blake2s256 as bytea check (length(value) = 32); | create domain blake2s256 as bytea check (length(value) = 32); | ||||
-- UNIX path (absolute, relative, individual path component, etc.) | -- UNIX path (absolute, relative, individual path component, etc.) | ||||
create domain unix_path as bytea; | create domain unix_path as bytea; | ||||
-- a set of UNIX-like access permissions, as manipulated by, e.g., chmod | -- a set of UNIX-like access permissions, as manipulated by, e.g., chmod | ||||
create domain file_perms as int; | create domain file_perms as int; | ||||
-- an SWHID | |||||
create domain swhid as text check (value ~ '^swh:[0-9]+:.*'); | |||||
-- Checksums about actual file content. Note that the content itself is not | -- Checksums about actual file content. Note that the content itself is not | ||||
-- stored in the DB, but on external (key-value) storage. A single checksum is | -- stored in the DB, but on external (key-value) storage. A single checksum is | ||||
-- used as key there, but the other can be used to verify that we do not inject | -- used as key there, but the other can be used to verify that we do not inject | ||||
-- content collisions not knowingly. | -- content collisions not knowingly. | ||||
create table content | create table content | ||||
( | ( | ||||
sha1 sha1 not null, | sha1 sha1 not null, | ||||
▲ Show 20 Lines • Show All 367 Lines • ▼ Show 20 Lines | |||||
comment on table metadata_authority is 'Metadata authority information'; | comment on table metadata_authority is 'Metadata authority information'; | ||||
comment on column metadata_authority.id is 'Internal identifier of the authority'; | comment on column metadata_authority.id is 'Internal identifier of the authority'; | ||||
comment on column metadata_authority.type is 'Type of authority (deposit/forge/registry)'; | comment on column metadata_authority.type is 'Type of authority (deposit/forge/registry)'; | ||||
comment on column metadata_authority.url is 'Authority''s uri'; | comment on column metadata_authority.url is 'Authority''s uri'; | ||||
comment on column metadata_authority.metadata is 'Other metadata about authority'; | comment on column metadata_authority.metadata is 'Other metadata about authority'; | ||||
-- Discovery of metadata during a listing, loading, deposit or external_catalog of an origin | -- Extrinsic metadata on a DAG objects and origins. | ||||
-- also provides a translation to a defined json schema using a translation tool (tool_id) | create table object_metadata | ||||
create table origin_metadata | ( | ||||
( | type text not null, | ||||
id bigserial not null, -- PK internal object identifier | id text not null, | ||||
origin_id bigint not null, -- references origin(id) | |||||
discovery_date timestamptz not null, -- when it was extracted | -- metadata source | ||||
authority_id bigint not null, | authority_id bigint not null, | ||||
fetcher_id bigint not null, | fetcher_id bigint not null, | ||||
format text not null default 'sword-v2-atom-codemeta-v2-in-json', | discovery_date timestamptz not null, | ||||
-- metadata itself | |||||
format text not null, | |||||
metadata bytea not null | metadata bytea not null | ||||
); | ); | ||||
comment on table origin_metadata is 'keeps all metadata found concerning an origin'; | comment on table object_metadata is 'keeps all metadata found concerning an object'; | ||||
douardda: shouldn't there also be a description for the type column?
What are the expected values for… | |||||
Done Inline ActionsI added the list of values. Yes, it's like an enum, but I didn't use one because it doesn't match any of the enums we currently have (all DAG objects plus origins, but not visits) vlorentz: I added the list of values. Yes, it's like an enum, but I didn't use one because it doesn't… | |||||
comment on column origin_metadata.id is 'the origin_metadata object''s id'; | comment on column object_metadata.type is 'the type of object (content/directory/revision/release/snapshot/origin) the metadata is on'; | ||||
comment on column origin_metadata.origin_id is 'the origin id for which the metadata was found'; | comment on column object_metadata.id is 'the SWHID or origin URL for which the metadata was found'; | ||||
comment on column origin_metadata.discovery_date is 'the date of retrieval'; | comment on column object_metadata.discovery_date is 'the date of retrieval'; | ||||
comment on column origin_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; | comment on column object_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; | ||||
comment on column origin_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; | comment on column object_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; | ||||
comment on column origin_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; | comment on column object_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; | ||||
comment on column origin_metadata.metadata is 'original metadata in opaque format'; | comment on column object_metadata.metadata is 'original metadata in opaque format'; | ||||
-- Keep a cache of object counts | -- Keep a cache of object counts | ||||
create table object_counts | create table object_counts | ||||
( | ( | ||||
object_type text, -- table for which we're counting objects (PK) | object_type text, -- table for which we're counting objects (PK) | ||||
value bigint, -- count of objects in the table | value bigint, -- count of objects in the table | ||||
last_update timestamptz, -- last update for the object count in this table | last_update timestamptz, -- last update for the object count in this table | ||||
Show All 31 Lines |
shouldn't there also be a description for the type column?
What are the expected values for this? I guess it's an enum-kind of field right?