Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/sql/30-swh-schema.sql
Show All 11 Lines | |||||
comment on table dbversion is 'Details of current db version'; | comment on table dbversion is 'Details of current db version'; | ||||
comment on column dbversion.version is 'SQL schema version'; | comment on column dbversion.version is 'SQL schema version'; | ||||
comment on column dbversion.release is 'Version deployment timestamp'; | comment on column dbversion.release is 'Version deployment timestamp'; | ||||
comment on column dbversion.description is 'Release description'; | comment on column dbversion.description is 'Release description'; | ||||
-- latest schema version | -- latest schema version | ||||
insert into dbversion(version, release, description) | insert into dbversion(version, release, description) | ||||
values(155, now(), 'Work In Progress'); | values(156, now(), 'Work In Progress'); | ||||
-- a SHA1 checksum | -- a SHA1 checksum | ||||
create domain sha1 as bytea check (length(value) = 20); | create domain sha1 as bytea check (length(value) = 20); | ||||
-- a Git object ID, i.e., a Git-style salted SHA1 checksum | -- a Git object ID, i.e., a Git-style salted SHA1 checksum | ||||
create domain sha1_git as bytea check (length(value) = 20); | create domain sha1_git as bytea check (length(value) = 20); | ||||
-- a SHA256 checksum | -- a SHA256 checksum | ||||
create domain sha256 as bytea check (length(value) = 32); | create domain sha256 as bytea check (length(value) = 32); | ||||
-- a blake2 checksum | -- a blake2 checksum | ||||
create domain blake2s256 as bytea check (length(value) = 32); | create domain blake2s256 as bytea check (length(value) = 32); | ||||
-- UNIX path (absolute, relative, individual path component, etc.) | -- UNIX path (absolute, relative, individual path component, etc.) | ||||
create domain unix_path as bytea; | create domain unix_path as bytea; | ||||
-- a set of UNIX-like access permissions, as manipulated by, e.g., chmod | -- a set of UNIX-like access permissions, as manipulated by, e.g., chmod | ||||
create domain file_perms as int; | create domain file_perms as int; | ||||
-- an SWHID | |||||
ardumont: I don't really know what's more sensible here, being more restrictive or not.
I would have… | |||||
Not Done Inline Actionsas an example from @anlambert's [1] ;) ardumont: as an example from @anlambert's [1] ;)
[1] https://github. | |||||
Done Inline Actions
No. We have no idea what the format of SWHID v2 will be. vlorentz: > [a-z]{3} because so far, it's only {snp, rev, dir, cnt, rel}.
No. We have no idea what the… | |||||
create domain swhid as text check (value ~ '^swh:[0-9]+:.*'); | |||||
-- Checksums about actual file content. Note that the content itself is not | -- Checksums about actual file content. Note that the content itself is not | ||||
-- stored in the DB, but on external (key-value) storage. A single checksum is | -- stored in the DB, but on external (key-value) storage. A single checksum is | ||||
-- used as key there, but the other can be used to verify that we do not inject | -- used as key there, but the other can be used to verify that we do not inject | ||||
-- content collisions not knowingly. | -- content collisions not knowingly. | ||||
create table content | create table content | ||||
( | ( | ||||
sha1 sha1 not null, | sha1 sha1 not null, | ||||
▲ Show 20 Lines • Show All 367 Lines • ▼ Show 20 Lines | |||||
comment on table metadata_authority is 'Metadata authority information'; | comment on table metadata_authority is 'Metadata authority information'; | ||||
comment on column metadata_authority.id is 'Internal identifier of the authority'; | comment on column metadata_authority.id is 'Internal identifier of the authority'; | ||||
comment on column metadata_authority.type is 'Type of authority (deposit/forge/registry)'; | comment on column metadata_authority.type is 'Type of authority (deposit/forge/registry)'; | ||||
comment on column metadata_authority.url is 'Authority''s uri'; | comment on column metadata_authority.url is 'Authority''s uri'; | ||||
comment on column metadata_authority.metadata is 'Other metadata about authority'; | comment on column metadata_authority.metadata is 'Other metadata about authority'; | ||||
-- Discovery of metadata during a listing, loading, deposit or external_catalog of an origin | -- Extrinsic metadata on a DAG objects and origins. | ||||
-- also provides a translation to a defined json schema using a translation tool (tool_id) | create table object_metadata | ||||
create table origin_metadata | ( | ||||
( | type text not null, | ||||
Not Done Inline Actionsheh, if we had the origin id as swhid, we would have been symmetric here :D ardumont: heh, if we had the origin id as swhid, we would have been symmetric here :D | |||||
id bigserial not null, -- PK internal object identifier | id text not null, | ||||
origin_id bigint not null, -- references origin(id) | |||||
discovery_date timestamptz not null, -- when it was extracted | -- metadata source | ||||
authority_id bigint not null, | authority_id bigint not null, | ||||
fetcher_id bigint not null, | fetcher_id bigint not null, | ||||
format text not null default 'sword-v2-atom-codemeta-v2-in-json', | discovery_date timestamptz not null, | ||||
metadata bytea not null | |||||
); | |||||
comment on table origin_metadata is 'keeps all metadata found concerning an origin'; | -- metadata itself | ||||
comment on column origin_metadata.id is 'the origin_metadata object''s id'; | format text not null, | ||||
comment on column origin_metadata.origin_id is 'the origin id for which the metadata was found'; | metadata bytea not null, | ||||
comment on column origin_metadata.discovery_date is 'the date of retrieval'; | |||||
comment on column origin_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; | -- context | ||||
comment on column origin_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; | origin text, | ||||
comment on column origin_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; | visit bigint, | ||||
comment on column origin_metadata.metadata is 'original metadata in opaque format'; | snapshot swhid, | ||||
release swhid, | |||||
revision swhid, | |||||
path bytea, | |||||
directory swhid | |||||
); | |||||
comment on table object_metadata is 'keeps all metadata found concerning an object'; | |||||
comment on column object_metadata.id is 'the SWHID or origin URL for which the metadata was found'; | |||||
comment on column object_metadata.discovery_date is 'the date of retrieval'; | |||||
comment on column object_metadata.authority_id is 'the metadata provider: github, openhub, deposit, etc.'; | |||||
comment on column object_metadata.fetcher_id is 'the tool used for extracting metadata: loaders, crawlers, etc.'; | |||||
comment on column object_metadata.format is 'name of the format of metadata, used by readers to interpret it.'; | |||||
comment on column object_metadata.metadata is 'original metadata in opaque format'; | |||||
-- Keep a cache of object counts | -- Keep a cache of object counts | ||||
create table object_counts | create table object_counts | ||||
( | ( | ||||
object_type text, -- table for which we're counting objects (PK) | object_type text, -- table for which we're counting objects (PK) | ||||
value bigint, -- count of objects in the table | value bigint, -- count of objects in the table | ||||
last_update timestamptz, -- last update for the object count in this table | last_update timestamptz, -- last update for the object count in this table | ||||
Show All 31 Lines |
I don't really know what's more sensible here, being more restrictive or not.
I would have made it less open at first, something like:
'^swh:[0-9]:[a-z]{3}:.*' ?
So that may make us think when we want to upgrade the swhid.
(Then again, i might be thinking backward here ¯\_(ツ)_/¯)