Changeset View
Standalone View
swh/storage/sql/30-swh-schema.sql
--- | --- | ||||
--- SQL implementation of the Software Heritage data model | --- SQL implementation of the Software Heritage data model | ||||
--- | --- | ||||
-- schema versions | -- schema versions | ||||
create table dbversion | create table dbversion | ||||
( | ( | ||||
version int primary key, | version int primary key, | ||||
release timestamptz, | release timestamptz, | ||||
description text | description text | ||||
); | ); | ||||
comment on table dbversion is 'Details of current db version'; | |||||
comment on column dbversion.version is 'Sql schema version'; | |||||
vlorentz: not release, but deployment | |||||
comment on column dbversion.release is 'Version release date and time'; | |||||
comment on column dbversion.description is 'Release description'; | |||||
-- latest schema version | -- latest schema version | ||||
insert into dbversion(version, release, description) | insert into dbversion(version, release, description) | ||||
values(133, now(), 'Work In Progress'); | values(137, now(), 'Work In Progress'); | ||||
-- a SHA1 checksum | -- a SHA1 checksum | ||||
create domain sha1 as bytea check (length(value) = 20); | create domain sha1 as bytea check (length(value) = 20); | ||||
-- a Git object ID, i.e., a Git-style salted SHA1 checksum | -- a Git object ID, i.e., a Git-style salted SHA1 checksum | ||||
create domain sha1_git as bytea check (length(value) = 20); | create domain sha1_git as bytea check (length(value) = 20); | ||||
-- a SHA256 checksum | -- a SHA256 checksum | ||||
Show All 20 Lines | ( | ||||
sha256 sha256 not null, | sha256 sha256 not null, | ||||
blake2s256 blake2s256, | blake2s256 blake2s256, | ||||
length bigint not null, | length bigint not null, | ||||
ctime timestamptz not null default now(), | ctime timestamptz not null default now(), | ||||
-- creation time, i.e. time of (first) injection into the storage | -- creation time, i.e. time of (first) injection into the storage | ||||
status content_status not null default 'visible', | status content_status not null default 'visible', | ||||
object_id bigserial | object_id bigserial | ||||
); | ); | ||||
comment on table content is 'Checksums of file content which is actually stored externally'; | |||||
comment on column content.sha1 is 'Content sha1 hash'; | |||||
comment on column content.sha1_git is 'Git object sha1 hash'; | |||||
comment on column content.sha256 is 'Content Sha256 hash'; | |||||
comment on column content.blake2s256 is 'Content blake2s hash'; | |||||
Done Inline ActionsNot "file creation", but the first time we saw that content. vlorentz: Not "file creation", but the first time we saw that content. | |||||
comment on column content.length is 'Content length'; | |||||
comment on column content.ctime is 'First seen time'; | |||||
Done Inline ActionsIt's not an identifier of the "file content", just of the content vlorentz: It's not an identifier of the "file content", just of the content | |||||
Done Inline Actionsthe style of these could be improved to be less awkward, e.g., "length of file content" -> "file content length" also, please check our glossary, as it will give hints of how to make this both shorter and clearer, e.g., it's not "file content", it's just "content" zack: the style of these could be improved to be less awkward, e.g., "length of file content" ->… | |||||
Done Inline ActionsI prefer using active voice for comments. But the comments will become repetitive like,
twitu: I prefer using active voice for comments. But the comments will become repetitive like… | |||||
comment on column content.status is 'Content status (absent, visible, hidden)'; | |||||
comment on column content.object_id is 'Content identifier'; | |||||
-- An origin is a place, identified by an URL, where software source code | -- An origin is a place, identified by an URL, where software source code | ||||
-- artifacts can be found. We support different kinds of origins, e.g., git and | -- artifacts can be found. We support different kinds of origins, e.g., git and | ||||
-- other VCS repositories, web pages that list tarballs URLs (e.g., | -- other VCS repositories, web pages that list tarballs URLs (e.g., | ||||
-- http://www.kernel.org), indirect tarball URLs (e.g., | -- http://www.kernel.org), indirect tarball URLs (e.g., | ||||
-- http://www.example.org/latest.tar.gz), etc. The key feature of an origin is | -- http://www.example.org/latest.tar.gz), etc. The key feature of an origin is | ||||
-- that it can be *fetched* from (wget, git clone, svn checkout, etc.) to | -- that it can be *fetched* from (wget, git clone, svn checkout, etc.) to | ||||
-- retrieve all the contained software. | -- retrieve all the contained software. | ||||
create table origin | create table origin | ||||
( | ( | ||||
id bigserial not null, | id bigserial not null, | ||||
type text, -- TODO use an enum here (?) | type text, -- TODO use an enum here (?) | ||||
url text not null | url text not null | ||||
); | ); | ||||
comment on column origin.id is 'Artifact origin id'; | |||||
comment on column origin.type is 'Type of origin'; | |||||
Done Inline ActionsI don't think comments on these columns are relevant. Maybe just one of the type. vlorentz: I don't think comments on these columns are relevant. Maybe just one of the type. | |||||
Done Inline ActionsNo point deleting them now. twitu: No point deleting them now. | |||||
Done Inline ActionsURL (uppercase) zack: URL (uppercase) | |||||
comment on column origin.url is 'Url of origin'; | |||||
-- Content blobs observed somewhere, but not ingested into the archive for | -- Content blobs observed somewhere, but not ingested into the archive for | ||||
-- whatever reason. This table is separate from the content table as we might | -- whatever reason. This table is separate from the content table as we might | ||||
-- not have the sha1 checksum of skipped contents (for instance when we inject | -- not have the sha1 checksum of skipped contents (for instance when we inject | ||||
-- git repositories, objects that are too big will be skipped here, and we will | -- git repositories, objects that are too big will be skipped here, and we will | ||||
-- only know their sha1_git). 'reason' contains the reason the content was | -- only know their sha1_git). 'reason' contains the reason the content was | ||||
-- skipped. origin is a nullable column allowing to find out which origin | -- skipped. origin is a nullable column allowing to find out which origin | ||||
-- contains that skipped content. | -- contains that skipped content. | ||||
create table skipped_content | create table skipped_content | ||||
( | ( | ||||
sha1 sha1, | sha1 sha1, | ||||
sha1_git sha1_git, | sha1_git sha1_git, | ||||
sha256 sha256, | sha256 sha256, | ||||
blake2s256 blake2s256, | blake2s256 blake2s256, | ||||
length bigint not null, | length bigint not null, | ||||
ctime timestamptz not null default now(), | ctime timestamptz not null default now(), | ||||
status content_status not null default 'absent', | status content_status not null default 'absent', | ||||
reason text not null, | reason text not null, | ||||
origin bigint, | origin bigint, | ||||
object_id bigserial | object_id bigserial | ||||
); | ); | ||||
comment on table skipped_content is 'Content blobs observed but not ingested in | |||||
Done Inline ActionsWe can leave "software heritage" implicit, given the context. How about "Content blobs observed, but not ingested in the archive" here? zack: We can leave "software heritage" implicit, given the context. How about "Content blobs observed… | |||||
Software heritage archive'; | |||||
comment on column skipped_content.sha1 is 'Skipped content sha1 hash'; | |||||
comment on column skipped_content.sha1_git is 'Git object sha1 hash'; | |||||
comment on column skipped_content.sha256 is 'Skipped content sha256 hash'; | |||||
Done Inline ActionsSame vlorentz: Same | |||||
comment on column skipped_content.blake2s256 is 'Skipped content blake2s hash'; | |||||
comment on column skipped_content.length is 'Skipped content length'; | |||||
comment on column skipped_content.ctime is 'First seen time'; | |||||
comment on column skipped_content.status is 'Skipped content status (absent, visible, hidden)'; | |||||
comment on column skipped_content.reason is 'Reason for skipping'; | |||||
comment on column skipped_content.origin is 'Origin table identifier'; | |||||
comment on column skipped_content.object_id is 'Skipped content identifier'; | |||||
-- Log of all origin fetches (i.e., origin crawling) that have been done in the | -- Log of all origin fetches (i.e., origin crawling) that have been done in the | ||||
-- past, or are still ongoing. Similar to list_history, but for origins. | -- past, or are still ongoing. Similar to list_history, but for origins. | ||||
create table fetch_history | create table fetch_history | ||||
( | ( | ||||
id bigserial, | id bigserial, | ||||
origin bigint, | origin bigint, | ||||
date timestamptz not null, | date timestamptz not null, | ||||
status boolean, -- true if and only if the fetch has been successful | status boolean, -- true if and only if the fetch has been successful | ||||
result jsonb, -- more detailed returned values, times, etc... | result jsonb, -- more detailed returned values, times, etc... | ||||
stdout text, | stdout text, | ||||
stderr text, -- null when status is true, filled otherwise | stderr text, -- null when status is true, filled otherwise | ||||
duration interval -- fetch duration of NULL if still ongoing | duration interval -- fetch duration of NULL if still ongoing | ||||
); | ); | ||||
comment on table fetch_history is 'Log of all origin fetches'; | |||||
comment on column fetch_history.id is 'Identifier for fetch history'; | |||||
comment on column fetch_history.origin is 'Origin table identifier'; | |||||
comment on column fetch_history.date is 'Fetch start time'; | |||||
Done Inline ActionsThis comment should go into more details vlorentz: This comment should go into more details | |||||
comment on column fetch_history.status is 'True indicates successful fetch'; | |||||
comment on column fetch_history.result is 'Detailed return values, times etc'; | |||||
Done Inline Actionsthese are incorrect, you should just refer to the general notions of "standard output / standard error", the intended reader will know what they mean zack: these are incorrect, you should just refer to the general notions of "standard output /… | |||||
comment on column fetch_history.stdout is 'Fetch operation output'; | |||||
Done Inline ActionsI don't think my previous comment on these fields have actually been addressed. My proposal is something like "standard output of the fetch operation" / "standard error of the fetch operation". zack: I don't think my previous comment on these fields have actually been addressed. My proposal is… | |||||
comment on column fetch_history.stderr is 'Fetch operation error'; | |||||
comment on column fetch_history.duration is 'Time taken to complete fetch, NULL if ongoing'; | |||||
-- A file-system directory. A directory is a list of directory entries (see | -- A file-system directory. A directory is a list of directory entries (see | ||||
-- tables: directory_entry_{dir,file}). | -- tables: directory_entry_{dir,file}). | ||||
-- | -- | ||||
-- To list the contents of a directory: | -- To list the contents of a directory: | ||||
-- 1. list the contained directory_entry_dir using array dir_entries | -- 1. list the contained directory_entry_dir using array dir_entries | ||||
-- 2. list the contained directory_entry_file using array file_entries | -- 2. list the contained directory_entry_file using array file_entries | ||||
-- 3. list the contained directory_entry_rev using array rev_entries | -- 3. list the contained directory_entry_rev using array rev_entries | ||||
-- 4. UNION | -- 4. UNION | ||||
-- | -- | ||||
-- Synonyms/mappings: | -- Synonyms/mappings: | ||||
-- * git: tree | -- * git: tree | ||||
create table directory | create table directory | ||||
( | ( | ||||
id sha1_git not null, | id sha1_git not null, | ||||
dir_entries bigint[], -- sub-directories, reference directory_entry_dir | dir_entries bigint[], -- sub-directories, reference directory_entry_dir | ||||
file_entries bigint[], -- contained files, reference directory_entry_file | file_entries bigint[], -- contained files, reference directory_entry_file | ||||
rev_entries bigint[], -- mounted revisions, reference directory_entry_rev | rev_entries bigint[], -- mounted revisions, reference directory_entry_rev | ||||
object_id bigserial -- short object identifier | object_id bigserial -- short object identifier | ||||
); | ); | ||||
comment on table directory is 'Contents of a directory, synonymous to tree (git)'; | |||||
comment on column directory.id is 'Git object sha1 hash'; | |||||
comment on column directory.dir_entries is 'Sub-directories, reference directory_entry_dir'; | |||||
comment on column directory.file_entries is 'Contained files, reference directory_entry_file'; | |||||
comment on column directory.rev_entries is 'Mounted revisions, reference directory_entry_rev'; | |||||
comment on column directory.object_id is 'Short object identifier'; | |||||
-- A directory entry pointing to a (sub-)directory. | -- A directory entry pointing to a (sub-)directory. | ||||
create table directory_entry_dir | create table directory_entry_dir | ||||
( | ( | ||||
id bigserial, | id bigserial, | ||||
target sha1_git, -- id of target directory | target sha1_git, -- id of target directory | ||||
name unix_path, -- path name, relative to containing dir | name unix_path, -- path name, relative to containing dir | ||||
perms file_perms -- unix-like permissions | perms file_perms -- unix-like permissions | ||||
); | ); | ||||
comment on table directory_entry_dir is 'Directory entry for directory'; | |||||
comment on column directory_entry_dir.id is 'Directory identifier'; | |||||
comment on column directory_entry_dir.target is 'Target directory identifier'; | |||||
comment on column directory_entry_dir.name is 'Path name, relative to containing directory'; | |||||
comment on column directory_entry_dir.perms is 'Unix-like permissions'; | |||||
-- A directory entry pointing to a file content. | -- A directory entry pointing to a file content. | ||||
create table directory_entry_file | create table directory_entry_file | ||||
( | ( | ||||
id bigserial, | id bigserial, | ||||
target sha1_git, -- id of target file | target sha1_git, -- id of target file | ||||
name unix_path, -- path name, relative to containing dir | name unix_path, -- path name, relative to containing dir | ||||
perms file_perms -- unix-like permissions | perms file_perms -- unix-like permissions | ||||
); | ); | ||||
comment on table directory_entry_file is 'Directory entry for file'; | |||||
comment on column directory_entry_file.id is 'File identifier'; | |||||
comment on column directory_entry_file.target is 'Target file identifier'; | |||||
comment on column directory_entry_file.name is 'Path name, relative to containing directory'; | |||||
comment on column directory_entry_file.perms is 'Unix-like permissions'; | |||||
-- A directory entry pointing to a revision. | -- A directory entry pointing to a revision. | ||||
create table directory_entry_rev | create table directory_entry_rev | ||||
( | ( | ||||
id bigserial, | id bigserial, | ||||
target sha1_git, -- id of target revision | target sha1_git, -- id of target revision | ||||
name unix_path, -- path name, relative to containing dir | name unix_path, -- path name, relative to containing dir | ||||
perms file_perms -- unix-like permissions | perms file_perms -- unix-like permissions | ||||
); | ); | ||||
comment on table directory_entry_rev is 'Directory entry for revision'; | |||||
comment on column directory_entry_dir.id is 'Revision identifier'; | |||||
comment on column directory_entry_dir.target is 'Target revision in identifier'; | |||||
comment on column directory_entry_dir.name is 'Path name, relative to containing directory'; | |||||
comment on column directory_entry_dir.perms is 'Unix-like permissions'; | |||||
-- A person referenced by some source code artifacts, e.g., a VCS revision or | -- A person referenced by some source code artifacts, e.g., a VCS revision or | ||||
-- release metadata. | -- release metadata. | ||||
create table person | create table person | ||||
( | ( | ||||
id bigserial, | id bigserial, | ||||
name bytea, -- advisory: not null if we managed to parse a name | name bytea, -- advisory: not null if we managed to parse a name | ||||
email bytea, -- advisory: not null if we managed to parse an email | email bytea, -- advisory: not null if we managed to parse an email | ||||
fullname bytea not null -- freeform specification; what is actually used in the checksums | fullname bytea not null -- freeform specification; what is actually used in the checksums | ||||
-- will usually be of the form 'name <email>' | -- will usually be of the form 'name <email>' | ||||
); | ); | ||||
comment on table person is 'Person referenced in code artifact release metadata'; | |||||
comment on column person.id is 'Person identifier'; | |||||
comment on column person.name is 'Name'; | |||||
comment on column person.email is 'Email'; | |||||
comment on column person.fullname is 'Full name'; | |||||
-- The state of a source code tree at a specific point in time. | -- The state of a source code tree at a specific point in time. | ||||
-- | -- | ||||
-- Synonyms/mappings: | -- Synonyms/mappings: | ||||
-- * git / subversion / etc: commit | -- * git / subversion / etc: commit | ||||
-- * tarball: a specific tarball | -- * tarball: a specific tarball | ||||
-- | -- | ||||
-- Revisions are organized as DAGs. Each revision points to 0, 1, or more (in | -- Revisions are organized as DAGs. Each revision points to 0, 1, or more (in | ||||
-- case of merges) parent revisions. Each revision points to a directory, i.e., | -- case of merges) parent revisions. Each revision points to a directory, i.e., | ||||
-- a file-system tree containing files and directories. | -- a file-system tree containing files and directories. | ||||
create table revision | create table revision | ||||
( | ( | ||||
id sha1_git not null, | id sha1_git not null, | ||||
date timestamptz, | date timestamptz, | ||||
date_offset smallint, | date_offset smallint, | ||||
committer_date timestamptz, | committer_date timestamptz, | ||||
committer_date_offset smallint, | committer_date_offset smallint, | ||||
type revision_type not null, | type revision_type not null, | ||||
directory sha1_git, -- source code "root" directory | directory sha1_git, -- source code 'root' directory | ||||
message bytea, | message bytea, | ||||
author bigint, | author bigint, | ||||
committer bigint, | committer bigint, | ||||
synthetic boolean not null default false, -- true iff revision has been created by Software Heritage | synthetic boolean not null default false, -- true iff revision has been created by Software Heritage | ||||
metadata jsonb, -- extra metadata (tarball checksums, extra commit information, etc...) | metadata jsonb, -- extra metadata (tarball checksums, extra commit information, etc...) | ||||
object_id bigserial, | object_id bigserial, | ||||
date_neg_utc_offset boolean, | date_neg_utc_offset boolean, | ||||
committer_date_neg_utc_offset boolean | committer_date_neg_utc_offset boolean | ||||
); | ); | ||||
comment on table revision is 'Revision represents the state of a source code tree at a | |||||
specific point in time'; | |||||
comment on column revision.id is 'Git id of sha1 checksum'; | |||||
comment on column revision.date is 'Timestamp when revision was authored'; | |||||
comment on column revision.date_offset is 'Authored timestamp offset from UTC'; | |||||
comment on column revision.committer_date is 'Timestamp when revision was committed'; | |||||
comment on column revision.committer_date_offset is 'Committed timestamp offset from UTC'; | |||||
comment on column revision.type is 'Possible revision types (''git'', ''tar'', ..)'; | |||||
comment on column revision.directory is 'Directory identifier'; | |||||
comment on column revision.message is 'Revision message'; | |||||
comment on column revision.author is 'Author identifier'; | |||||
comment on column revision.committer is 'Committer identifier'; | |||||
comment on column revision.synthetic is 'true iff revision has been created by Software Heritage'; | |||||
comment on column revision.metadata is 'extra metadata (tarball checksums, extra commit information, etc...)'; | |||||
comment on column revision.object_id is 'Object identifier'; | |||||
comment on column revision.date_neg_utc_offset is 'True indicates -0 UTC offset for author timestamp'; | |||||
comment on column revision.committer_date_neg_utc_offset is 'True indicates -0 UTC offset for committer timestamp'; | |||||
-- either this table or the sha1_git[] column on the revision table | -- either this table or the sha1_git[] column on the revision table | ||||
create table revision_history | create table revision_history | ||||
( | ( | ||||
id sha1_git not null, | id sha1_git not null, | ||||
parent_id sha1_git not null, | parent_id sha1_git not null, | ||||
parent_rank int not null default 0 | parent_rank int not null default 0 | ||||
-- parent position in merge commits, 0-based | -- parent position in merge commits, 0-based | ||||
); | ); | ||||
comment on table revision_history is 'Sequence of revision history with parent and position in history'; | |||||
comment on column revision_history.id is 'Revision history git object sha1 checksum'; | |||||
comment on column revision_history.parent_id is 'Parent revision git object identifier'; | |||||
comment on column revision_history.parent_rank is 'Parent position in merge commits, 0-based'; | |||||
-- Crawling history of software origins visited by Software Heritage. Each | -- Crawling history of software origins visited by Software Heritage. Each | ||||
-- visit is a 3-way mapping between a software origin, a timestamp, and a | -- visit is a 3-way mapping between a software origin, a timestamp, and a | ||||
-- snapshot object capturing the full-state of the origin at visit time. | -- snapshot object capturing the full-state of the origin at visit time. | ||||
create table origin_visit | create table origin_visit | ||||
( | ( | ||||
origin bigint not null, | origin bigint not null, | ||||
visit bigint not null, | visit bigint not null, | ||||
Show All 17 Lines | |||||
-- Software Heritage. This table is a simple mapping between (public) intrinsic | -- Software Heritage. This table is a simple mapping between (public) intrinsic | ||||
-- snapshot identifiers and (private) numeric sequential identifiers. | -- snapshot identifiers and (private) numeric sequential identifiers. | ||||
create table snapshot | create table snapshot | ||||
( | ( | ||||
object_id bigserial not null, -- PK internal object identifier | object_id bigserial not null, -- PK internal object identifier | ||||
id sha1_git not null -- snapshot intrinsic identifier | id sha1_git not null -- snapshot intrinsic identifier | ||||
); | ); | ||||
comment on table snapshot is 'State of a software origin as crawled by Software Heritage'; | |||||
comment on column snapshot.object_id is 'Internal object identifier'; | |||||
comment on column snapshot.id is 'Intrinsic snapshot identifier'; | |||||
-- Each snapshot associate "branch" names to other objects in the Software | -- Each snapshot associate "branch" names to other objects in the Software | ||||
-- Heritage Merkle DAG. This table describes branches as mappings between names | -- Heritage Merkle DAG. This table describes branches as mappings between names | ||||
-- and target typed objects. | -- and target typed objects. | ||||
create table snapshot_branch | create table snapshot_branch | ||||
( | ( | ||||
object_id bigserial not null, -- PK internal object identifier | object_id bigserial not null, -- PK internal object identifier | ||||
name bytea not null, -- branch name, e.g., "master" or "feature/drag-n-drop" | name bytea not null, -- branch name, e.g., "master" or "feature/drag-n-drop" | ||||
target bytea, -- target object identifier, e.g., a revision identifier | target bytea, -- target object identifier, e.g., a revision identifier | ||||
target_type snapshot_target -- target object type, e.g., "revision" | target_type snapshot_target -- target object type, e.g., "revision" | ||||
); | ); | ||||
comment on table snapshot_branch is 'Associates branches with objects in Heritage Marke DAG'; | |||||
comment on column snapshot_branch.object_id is 'Internal object identifier'; | |||||
comment on column snapshot_branch.name is 'Branch name'; | |||||
comment on column snapshot_branch.target is 'Target object identifier'; | |||||
comment on column snapshot_branch.target_type is 'Target object type'; | |||||
-- Mapping between snapshots and their branches. | -- Mapping between snapshots and their branches. | ||||
create table snapshot_branches | create table snapshot_branches | ||||
( | ( | ||||
snapshot_id bigint not null, -- snapshot identifier, ref. snapshot.object_id | snapshot_id bigint not null, -- snapshot identifier, ref. snapshot.object_id | ||||
branch_id bigint not null -- branch identifier, ref. snapshot_branch.object_id | branch_id bigint not null -- branch identifier, ref. snapshot_branch.object_id | ||||
); | ); | ||||
comment on table snapshot_branches is 'Mapping between snapshot and their branches'; | |||||
comment on column snapshot_branches.snapshot_id is 'Snapshot identifier'; | |||||
comment on column snapshot_branches.branch_id is 'Branch identifier'; | |||||
-- A "memorable" point in time in the development history of a software | -- A "memorable" point in time in the development history of a software | ||||
-- project. | -- project. | ||||
-- | -- | ||||
-- Synonyms/mappings: | -- Synonyms/mappings: | ||||
-- * git: tag (of the annotated kind, otherwise they are just references) | -- * git: tag (of the annotated kind, otherwise they are just references) | ||||
-- * tarball: the release version number | -- * tarball: the release version number | ||||
create table release | create table release | ||||
( | ( | ||||
id sha1_git not null, | id sha1_git not null, | ||||
target sha1_git, | target sha1_git, | ||||
date timestamptz, | date timestamptz, | ||||
date_offset smallint, | date_offset smallint, | ||||
name bytea, | name bytea, | ||||
comment bytea, | comment bytea, | ||||
author bigint, | author bigint, | ||||
synthetic boolean not null default false, -- true iff release has been created by Software Heritage | synthetic boolean not null default false, -- true iff release has been created by Software Heritage | ||||
object_id bigserial, | object_id bigserial, | ||||
target_type object_type not null, | target_type object_type not null, | ||||
date_neg_utc_offset boolean | date_neg_utc_offset boolean | ||||
); | ); | ||||
comment on table release is "Details of a software release, synonymous with | |||||
a tag (git) or version number (tarball)"; | |||||
comment on column release.id is 'Release git identifier'; | |||||
comment on column release.target is 'Target git identifier'; | |||||
comment on column release.date is 'Release timestamp'; | |||||
comment on column release.date_offset is 'Timestamp offset from UTC'; | |||||
comment on column release.name is 'Name'; | |||||
comment on column release.comment is 'Comment'; | |||||
comment on column release.author is 'Author'; | |||||
comment on column release.synthetic is 'Indicates if created by Software Heritage'; | |||||
comment on column release.object_id is 'Object identifier'; | |||||
comment on column release.target_type is 'Object type ("content", "directory", ..)'; | |||||
comment on column release.date_neg_utc_offset is 'True indicates -0 UTC offset for release timestamp'; | |||||
-- Tools | -- Tools | ||||
create table tool | create table tool | ||||
( | ( | ||||
id serial not null, | id serial not null, | ||||
name text not null, | name text not null, | ||||
version text not null, | version text not null, | ||||
configuration jsonb | configuration jsonb | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | |||||
create table object_counts | create table object_counts | ||||
( | ( | ||||
object_type text, -- table for which we're counting objects (PK) | object_type text, -- table for which we're counting objects (PK) | ||||
value bigint, -- count of objects in the table | value bigint, -- count of objects in the table | ||||
last_update timestamptz, -- last update for the object count in this table | last_update timestamptz, -- last update for the object count in this table | ||||
single_update boolean -- whether we update this table standalone (true) or through bucketed counts (false) | single_update boolean -- whether we update this table standalone (true) or through bucketed counts (false) | ||||
); | ); | ||||
comment on table object_counts is 'Cache of object counts'; | |||||
comment on column object_counts.object_type is 'Object type ("content", "directory", ..)'; | |||||
comment on column object_counts.value is 'Count of objects in the table'; | |||||
comment on column object_counts.last_update is 'Last update for object count'; | |||||
comment on column object_counts.single_update is 'standalone (true) or bucketed counts (false)'; | |||||
create table object_counts_bucketed | create table object_counts_bucketed | ||||
( | ( | ||||
line serial not null, -- PK | line serial not null, -- PK | ||||
object_type text not null, -- table for which we're counting objects | object_type text not null, -- table for which we're counting objects | ||||
identifier text not null, -- identifier across which we're bucketing objects | identifier text not null, -- identifier across which we're bucketing objects | ||||
bucket_start bytea, -- lower bound (inclusive) for the bucket | bucket_start bytea, -- lower bound (inclusive) for the bucket | ||||
bucket_end bytea, -- upper bound (exclusive) for the bucket | bucket_end bytea, -- upper bound (exclusive) for the bucket | ||||
value bigint, -- count of objects in the bucket | value bigint, -- count of objects in the bucket | ||||
last_update timestamptz -- last update for the object count in this bucket | last_update timestamptz -- last update for the object count in this bucket | ||||
); | ); | ||||
comment on table object_counts_bucketed is 'Bucketed count for objects ordered by type'; | |||||
comment on column object_counts_bucketed.line is 'Auto incremented idenitfier value'; | |||||
comment on column object_counts_bucketed.object_type is 'Object type ("content", "directory", ..)'; | |||||
comment on column object_counts_bucketed.identifier is 'Common identifier for bucketed objects'; | |||||
comment on column object_counts_bucketed.bucket_start is 'Lower bound (inclusive) for the bucket'; | |||||
comment on column object_counts_bucketed.bucket_end is 'Upper bound (exclusive) for the bucket'; | |||||
comment on column object_counts_bucketed.value is 'Count of objects in the bucket'; | |||||
comment on column object_counts_bucketed.last_update is 'Last update for the object count in this bucket'; | |||||
not release, but deployment