Changeset View
Changeset View
Standalone View
Standalone View
sql/swh-schema.sql
--- | --- | ||||
--- SQL implementation of the Software Heritage data model | --- SQL implementation of the Software Heritage data model | ||||
--- | --- | ||||
-- schema versions | -- schema versions | ||||
create table dbversion | create table dbversion | ||||
( | ( | ||||
version int primary key, | version int primary key, | ||||
release timestamptz, | release timestamptz, | ||||
description text | description text | ||||
); | ); | ||||
-- latest schema version | -- latest schema version | ||||
insert into dbversion(version, release, description) | insert into dbversion(version, release, description) | ||||
values(126, now(), 'Work In Progress'); | values(127, now(), 'Work In Progress'); | ||||
-- a SHA1 checksum | -- a SHA1 checksum | ||||
create domain sha1 as bytea check (length(value) = 20); | create domain sha1 as bytea check (length(value) = 20); | ||||
-- a Git object ID, i.e., a Git-style salted SHA1 checksum | -- a Git object ID, i.e., a Git-style salted SHA1 checksum | ||||
create domain sha1_git as bytea check (length(value) = 20); | create domain sha1_git as bytea check (length(value) = 20); | ||||
-- a SHA256 checksum | -- a SHA256 checksum | ||||
▲ Show 20 Lines • Show All 193 Lines • ▼ Show 20 Lines | |||||
comment on column origin_visit.origin is 'Visited origin'; | comment on column origin_visit.origin is 'Visited origin'; | ||||
comment on column origin_visit.visit is 'Sequential visit number for the origin'; | comment on column origin_visit.visit is 'Sequential visit number for the origin'; | ||||
comment on column origin_visit.date is 'Visit timestamp'; | comment on column origin_visit.date is 'Visit timestamp'; | ||||
comment on column origin_visit.status is 'Visit result'; | comment on column origin_visit.status is 'Visit result'; | ||||
comment on column origin_visit.metadata is 'Origin metadata at visit time'; | comment on column origin_visit.metadata is 'Origin metadata at visit time'; | ||||
comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time'; | comment on column origin_visit.snapshot_id is 'Origin snapshot at visit time'; | ||||
-- BEGIN legacy section (T830) | |||||
-- The content of software origins is indexed starting from top-level pointers | |||||
-- called "branches". Every time we fetch some origin we store in this table | |||||
-- where the branches pointed to at fetch time. | |||||
-- | |||||
-- Synonyms/mappings: | |||||
-- * git: ref (in the "git update-ref" sense) | |||||
create table occurrence_history | |||||
( | |||||
origin bigint not null, | |||||
branch bytea not null, -- e.g., b"master" (for VCS), or b"sid" (for Debian) | |||||
target sha1_git not null, -- ref target, e.g., commit id | |||||
target_type object_type not null, -- ref target type | |||||
visits bigint[] not null, -- the visits where that occurrence was valid. References | |||||
-- origin_visit(visit), where o_h.origin = origin_visit.origin. | |||||
object_id bigserial not null, -- short object identifier | |||||
snapshot_branch_id bigint | |||||
); | |||||
-- END legacy section (T830) | |||||
-- A snapshot represents the entire state of a software origin as crawled by | -- A snapshot represents the entire state of a software origin as crawled by | ||||
-- Software Heritage. This table is a simple mapping between (public) intrinsic | -- Software Heritage. This table is a simple mapping between (public) intrinsic | ||||
-- snapshot identifiers and (private) numeric sequential identifiers. | -- snapshot identifiers and (private) numeric sequential identifiers. | ||||
create table snapshot | create table snapshot | ||||
( | ( | ||||
object_id bigserial not null, -- PK internal object identifier | object_id bigserial not null, -- PK internal object identifier | ||||
id sha1_git -- snapshot intrinsic identifier | id sha1_git -- snapshot intrinsic identifier | ||||
); | ); | ||||
▲ Show 20 Lines • Show All 114 Lines • Show Last 20 Lines |