Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/schema.py
Show First 20 Lines • Show All 169 Lines • ▼ Show 20 Lines | CREATE TABLE IF NOT EXISTS origin ( | ||||
type text, | type text, | ||||
next_visit_id int, | next_visit_id int, | ||||
-- We need integer visit ids for compatibility with the pgsql | -- We need integer visit ids for compatibility with the pgsql | ||||
-- storage, so we're using lightweight transactions with this trick: | -- storage, so we're using lightweight transactions with this trick: | ||||
-- https://stackoverflow.com/a/29391877/539465 | -- https://stackoverflow.com/a/29391877/539465 | ||||
); | ); | ||||
CREATE TABLE IF NOT EXISTS tool_by_uuid ( | CREATE TABLE IF NOT EXISTS metadata_authority ( | ||||
id timeuuid PRIMARY KEY, | url text, | ||||
name ascii, | type ascii, | ||||
version ascii, | metadata text, | ||||
configuration blob, | PRIMARY KEY ((url), type) | ||||
); | ); | ||||
CREATE TABLE IF NOT EXISTS tool ( | CREATE TABLE IF NOT EXISTS metadata_fetcher ( | ||||
id timeuuid, | |||||
name ascii, | name ascii, | ||||
version ascii, | version ascii, | ||||
configuration blob, | metadata text, | ||||
PRIMARY KEY ((name, version, configuration)) | PRIMARY KEY ((name), version) | ||||
) | ); | ||||
CREATE TABLE IF NOT EXISTS origin_metadata ( | |||||
origin text, | |||||
authority_type text, | |||||
authority_url text, | |||||
discovery_date timestamp, | |||||
fetcher_name ascii, | |||||
fetcher_version ascii, | |||||
format ascii, | |||||
metadata blob, | |||||
moranegg: Why do we need `authority_type` in the PRIMARY KEY? Isn't the URL enough? | |||||
[Done] [Inline Actions] vlorentz: I don't know if it's needed, but that's how we wrote the specification. | |||||
PRIMARY KEY ((origin), authority_type, authority_url, discovery_date, | |||||
fetcher_name, fetcher_version), | |||||
-- for now, authority_url could be in the partition key; but leaving it | |||||
-- as a clustering column allows listing authorities with metadata on an | |||||
[Done] [Inline Actions] moranegg: `every` typo? | |||||
-- origin if we ever need to do it. | |||||
); | |||||
CREATE TABLE IF NOT EXISTS object_count ( | CREATE TABLE IF NOT EXISTS object_count ( | ||||
partition_key smallint, -- Constant, must always be 0 | partition_key smallint, -- Constant, must always be 0 | ||||
object_type ascii, | object_type ascii, | ||||
count counter, | count counter, | ||||
PRIMARY KEY ((partition_key), object_type) | PRIMARY KEY ((partition_key), object_type) | ||||
); | ); | ||||
Show All 14 Lines | CREATE TABLE IF NOT EXISTS skipped_content_by_{main_algo} ( | ||||
target_token bigint, -- value of token(pk) on the "primary" table | target_token bigint, -- value of token(pk) on the "primary" table | ||||
PRIMARY KEY (({main_algo}), target_token) | PRIMARY KEY (({main_algo}), target_token) | ||||
); | ); | ||||
""" | """ | ||||
TABLES = ( | TABLES = ( | ||||
"skipped_content content revision revision_parent release " | "skipped_content content revision revision_parent release " | ||||
"directory directory_entry snapshot snapshot_branch " | "directory directory_entry snapshot snapshot_branch " | ||||
"origin_visit origin tool_by_uuid tool object_count " | "origin_visit origin origin_metadata object_count " | ||||
"origin_visit_status" | "origin_visit_status metadata_authority " | ||||
"metadata_fetcher" | |||||
).split() | ).split() | ||||
HASH_ALGORITHMS = ["sha1", "sha1_git", "sha256", "blake2s256"] | HASH_ALGORITHMS = ["sha1", "sha1_git", "sha256", "blake2s256"] | ||||
for main_algo in HASH_ALGORITHMS: | for main_algo in HASH_ALGORITHMS: | ||||
CREATE_TABLES_QUERIES.extend( | CREATE_TABLES_QUERIES.extend( | ||||
CONTENT_INDEX_TEMPLATE.format( | CONTENT_INDEX_TEMPLATE.format( | ||||
main_algo=main_algo, | main_algo=main_algo, | ||||
other_algos=", ".join( | other_algos=", ".join( | ||||
[algo for algo in HASH_ALGORITHMS if algo != main_algo] | [algo for algo in HASH_ALGORITHMS if algo != main_algo] | ||||
), | ), | ||||
).split("\n\n") | ).split("\n\n") | ||||
) | ) | ||||
TABLES.append("content_by_%s" % main_algo) | TABLES.append("content_by_%s" % main_algo) | ||||
TABLES.append("skipped_content_by_%s" % main_algo) | TABLES.append("skipped_content_by_%s" % main_algo) |
Why do we need authority_type in the PRIMARY KEY? Isn't the URL enough?