Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/schema.py
| Show First 20 Lines • Show All 169 Lines • ▼ Show 20 Lines | CREATE TABLE IF NOT EXISTS origin ( | ||||
| type text, | type text, | ||||
| next_visit_id int, | next_visit_id int, | ||||
| -- We need integer visit ids for compatibility with the pgsql | -- We need integer visit ids for compatibility with the pgsql | ||||
| -- storage, so we're using lightweight transactions with this trick: | -- storage, so we're using lightweight transactions with this trick: | ||||
| -- https://stackoverflow.com/a/29391877/539465 | -- https://stackoverflow.com/a/29391877/539465 | ||||
| ); | ); | ||||
| CREATE TABLE IF NOT EXISTS tool_by_uuid ( | CREATE TABLE IF NOT EXISTS metadata_authority ( | ||||
| id timeuuid PRIMARY KEY, | url text, | ||||
| name ascii, | type ascii, | ||||
| version ascii, | metadata text, | ||||
| configuration blob, | PRIMARY KEY ((url), type) | ||||
| ); | ); | ||||
| CREATE TABLE IF NOT EXISTS tool ( | CREATE TABLE IF NOT EXISTS metadata_fetcher ( | ||||
| id timeuuid, | |||||
| name ascii, | name ascii, | ||||
| version ascii, | version ascii, | ||||
| configuration blob, | metadata text, | ||||
| PRIMARY KEY ((name, version, configuration)) | PRIMARY KEY ((name), version) | ||||
| ) | ); | ||||
| CREATE TABLE IF NOT EXISTS origin_metadata ( | |||||
| origin text, | |||||
| authority_type text, | |||||
| authority_url text, | |||||
| discovery_date timestamp, | |||||
| fetcher_name ascii, | |||||
| fetcher_version ascii, | |||||
| format ascii, | |||||
| metadata blob, | |||||
moranegg: Why do we need `authority_type` in the PRIMARY KEY? Isn't the URL enough?
(Done) vlorentz: I don't know if it's needed, but that's how we wrote the specification.
| PRIMARY KEY ((origin), authority_type, authority_url, discovery_date, | |||||
| fetcher_name, fetcher_version), | |||||
| -- for now, authority_url could be in the partition key; but leaving it | |||||
| -- in the clustering key allows listing authorities with metadata on an | |||||
(Done) moranegg: `every` typo?
| -- origin if we ever need to do it. | |||||
| ); | |||||
| CREATE TABLE IF NOT EXISTS object_count ( | CREATE TABLE IF NOT EXISTS object_count ( | ||||
| partition_key smallint, -- Constant, must always be 0 | partition_key smallint, -- Constant, must always be 0 | ||||
| object_type ascii, | object_type ascii, | ||||
| count counter, | count counter, | ||||
| PRIMARY KEY ((partition_key), object_type) | PRIMARY KEY ((partition_key), object_type) | ||||
| ); | ); | ||||
| Show All 14 Lines | CREATE TABLE IF NOT EXISTS skipped_content_by_{main_algo} ( | ||||
| target_token bigint, -- value of token(pk) on the "primary" table | target_token bigint, -- value of token(pk) on the "primary" table | ||||
| PRIMARY KEY (({main_algo}), target_token) | PRIMARY KEY (({main_algo}), target_token) | ||||
| ); | ); | ||||
| """ | """ | ||||
| TABLES = ( | TABLES = ( | ||||
| "skipped_content content revision revision_parent release " | "skipped_content content revision revision_parent release " | ||||
| "directory directory_entry snapshot snapshot_branch " | "directory directory_entry snapshot snapshot_branch " | ||||
| "origin_visit origin tool_by_uuid tool object_count " | "origin_visit origin origin_metadata object_count " | ||||
| "origin_visit_status" | "origin_visit_status metadata_authority " | ||||
| "metadata_fetcher" | |||||
| ).split() | ).split() | ||||
| HASH_ALGORITHMS = ["sha1", "sha1_git", "sha256", "blake2s256"] | HASH_ALGORITHMS = ["sha1", "sha1_git", "sha256", "blake2s256"] | ||||
| for main_algo in HASH_ALGORITHMS: | for main_algo in HASH_ALGORITHMS: | ||||
| CREATE_TABLES_QUERIES.extend( | CREATE_TABLES_QUERIES.extend( | ||||
| CONTENT_INDEX_TEMPLATE.format( | CONTENT_INDEX_TEMPLATE.format( | ||||
| main_algo=main_algo, | main_algo=main_algo, | ||||
| other_algos=", ".join( | other_algos=", ".join( | ||||
| [algo for algo in HASH_ALGORITHMS if algo != main_algo] | [algo for algo in HASH_ALGORITHMS if algo != main_algo] | ||||
| ), | ), | ||||
| ).split("\n\n") | ).split("\n\n") | ||||
| ) | ) | ||||
| TABLES.append("content_by_%s" % main_algo) | TABLES.append("content_by_%s" % main_algo) | ||||
| TABLES.append("skipped_content_by_%s" % main_algo) | TABLES.append("skipped_content_by_%s" % main_algo) | ||||
Why do we need `authority_type` in the PRIMARY KEY? Isn't the URL enough?