Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/cql.py
Show First 20 Lines • Show All 788 Lines • ▼ Show 20 Lines | class CqlRunner: | ||||
def origin_visit_iter(self, start_token: int) -> Iterator[Row]: | def origin_visit_iter(self, start_token: int) -> Iterator[Row]: | ||||
"""Returns all origin visits in order from this token, | """Returns all origin visits in order from this token, | ||||
and wraps around the token space.""" | and wraps around the token space.""" | ||||
yield from self._origin_visit_iter_from(start_token) | yield from self._origin_visit_iter_from(start_token) | ||||
yield from self._origin_visit_iter_to(start_token) | yield from self._origin_visit_iter_to(start_token) | ||||
########################## | ########################## | ||||
# 'tool' table | # 'metadata_authority' table | ||||
########################## | ########################## | ||||
_tool_keys = ["id", "name", "version", "configuration"] | _metadata_authority_keys = ["url", "type", "metadata"] | ||||
@_prepared_insert_statement("tool_by_uuid", _tool_keys) | @_prepared_insert_statement("metadata_authority", _metadata_authority_keys) | ||||
def tool_by_uuid_add_one(self, tool: Dict[str, Any], *, statement) -> None: | def metadata_authority_add(self, url, type, metadata, *, statement): | ||||
self._execute_with_retries(statement, [tool[key] for key in self._tool_keys]) | return self._execute_with_retries(statement, [url, type, metadata]) | ||||
@_prepared_insert_statement("tool", _tool_keys) | @_prepared_statement("SELECT * from metadata_authority WHERE type = ? AND url = ?") | ||||
def tool_add_one(self, tool: Dict[str, Any], *, statement) -> None: | def metadata_authority_get(self, type, url, *, statement) -> Optional[Row]: | ||||
self._execute_with_retries(statement, [tool[key] for key in self._tool_keys]) | return next(iter(self._execute_with_retries(statement, [type, url])), None) | ||||
self._increment_counter("tool", 1) | |||||
########################## | |||||
moranegg: I'm not sure I remember correctly, so I'll ask @ardumont to join.
But the name metadata_fetcher… | |||||
Done Inline Actions: The tool table used by the indexers is in their dedicated storage db. ardumont: The tool table used by the indexers is in their dedicated storage db.
This tool table stayed… | |||||
# 'metadata_fetcher' table | |||||
########################## | |||||
_metadata_fetcher_keys = ["name", "version", "metadata"] | |||||
@_prepared_insert_statement("metadata_fetcher", _metadata_fetcher_keys) | |||||
def metadata_fetcher_add(self, name, version, metadata, *, statement): | |||||
return self._execute_with_retries(statement, [name, version, metadata]) | |||||
@_prepared_statement( | @_prepared_statement( | ||||
"SELECT id FROM tool " "WHERE name = ? AND version = ? " "AND configuration = ?" | "SELECT * from metadata_fetcher WHERE name = ? AND version = ?" | ||||
) | ) | ||||
def tool_get_one_uuid( | def metadata_fetcher_get(self, name, version, *, statement) -> Optional[Row]: | ||||
self, name: str, version: str, configuration: Dict[str, Any], *, statement | return next(iter(self._execute_with_retries(statement, [name, version])), None) | ||||
) -> Optional[str]: | |||||
rows = list( | ########################## | ||||
self._execute_with_retries(statement, [name, version, configuration]) | # 'origin_metadata' table | ||||
########################## | |||||
_origin_metadata_keys = [ | |||||
"origin", | |||||
"authority_type", | |||||
"authority_url", | |||||
"discovery_date", | |||||
"fetcher_name", | |||||
"fetcher_version", | |||||
"format", | |||||
"metadata", | |||||
] | |||||
@_prepared_insert_statement("origin_metadata", _origin_metadata_keys) | |||||
def origin_metadata_add( | |||||
self, | |||||
origin, | |||||
authority_type, | |||||
authority_url, | |||||
discovery_date, | |||||
fetcher_name, | |||||
fetcher_version, | |||||
format, | |||||
metadata, | |||||
*, | |||||
statement, | |||||
): | |||||
return self._execute_with_retries( | |||||
statement, | |||||
[ | |||||
origin, | |||||
authority_type, | |||||
authority_url, | |||||
discovery_date, | |||||
fetcher_name, | |||||
fetcher_version, | |||||
format, | |||||
metadata, | |||||
], | |||||
) | |||||
@_prepared_statement( | |||||
"SELECT * from origin_metadata " | |||||
"WHERE origin=? AND authority_url=? AND discovery_date>=? " | |||||
"AND authority_type=?" | |||||
) | |||||
def origin_metadata_get_after( | |||||
self, origin, authority_type, authority_url, after, *, statement | |||||
): | |||||
return self._execute_with_retries( | |||||
statement, [origin, authority_url, after, authority_type] | |||||
) | |||||
@_prepared_statement( | |||||
"SELECT * from origin_metadata " | |||||
"WHERE origin=? AND authority_url=? AND authority_type=?" | |||||
) | |||||
def origin_metadata_get( | |||||
self, origin, authority_type, authority_url, *, statement | |||||
) -> Iterable[Row]: | |||||
return self._execute_with_retries( | |||||
statement, [origin, authority_url, authority_type] | |||||
) | ) | ||||
if rows: | |||||
assert len(rows) == 1 | |||||
return rows[0].id | |||||
else: | |||||
return None | |||||
########################## | ########################## | ||||
# Miscellaneous | # Miscellaneous | ||||
########################## | ########################## | ||||
@_prepared_statement("SELECT uuid() FROM revision LIMIT 1;") | @_prepared_statement("SELECT uuid() FROM revision LIMIT 1;") | ||||
def check_read(self, *, statement): | def check_read(self, *, statement): | ||||
self._execute_with_retries(statement, []) | self._execute_with_retries(statement, []) | ||||
@_prepared_statement( | @_prepared_statement( | ||||
"SELECT object_type, count FROM object_count " "WHERE partition_key=0" | "SELECT object_type, count FROM object_count " "WHERE partition_key=0" | ||||
) | ) | ||||
def stat_counters(self, *, statement) -> ResultSet: | def stat_counters(self, *, statement) -> ResultSet: | ||||
return self._execute_with_retries(statement, []) | return self._execute_with_retries(statement, []) |
I'm not sure I remember correctly, so I'll ask @ardumont to join.
But the name metadata_fetcher is specific to extrinsic metadata, while the tool table is used for the indexers.
Would this change affect the indexers?
I see that this is defined in a Cassandra-specific file, whose scope I do not know.