Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/storage.py
Show First 20 Lines • Show All 349 Lines • ▼ Show 20 Lines | class CassandraStorage: | ||||
def content_missing(
    self, contents: List[Dict[str, Any]], key_hash: str = "sha1"
) -> Iterable[bytes]:
    """Yield the ``key_hash`` value of every given content reported missing.

    Contents whose dict carries every hash in ``DEFAULT_ALGORITHMS`` are
    checked in one batch through
    ``self._cql_runner.content_missing_from_hashes``; the others are looked
    up one by one with ``self.content_find``.

    Args:
        contents: hash dicts describing the contents to check. Each dict is
            expected to contain at least the ``key_hash`` entry, since that
            entry is what gets yielded.
        key_hash: name of the hash to yield for each missing content; must
            be a member of ``DEFAULT_ALGORITHMS``.

    Yields:
        ``content[key_hash]`` for each item returned by the batched query,
        then for each partially-hashed content for which ``content_find``
        returns a falsy result.

    Raises:
        StorageArgumentException: if ``key_hash`` is not in
            ``DEFAULT_ALGORITHMS``. This function is a generator, so the
            check runs when iteration starts, not when it is called.
    """
    if key_hash not in DEFAULT_ALGORITHMS:
        # The message must be an f-string, otherwise the placeholder is
        # emitted verbatim. Names are sorted so the text is deterministic.
        raise StorageArgumentException(
            f"key_hash should be one of {','.join(sorted(DEFAULT_ALGORITHMS))}"
        )

    contents_with_all_hashes = []
    contents_with_missing_hashes = []
    for content in contents:
        # Subset test: does this dict provide every default hash?
        if DEFAULT_ALGORITHMS <= set(content):
            contents_with_all_hashes.append(content)
        else:
            contents_with_missing_hashes.append(content)

    # These contents can be queried efficiently directly in the main table
    for content in self._cql_runner.content_missing_from_hashes(
        contents_with_all_hashes
    ):
        yield content[key_hash]

    # For these, we need the expensive index lookups + main table.
    for content in contents_with_missing_hashes:
        res = self.content_find(content)
        if not res:
            yield content[key_hash]
def content_missing_per_sha1(self, contents: List[bytes]) -> Iterable[bytes]:
    """Check a list of sha1 values through ``content_missing``.

    Each value is wrapped in a single-entry ``{"sha1": value}`` dict and the
    resulting list is handed to ``self.content_missing`` with its default
    ``key_hash``; whatever that call returns is returned unchanged.
    """
    sha1_queries = [{"sha1": digest} for digest in contents]
    return self.content_missing(sha1_queries)
def content_missing_per_sha1_git( | def content_missing_per_sha1_git( | ||||
▲ Show 20 Lines • Show All 1,170 Lines • Show Last 20 Lines |