Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/cassandra/storage.py
Show First 20 Lines • Show All 113 Lines • ▼ Show 20 Lines | def _content_get_from_hash(self, algo, hash_) -> Iterable: | ||||
yield row | yield row | ||||
def _content_add(self, contents: List[Content], with_data: bool) -> Dict: | def _content_add(self, contents: List[Content], with_data: bool) -> Dict: | ||||
# Filter out content already in the database. | # Filter out content already in the database. | ||||
contents = [ | contents = [ | ||||
c for c in contents if not self._cql_runner.content_get_from_pk(c.to_dict()) | c for c in contents if not self._cql_runner.content_get_from_pk(c.to_dict()) | ||||
] | ] | ||||
self.journal_writer.content_add(contents) | |||||
if with_data: | if with_data: | ||||
# First insert to the objstorage, if the endpoint is | # First insert to the objstorage, if the endpoint is | ||||
# `content_add` (as opposed to `content_add_metadata`). | # `content_add` (as opposed to `content_add_metadata`). | ||||
# TODO: this should probably be done concurrently with inserting | |||||
# in index tables (but still before the main table; so an entry is | # Must add to the objstorage before the DB and journal. Otherwise: | ||||
# only added to the main table after everything else was | # 1. in case of a crash the DB may "believe" we have the content, but | ||||
# successfully inserted). | # we didn't have time to write to the objstorage before the crash | ||||
# 2. the objstorage mirroring, which reads from the journal, may attempt to | |||||
# read from the objstorage before we finished writing it | |||||
summary = self.objstorage.content_add( | summary = self.objstorage.content_add( | ||||
c for c in contents if c.status != "absent" | c for c in contents if c.status != "absent" | ||||
) | ) | ||||
content_add_bytes = summary["content:add:bytes"] | content_add_bytes = summary["content:add:bytes"] | ||||
self.journal_writer.content_add(contents) | |||||
content_add = 0 | content_add = 0 | ||||
for content in contents: | for content in contents: | ||||
content_add += 1 | content_add += 1 | ||||
# Check for sha1 or sha1_git collisions. This test is not atomic | # Check for sha1 or sha1_git collisions. This test is not atomic | ||||
# with the insertion, so it won't detect a collision if both | # with the insertion, so it won't detect a collision if both | ||||
# contents are inserted at the same time, but it's good enough. | # contents are inserted at the same time, but it's good enough. | ||||
# | # | ||||
▲ Show 20 Lines • Show All 1,190 Lines • Show Last 20 Lines |