diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py --- a/swh/storage/cassandra/storage.py +++ b/swh/storage/cassandra/storage.py @@ -442,6 +442,8 @@ # This allows this function to run in linear time overall instead of # quadratic. for found_content in found_contents_with_same_hash: + # check if the found_content.hashes() dictionary contains a superset + # of the (key, value) pairs in missing_content if missing_content.items() <= found_content.hashes().items(): # Found! break