Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7122898
D2872.id10237.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
D2872.id10237.diff
View Options
diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py
--- a/swh/storage/__init__.py
+++ b/swh/storage/__init__.py
@@ -6,10 +6,6 @@
import warnings
-class HashCollision(Exception):
- pass
-
-
STORAGE_IMPLEMENTATION = {
'pipeline', 'local', 'remote', 'memory', 'filter', 'buffer', 'retry',
'validate', 'cassandra',
diff --git a/swh/storage/api/client.py b/swh/storage/api/client.py
--- a/swh/storage/api/client.py
+++ b/swh/storage/api/client.py
@@ -8,8 +8,7 @@
from swh.core.api import RPCClient, RemoteException
from swh.model.model import Content
-from .. import HashCollision
-from ..exc import StorageAPIError, StorageArgumentException
+from ..exc import StorageAPIError, StorageArgumentException, HashCollision
from ..interface import StorageInterface
from .serializers import ENCODERS, DECODERS
@@ -33,8 +32,8 @@
and e.args and e.args[0].get('type') == 'HashCollision':
# XXX: workaround until we fix these HashCollisions happening
# when they shouldn't
- raise HashCollision(
- *e.args[0]['args'])
+ algo, hash_id, colliding_contents = e.args[0]['args']
+ raise HashCollision(algo, hash_id, colliding_contents)
else:
raise
diff --git a/swh/storage/cassandra/storage.py b/swh/storage/cassandra/storage.py
--- a/swh/storage/cassandra/storage.py
+++ b/swh/storage/cassandra/storage.py
@@ -21,8 +21,7 @@
from swh.storage.objstorage import ObjStorage
from swh.storage.writer import JournalWriter
-from .. import HashCollision
-from ..exc import StorageArgumentException
+from ..exc import StorageArgumentException, HashCollision
from .common import TOKEN_BEGIN, TOKEN_END
from .converters import (
revision_to_db, revision_from_db, release_to_db, release_from_db,
diff --git a/swh/storage/exc.py b/swh/storage/exc.py
--- a/swh/storage/exc.py
+++ b/swh/storage/exc.py
@@ -1,8 +1,14 @@
-# Copyright (C) 2015-2016 The Software Heritage developers
+# Copyright (C) 2015-2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Dict, List
+
+from swh.model.hashutil import hash_to_hex
+
+from swh.storage.utils import content_hex_hashes, content_bytes_hashes
+
class StorageDBError(Exception):
"""Specific storage db error (connection, erroneous queries, etc...)
@@ -26,3 +32,20 @@
class StorageArgumentException(Exception):
"""Argument passed to a Storage endpoint is invalid."""
pass
+
+
+class HashCollision(Exception):
+ """Exception raised when a content collides in a storage backend
+
+ """
+ def __init__(self, algo, hash_id, colliding_contents):
+ super().__init__()
+ self.algo = algo
+ self.hash_id = hash_to_hex(hash_id)
+ self.colliding_contents = [content_hex_hashes(c)
+ for c in colliding_contents]
+ # backward compatibility: keep the payload readable via exc.args
+ self.args = [self.algo, self.hash_id, self.colliding_contents]
+
+ def colliding_content_hashes(self) -> List[Dict[str, bytes]]:
+ return [content_bytes_hashes(c) for c in self.colliding_contents]
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -25,8 +25,7 @@
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
-from . import HashCollision
-from .exc import StorageArgumentException
+from .exc import StorageArgumentException, HashCollision
from .converters import origin_url_to_sha1
from .utils import get_partition_bounds_bytes
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -25,10 +25,10 @@
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex
from swh.storage.objstorage import ObjStorage
-from . import converters, HashCollision
+from . import converters
from .common import db_transaction_generator, db_transaction
from .db import Db
-from .exc import StorageArgumentException, StorageDBError
+from .exc import StorageArgumentException, StorageDBError, HashCollision
from .algos import diff
from .metrics import timed, send_metric, process_metrics
from .utils import (
diff --git a/swh/storage/tests/test_exception.py b/swh/storage/tests/test_exception.py
new file mode 100644
--- /dev/null
+++ b/swh/storage/tests/test_exception.py
@@ -0,0 +1,34 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.model import hashutil
+
+from swh.storage.exc import HashCollision
+from swh.storage.utils import content_hex_hashes
+
+
+def test_hash_collision_exception():
+ hex_hash_id = "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"
+ hash_id = hashutil.hash_to_bytes(hex_hash_id)
+
+ content = {
+ "blake2s256": hashutil.hash_to_bytes(
+ "8f677e3214ca8b2acad91884a1571ef3f12b786501f9a6bedfd6239d82095dd2"
+ ),
+ "sha1_git": hashutil.hash_to_bytes(
+ "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0"),
+ "sha256": hashutil.hash_to_bytes(
+ "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0"
+ ),
+ "sha1": hash_id,
+ }
+
+ exc = HashCollision('sha1', hash_id, [content])
+
+ assert exc.algo == 'sha1'
+ assert exc.hash_id == hex_hash_id
+ assert exc.colliding_contents == [content_hex_hashes(content)]
+
+ assert exc.colliding_content_hashes() == [content]
diff --git a/swh/storage/tests/test_retry.py b/swh/storage/tests/test_retry.py
--- a/swh/storage/tests/test_retry.py
+++ b/swh/storage/tests/test_retry.py
@@ -13,8 +13,8 @@
Content, Directory, Release, Revision, Snapshot, Origin
)
-from swh.storage import HashCollision, get_storage
-from swh.storage.exc import StorageArgumentException
+from swh.storage import get_storage
+from swh.storage.exc import HashCollision, StorageArgumentException
@pytest.fixture
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -30,10 +30,12 @@
Content, OriginVisit, Release, Revision
)
from swh.model.hypothesis_strategies import objects
-from swh.storage import HashCollision, get_storage
+from swh.model.hashutil import hash_to_hex
+from swh.storage import get_storage
from swh.storage.converters import origin_url_to_sha1 as sha1
-from swh.storage.exc import StorageArgumentException
+from swh.storage.exc import HashCollision, StorageArgumentException
from swh.storage.interface import StorageInterface
+from swh.storage.utils import content_hex_hashes
from .storage_data import data
@@ -314,12 +316,18 @@
with pytest.raises(HashCollision) as cm:
swh_storage.content_add([cont1, cont1b])
- actual_algo = cm.value.args[0]
+ exc = cm.value
+ actual_algo = exc.algo
assert actual_algo in ['sha1', 'sha1_git', 'blake2s256']
- actual_id = cm.value.args[1]
- assert actual_id == cont1[actual_algo]
- assert len(cm.value.args[2]) == 2
- assert cm.value.args[2] == [
+ actual_id = exc.hash_id
+ assert actual_id == hash_to_hex(cont1[actual_algo])
+ collisions = exc.args[2]
+ assert len(collisions) == 2
+ assert collisions == [
+ content_hex_hashes(Content.from_dict(cont1).hashes()),
+ content_hex_hashes(Content.from_dict(cont1b).hashes())
+ ]
+ assert exc.colliding_content_hashes() == [
Content.from_dict(cont1).hashes(),
Content.from_dict(cont1b).hashes()
]
@@ -387,12 +395,18 @@
with pytest.raises(HashCollision) as cm:
swh_storage.content_add_metadata([cont1, cont1b])
- actual_algo = cm.value.args[0]
+ exc = cm.value
+ actual_algo = exc.algo
assert actual_algo in ['sha1', 'sha1_git', 'blake2s256']
- actual_id = cm.value.args[1]
- assert actual_id == cont1[actual_algo]
- assert len(cm.value.args[2]) == 2
- assert cm.value.args[2] == [
+ actual_id = exc.hash_id
+ assert actual_id == hash_to_hex(cont1[actual_algo])
+ collisions = exc.args[2]
+ assert len(collisions) == 2
+ assert collisions == [
+ content_hex_hashes(Content.from_dict(cont1).hashes()),
+ content_hex_hashes(Content.from_dict(cont1b).hashes())
+ ]
+ assert exc.colliding_content_hashes() == [
Content.from_dict(cont1).hashes(),
Content.from_dict(cont1b).hashes()
]
diff --git a/swh/storage/tests/test_utils.py b/swh/storage/tests/test_utils.py
--- a/swh/storage/tests/test_utils.py
+++ b/swh/storage/tests/test_utils.py
@@ -3,33 +3,94 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.model.hashutil import hash_to_bytes
-from swh.storage.utils import extract_collision_hash
+from swh.model import hashutil
+from swh.storage.utils import (
+ extract_collision_hash, content_hex_hashes, content_bytes_hashes
+)
def test_extract_collision_hash():
for msg, expected_result in [
(
'Key (sha1)=(\\x34973274ccef6ab4dfaaf86599792fa9c3fe4689) ...',
- ('sha1', hash_to_bytes(
- '34973274ccef6ab4dfaaf86599792fa9c3fe4689')),
+ ('sha1', '34973274ccef6ab4dfaaf86599792fa9c3fe4689'),
),
(
'Key (sha1_git)=(\\x34973274ccef6ab4dfaaf86599792fa9c3fe4699) already exists', # noqa
- ('sha1_git', hash_to_bytes(
- '34973274ccef6ab4dfaaf86599792fa9c3fe4699')),
+ ('sha1_git',
+ '34973274ccef6ab4dfaaf86599792fa9c3fe4699'),
),
(
'Key (sha256)=(\\x673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a) ...', # noqa
- ('sha256', hash_to_bytes(
- '673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a')) # noqa
+ ('sha256',
+ '673650f936cb3b0a2f93ce09d81be10748b1b203c19e8176b4eefc1964a0cf3a') # noqa
),
(
'Key (blake2s)=(\\xd5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d) ...', # noqa
- ('blake2s', hash_to_bytes(
- 'd5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d')) # noqa
+ ('blake2s',
+ 'd5fe1939576527e42cfd76a9455a2432fe7f56669564577dd93c4280e76d661d') # noqa
),
]:
assert extract_collision_hash(msg) == expected_result
assert extract_collision_hash('Nothing matching') is None
+
+
+def test_content_hex_hashes():
+ input_content = {
+ "blake2s256": hashutil.hash_to_bytes(
+ "8f677e3214ca8b2acad91884a1571ef3f12b786501f9a6bedfd6239d82095dd2"
+ ),
+ "sha1_git": hashutil.hash_to_bytes(
+ "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0"),
+ "sha256": hashutil.hash_to_bytes(
+ "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0"
+ ),
+ "sha1": hashutil.hash_to_bytes(
+ "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"),
+ }
+
+ expected_content = {
+ "blake2s256":
+ "8f677e3214ca8b2acad91884a1571ef3f12b786501f9a6bedfd6239d82095dd2",
+ "sha1_git": "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0",
+ "sha256":
+ "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0",
+ "sha1": "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"
+ }
+
+ actual_content = content_hex_hashes(input_content)
+
+ assert len(actual_content) == len(expected_content)
+ for algo in hashutil.DEFAULT_ALGORITHMS:
+ assert actual_content[algo] == expected_content[algo]
+
+
+def test_content_bytes_hashes():
+ input_content = {
+ "blake2s256":
+ "8f677e3214ca8b2acad91884a1571ef3f12b786501f9a6bedfd6239d82095dd2",
+ "sha1_git": "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0",
+ "sha256":
+ "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0",
+ "sha1": "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"
+ }
+
+ expected_content = {
+ "blake2s256": hashutil.hash_to_bytes(
+ "8f677e3214ca8b2acad91884a1571ef3f12b786501f9a6bedfd6239d82095dd2"
+ ),
+ "sha1_git": hashutil.hash_to_bytes(
+ "ba9aaa145ccd24ef760cf31c74d8f7ca1a2e47b0"),
+ "sha256": hashutil.hash_to_bytes(
+ "2bb787a73e37352f92383abe7e2902936d1059ad9f1ba6daaa9c1e58ee6970d0"
+ ),
+ "sha1": hashutil.hash_to_bytes(
+ "38762cf7f55934b34d179ae6a4c80cadccbb7f0a"),
+ }
+
+ actual_content = content_bytes_hashes(input_content)
+
+ assert len(actual_content) == len(expected_content)
+ for algo in hashutil.DEFAULT_ALGORITHMS:
+ assert actual_content[algo] == expected_content[algo]
diff --git a/swh/storage/utils.py b/swh/storage/utils.py
--- a/swh/storage/utils.py
+++ b/swh/storage/utils.py
@@ -5,9 +5,11 @@
import re
-from typing import Optional, Tuple
+from typing import Dict, Optional, Tuple
-from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import (
+ hash_to_bytes, hash_to_hex, DEFAULT_ALGORITHMS
+)
def _is_power_of_two(n: int) -> bool:
@@ -66,3 +68,21 @@
hash_id = result.group('id')
return hash_type, hash_to_bytes(hash_id)
return None
+
+
+def content_hex_hashes(content: Dict[str, bytes]) -> Dict[str, str]:
+ """Convert bytes hashes into hex hashes.
+
+ """
+ return {
+ algo: hash_to_hex(content[algo]) for algo in DEFAULT_ALGORITHMS
+ }
+
+
+def content_bytes_hashes(content: Dict[str, str]) -> Dict[str, bytes]:
+ """Convert hex hashes into bytes hashes.
+
+ """
+ return {
+ algo: hash_to_bytes(content[algo]) for algo in DEFAULT_ALGORITHMS
+ }
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 10:24 AM (3 d, 7 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223929
Attached To
D2872: storage*: Hex encode content hashes in HashCollision exception
Event Timeline
Log In to Comment