Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123195
D1288.id4845.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
15 KB
Subscribers
None
D1288.id4845.diff
View Options
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -14,6 +14,7 @@
"""Proxy to the SWH DB, with wrappers around stored procedures
"""
+
def mktemp_dir_entry(self, entry_type, cur=None):
self._cursor(cur).execute('SELECT swh_mktemp_dir_entry(%s)',
(('directory_entry_%s' % entry_type),))
@@ -217,16 +218,23 @@
"""
cur = self._cursor(cur)
+ checksum_dict = {'sha1': sha1, 'sha1_git': sha1_git,
+ 'sha256': sha256, 'blake2s256': blake2s256}
+ where_parts = []
+ args = []
+ # Adds only those keys which have value other than None
+ for algorithm in checksum_dict:
+ if checksum_dict[algorithm] is not None:
+ args.append(checksum_dict[algorithm])
+ where_parts.append(algorithm + '= %s')
+ query = ' AND '.join(where_parts)
cur.execute("""SELECT %s
- FROM swh_content_find(%%s, %%s, %%s, %%s)
- LIMIT 1""" % ','.join(self.content_find_cols),
- (sha1, sha1_git, sha256, blake2s256))
-
- content = cur.fetchone()
- if set(content) == {None}:
- return None
- else:
- return content
+ FROM content WHERE %s
+ """
+ % (','.join(self.content_find_cols), query),
+ args)
+ content = cur.fetchall()
+ return content
def directory_missing_from_list(self, directories, cur=None):
cur = self._cursor(cur)
@@ -829,7 +837,7 @@
query = '''SELECT %s
FROM swh_origin_metadata_get_by_origin(
%%s)''' % (','.join(
- self.origin_metadata_get_cols))
+ self.origin_metadata_get_cols))
cur.execute(query, (origin_id, ))
@@ -837,7 +845,7 @@
query = '''SELECT %s
FROM swh_origin_metadata_get_by_provider_type(
%%s, %%s)''' % (','.join(
- self.origin_metadata_get_cols))
+ self.origin_metadata_get_cols))
cur.execute(query, (origin_id, provider_type))
@@ -862,8 +870,8 @@
where name=%%s and
version=%%s and
configuration=%%s''' % (
- ','.join(self.tool_cols)),
- (name, version, configuration))
+ ','.join(self.tool_cols)),
+ (name, version, configuration))
return cur.fetchone()
@@ -879,7 +887,7 @@
RETURNING id"""
cur.execute(insert, (provider_name, provider_type, provider_url,
- jsonize(metadata)))
+ jsonize(metadata)))
return cur.fetchone()[0]
def metadata_provider_get(self, provider_id, cur=None):
@@ -887,8 +895,8 @@
cur.execute('''select %s
from metadata_provider
where id=%%s ''' % (
- ','.join(self.metadata_provider_cols)),
- (provider_id, ))
+ ','.join(self.metadata_provider_cols)),
+ (provider_id, ))
return cur.fetchone()
@@ -899,7 +907,7 @@
from metadata_provider
where provider_name=%%s and
provider_url=%%s''' % (
- ','.join(self.metadata_provider_cols)),
- (provider_name, provider_url))
+ ','.join(self.metadata_provider_cols)),
+ (provider_name, provider_url))
return cur.fetchone()
diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py
--- a/swh/storage/in_memory.py
+++ b/swh/storage/in_memory.py
@@ -76,7 +76,8 @@
if key in self._contents:
continue
for algorithm in DEFAULT_ALGORITHMS:
- if content[algorithm] in self._content_indexes[algorithm]:
+ if content[algorithm] in self._content_indexes[algorithm]\
+ and (algorithm not in {'blake2s256', 'sha256'}):
from . import HashCollision
raise HashCollision(algorithm, content[algorithm], key)
for algorithm in DEFAULT_ALGORITHMS:
@@ -291,9 +292,7 @@
if not found:
return
keys = list(set.intersection(*found))
-
- # FIXME: should really be a list of all the objects found
- return copy.deepcopy(self._contents[keys[0]])
+ return copy.deepcopy([self._contents[key] for key in keys])
def content_missing(self, contents, key_hash='sha1'):
"""List content missing from storage
@@ -320,10 +319,9 @@
yield content[key_hash]
break
else:
- # content_find cannot return None here, because we checked
- # above that there is a content with matching hashes.
- if self.content_find(content)['status'] == 'missing':
- yield content[key_hash]
+ for result in self.content_find(content):
+ if result['status'] == 'missing':
+ yield content[key_hash]
def content_missing_per_sha1(self, contents):
"""List content missing from storage based only on sha1.
@@ -404,8 +402,10 @@
ret = dict.fromkeys(keys)
ret.update(dentry)
if ret['type'] == 'file':
+ # TODO: Make it able to handle more than one content
content = self.content_find({'sha1_git': ret['target']})
if content:
+ content = content[0]
for key in keys:
ret[key] = content[key]
return ret
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -511,13 +511,16 @@
raise ValueError('content keys must contain at least one of: '
'sha1, sha1_git, sha256, blake2s256')
- c = db.content_find(sha1=content.get('sha1'),
- sha1_git=content.get('sha1_git'),
- sha256=content.get('sha256'),
- blake2s256=content.get('blake2s256'),
- cur=cur)
- if c:
- return dict(zip(db.content_find_cols, c))
+ contents = db.content_find(sha1=content.get('sha1'),
+ sha1_git=content.get('sha1_git'),
+ sha256=content.get('sha256'),
+ blake2s256=content.get('blake2s256'),
+ cur=cur)
+ if contents:
+ contents_list = []
+ for cont in contents:
+ contents_list.append(dict(zip(db.content_find_cols, cont)))
+ return contents_list
return None
@db_transaction()
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -2408,7 +2408,7 @@
actually_present = self.storage.content_find({'sha1': cont['sha1']})
- self.assertEqual(actually_present, {
+ self.assertEqual(actually_present[0], {
'ctime': now,
'sha1': cont['sha1'],
'sha256': cont['sha256'],
@@ -2423,45 +2423,50 @@
cont = self.cont
self.storage.content_add([cont])
- actually_present = self.storage.content_find({'sha1': cont['sha1']})
-
- actually_present.pop('ctime')
- self.assertEqual(actually_present, {
- 'sha1': cont['sha1'],
- 'sha256': cont['sha256'],
- 'sha1_git': cont['sha1_git'],
- 'blake2s256': cont['blake2s256'],
- 'length': cont['length'],
- 'status': 'visible'
- })
+ actually_present = self.storage.content_find(
+ {'sha1': cont['sha1']}
+ )
+ self.assertEqual(1, len(actually_present))
+ actually_present[0].pop('ctime')
+
+ self.assertEqual(actually_present[0], {
+ 'sha1': cont['sha1'],
+ 'sha256': cont['sha256'],
+ 'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
+ 'length': cont['length'],
+ 'status': 'visible'
+ })
# 2. with something to find
actually_present = self.storage.content_find(
{'sha1_git': cont['sha1_git']})
-
- actually_present.pop('ctime')
- self.assertEqual(actually_present, {
- 'sha1': cont['sha1'],
- 'sha256': cont['sha256'],
- 'sha1_git': cont['sha1_git'],
- 'blake2s256': cont['blake2s256'],
- 'length': cont['length'],
- 'status': 'visible'
- })
+ self.assertEqual(1, len(actually_present))
+
+ actually_present[0].pop('ctime')
+ self.assertEqual(actually_present[0], {
+ 'sha1': cont['sha1'],
+ 'sha256': cont['sha256'],
+ 'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
+ 'length': cont['length'],
+ 'status': 'visible'
+ })
# 3. with something to find
actually_present = self.storage.content_find(
{'sha256': cont['sha256']})
-
- actually_present.pop('ctime')
- self.assertEqual(actually_present, {
- 'sha1': cont['sha1'],
- 'sha256': cont['sha256'],
- 'sha1_git': cont['sha1_git'],
- 'blake2s256': cont['blake2s256'],
- 'length': cont['length'],
- 'status': 'visible'
- })
+ self.assertEqual(1, len(actually_present))
+
+ actually_present[0].pop('ctime')
+ self.assertEqual(actually_present[0], {
+ 'sha1': cont['sha1'],
+ 'sha256': cont['sha256'],
+ 'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
+ 'length': cont['length'],
+ 'status': 'visible'
+ })
# 4. with something to find
actually_present = self.storage.content_find({
@@ -2470,16 +2475,17 @@
'sha256': cont['sha256'],
'blake2s256': cont['blake2s256'],
})
-
- actually_present.pop('ctime')
- self.assertEqual(actually_present, {
- 'sha1': cont['sha1'],
- 'sha256': cont['sha256'],
- 'sha1_git': cont['sha1_git'],
- 'blake2s256': cont['blake2s256'],
- 'length': cont['length'],
- 'status': 'visible'
- })
+ self.assertEqual(1, len(actually_present))
+
+ actually_present[0].pop('ctime')
+ self.assertEqual(actually_present[0], {
+ 'sha1': cont['sha1'],
+ 'sha256': cont['sha256'],
+ 'sha1_git': cont['sha1_git'],
+ 'blake2s256': cont['blake2s256'],
+ 'length': cont['length'],
+ 'status': 'visible'
+ })
def test_content_find_with_non_present_content(self):
# 1. with something that does not exist
@@ -2502,6 +2508,117 @@
self.assertIsNone(actually_present)
+ def test_content_find_with_duplicate_input(self):
+ cont1 = self.cont
+ duplicate_cont = cont1.copy()
+
+ # Create fake data with colliding sha256 and blake2s256
+ sha1_array = bytearray(duplicate_cont['sha1'])
+ sha1_array[0] += 1
+ duplicate_cont['sha1'] = bytes(sha1_array)
+ sha1git_array = bytearray(duplicate_cont['sha1_git'])
+ sha1git_array[0] += 1
+ duplicate_cont['sha1_git'] = bytes(sha1git_array)
+ # Inject the data
+ self.storage.content_add([cont1, duplicate_cont])
+ finder = {'blake2s256': duplicate_cont['blake2s256'],
+ 'sha256': duplicate_cont['sha256']}
+ actual_result = list(self.storage.content_find(finder))
+
+ cont1.pop('data')
+ duplicate_cont.pop('data')
+ actual_result[0].pop('ctime')
+ actual_result[1].pop('ctime')
+
+ expected_result = [
+ cont1, duplicate_cont
+ ]
+ self.assertCountEqual(expected_result, actual_result)
+
+ def test_content_find_with_duplicate_sha256(self):
+ cont1 = self.cont
+ duplicate_cont = cont1.copy()
+
+ # Create fake data with colliding sha256 and blake2s256
+ sha1_array = bytearray(duplicate_cont['sha1'])
+ sha1_array[0] += 1
+ duplicate_cont['sha1'] = bytes(sha1_array)
+ sha1git_array = bytearray(duplicate_cont['sha1_git'])
+ sha1git_array[0] += 1
+ duplicate_cont['sha1_git'] = bytes(sha1git_array)
+ blake2s256_array = bytearray(duplicate_cont['blake2s256'])
+ blake2s256_array[0] += 1
+ duplicate_cont['blake2s256'] = bytes(blake2s256_array)
+ self.storage.content_add([cont1, duplicate_cont])
+ finder = {
+ 'sha256': duplicate_cont['sha256']
+ }
+ actual_result = list(self.storage.content_find(finder))
+
+ cont1.pop('data')
+ duplicate_cont.pop('data')
+ actual_result[0].pop('ctime')
+ actual_result[1].pop('ctime')
+ expected_result = [
+ cont1, duplicate_cont
+ ]
+ self.assertCountEqual(expected_result, actual_result)
+ # Find with both sha256 and blake2s256
+ finder = {
+ 'sha256': duplicate_cont['sha256'],
+ 'blake2s256': duplicate_cont['blake2s256']
+ }
+ actual_result = list(self.storage.content_find(finder))
+
+ actual_result[0].pop('ctime')
+
+ expected_result = [
+ duplicate_cont
+ ]
+ self.assertCountEqual(expected_result, actual_result)
+
+ def test_content_find_with_duplicate_blake2s256(self):
+ cont1 = self.cont
+ duplicate_cont = cont1.copy()
+
+ # Create fake data with colliding sha256 and blake2s256
+ sha1_array = bytearray(duplicate_cont['sha1'])
+ sha1_array[0] += 1
+ duplicate_cont['sha1'] = bytes(sha1_array)
+ sha1git_array = bytearray(duplicate_cont['sha1_git'])
+ sha1git_array[0] += 1
+ duplicate_cont['sha1_git'] = bytes(sha1git_array)
+ sha256_array = bytearray(duplicate_cont['sha256'])
+ sha256_array[0] += 1
+ duplicate_cont['sha256'] = bytes(sha256_array)
+ self.storage.content_add([cont1, duplicate_cont])
+ finder = {
+ 'blake2s256': duplicate_cont['blake2s256']
+ }
+ actual_result = list(self.storage.content_find(finder))
+
+ cont1.pop('data')
+ duplicate_cont.pop('data')
+ actual_result[0].pop('ctime')
+ actual_result[1].pop('ctime')
+ expected_result = [
+ cont1, duplicate_cont
+ ]
+ self.assertCountEqual(expected_result, actual_result)
+ # Find with both sha256 and blake2s256
+ finder = {
+ 'sha256': duplicate_cont['sha256'],
+ 'blake2s256': duplicate_cont['blake2s256']
+ }
+ actual_result = list(self.storage.content_find(finder))
+
+ actual_result[0].pop('ctime')
+
+ expected_result = [
+ duplicate_cont
+ ]
+ self.assertCountEqual(expected_result, actual_result)
+
def test_content_find_bad_input(self):
# 1. with bad input
with self.assertRaises(ValueError):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Dec 18, 2:33 AM (6 d, 3 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218529
Attached To
D1288: Storage.content_find returns list instead of single value
Event Timeline
Log In to Comment