D642: Remove SQLisms from the tests and API
D642.id2063.diff
diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py
--- a/swh/storage/__init__.py
+++ b/swh/storage/__init__.py
@@ -8,6 +8,10 @@
Storage = storage.Storage
+class HashCollision(Exception):
+ pass
+
+
def get_storage(cls, args):
"""
Get a storage object of class `storage_class` with arguments
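
The new HashCollision exception gives API clients a backend-agnostic signal for colliding content hashes, replacing the psycopg2.IntegrityError they previously had to catch. A minimal usage sketch (the `storage` instance and `contents` iterable are assumed from the caller's context, not part of this diff):

    from swh.storage import HashCollision

    try:
        storage.content_add(contents)
    except HashCollision as e:
        # e.args[0] names the colliding hash algorithm:
        # 'sha1', 'sha1_git' or 'sha256'
        print('hash collision on %s' % e.args[0])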
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -938,7 +938,7 @@
return cur.fetchone()[0]
- origin_metadata_get_cols = ['id', 'origin_id', 'discovery_date',
+ origin_metadata_get_cols = ['origin_id', 'discovery_date',
'tool_id', 'metadata', 'provider_id',
'provider_name', 'provider_type',
'provider_url']
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -157,7 +157,22 @@
db.content_get_metadata_keys, cur)
# move metadata in place
- db.content_add_from_temp(cur)
+ try:
+ db.content_add_from_temp(cur)
+ except psycopg2.IntegrityError as e:
+ from . import HashCollision
+ if e.diag.sqlstate == '23505' and \
+ e.diag.table_name == 'content':
+ constraint_to_hash_name = {
+ 'content_pkey': 'sha1',
+ 'content_sha1_git_idx': 'sha1_git',
+ 'content_sha256_idx': 'sha256',
+ }
+ colliding_hash_name = constraint_to_hash_name \
+ .get(e.diag.constraint_name)
+ raise HashCollision(colliding_hash_name)
+ else:
+ raise
if missing_skipped:
missing_filtered = (
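
For reference, psycopg2 surfaces the server's error fields on the exception's diag attribute, and SQLSTATE '23505' is PostgreSQL's unique_violation code, which is what the handler above keys on. A standalone sketch of the same pattern (the DSN, table and inserted value are illustrative):

    import psycopg2

    conn = psycopg2.connect('dbname=test')
    cur = conn.cursor()
    try:
        cur.execute('insert into content (sha1) values (%s)', (b'\x00' * 20,))
        conn.commit()
    except psycopg2.IntegrityError as e:
        if e.diag.sqlstate == '23505':  # unique_violation
            print('duplicate key on constraint %s of table %s'
                  % (e.diag.constraint_name, e.diag.table_name))
        else:
            raise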
@@ -1197,6 +1212,27 @@
return {k: v for (k, v) in db.stat_counters()}
@db_transaction()
+ def refresh_stat_counters(self, db=None, cur=None):
+ """Recomputes the statistics for `stat_counters`."""
+ keys = [
+ 'content',
+ 'directory',
+ 'directory_entry_dir',
+ 'directory_entry_file',
+ 'directory_entry_rev',
+ 'origin',
+ 'origin_visit',
+ 'person',
+ 'release',
+ 'revision',
+ 'revision_history',
+ 'skipped_content',
+ 'snapshot']
+
+ for key in keys:
+ cur.execute('select * from swh_update_counter(%s)', (key,))
+
+ @db_transaction()
def origin_metadata_add(self, origin_id, ts, provider, tool, metadata,
db=None, cur=None):
""" Add an origin_metadata for the origin at ts with provenance and
diff --git a/swh/storage/tests/test_api_client.py b/swh/storage/tests/test_api_client.py
--- a/swh/storage/tests/test_api_client.py
+++ b/swh/storage/tests/test_api_client.py
@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import pytest
import shutil
import tempfile
import unittest
@@ -53,3 +54,7 @@
def tearDown(self):
super().tearDown()
shutil.rmtree(self.storage_base)
+
+ @pytest.mark.skip('refresh_stat_counters not available in the remote api.')
+ def test_stat_counters(self):
+ pass
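
Overriding an inherited test with a skipped no-op is the usual way to opt one subclass out of a shared test case; pytest honors the marker even on unittest.TestCase methods. A generic sketch of the pattern (class and test names are illustrative):

    import unittest

    import pytest

    class CommonTests:
        def test_stat_counters(self):
            ...

    class RemoteApiTest(CommonTests, unittest.TestCase):
        @pytest.mark.skip('refresh_stat_counters not available remotely')
        def test_stat_counters(self):
            pass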
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -7,15 +7,14 @@
import datetime
import unittest
from collections import defaultdict
-from operator import itemgetter
from unittest.mock import Mock, patch
-import psycopg2
import pytest
from swh.model import from_disk, identifiers
from swh.model.hashutil import hash_to_bytes
from swh.storage.tests.storage_testing import StorageTestFixture
+from swh.storage import HashCollision
@pytest.mark.db
@@ -531,7 +530,6 @@
class twice.
"""
-
@staticmethod
def normalize_entity(entity):
entity = copy.deepcopy(entity)
@@ -570,9 +568,11 @@
sha256_array[0] += 1
cont1b['sha256'] = bytes(sha256_array)
- with self.assertRaises(psycopg2.IntegrityError):
+ with self.assertRaises(HashCollision) as cm:
self.storage.content_add([cont1, cont1b])
+ self.assertEqual(cm.exception.args[0], 'sha1')
+
def test_skipped_content_add(self):
cont = self.skipped_cont.copy()
cont2 = self.skipped_cont2.copy()
@@ -682,7 +682,7 @@
stored_data = list(self.storage.directory_ls(self.dir['id']))
data_to_store = []
- for ent in sorted(self.dir['entries'], key=itemgetter('name')):
+ for ent in self.dir['entries']:
data_to_store.append({
'dir_id': self.dir['id'],
'type': ent['type'],
@@ -696,7 +696,7 @@
'length': None,
})
- self.assertEqual(data_to_store, stored_data)
+ self.assertCountEqual(data_to_store, stored_data)
after_missing = list(self.storage.directory_missing([self.dir['id']]))
self.assertEqual([], after_missing)
@@ -885,7 +885,8 @@
# then
for actual_release in actual_releases:
- del actual_release['author']['id'] # hack: ids are generated
+ if 'id' in actual_release['author']:
+ del actual_release['author']['id'] # hack: ids are generated
self.assertEqual([self.normalize_entity(self.release),
self.normalize_entity(self.release2)],
@@ -1016,7 +1017,6 @@
# then
self.assertEqual(origin_visit1['origin'], origin_id)
self.assertIsNotNone(origin_visit1['visit'])
- self.assertTrue(origin_visit1['visit'] > 0)
actual_origin_visits = list(self.storage.origin_visit_get(origin_id))
self.assertEqual(actual_origin_visits,
@@ -1404,9 +1404,7 @@
expected_keys = ['content', 'directory', 'directory_entry_dir',
'origin', 'person', 'revision']
- for key in expected_keys:
- self.cursor.execute('select * from swh_update_counter(%s)', (key,))
- self.conn.commit()
+ self.storage.refresh_stat_counters()
counters = self.storage.stat_counters()
@@ -1681,10 +1679,9 @@
'provider_name': self.provider['name'],
'provider_url': self.provider['url']
})
- tool = self.storage.tool_get(self.metadata_tool)
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider['id'],
@@ -1692,7 +1689,6 @@
self.origin_metadata['metadata'])
actual_om1 = list(self.storage.origin_metadata_get_by(origin_id))
# then
- self.assertEqual(actual_om1[0]['id'], o_m1)
self.assertEqual(len(actual_om1), 1)
self.assertEqual(actual_om1[0]['origin_id'], origin_id)
@@ -1709,21 +1705,21 @@
'provider_name': self.provider['name'],
'provider_url': self.provider['url']
})
- tool = self.storage.tool_get(self.metadata_tool)
+ tool = list(self.storage.tool_add([self.metadata_tool]))[0]
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider['id'],
tool['id'],
self.origin_metadata['metadata'])
- o_m2 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id2,
self.origin_metadata2['discovery_date'],
provider['id'],
tool['id'],
self.origin_metadata2['metadata'])
- o_m3 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata2['discovery_date'],
provider['id'],
@@ -1735,15 +1731,12 @@
expected_results = [{
'origin_id': origin_id,
'discovery_date': datetime.datetime(
- 2017, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2017, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m3,
'provider_id': provider['id'],
'provider_name': 'hal',
'provider_type': 'deposit-client',
@@ -1752,15 +1745,12 @@
}, {
'origin_id': origin_id,
'discovery_date': datetime.datetime(
- 2015, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2015, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m1,
'provider_id': provider['id'],
'provider_name': 'hal',
'provider_type': 'deposit-client',
@@ -1771,8 +1761,7 @@
# then
self.assertEqual(len(all_metadatas), 2)
self.assertEqual(len(metadatas_for_origin2), 1)
- self.assertEqual(metadatas_for_origin2[0]['id'], o_m2)
- self.assertEqual(all_metadatas, expected_results)
+ self.assertCountEqual(all_metadatas, expected_results)
def test_origin_metadata_get_by_provider_type(self):
# given
@@ -1801,16 +1790,16 @@
# using the only tool now inserted in the data.sql, but for this
# provider should be a crawler tool (not yet implemented)
- tool = self.storage.tool_get(self.metadata_tool)
+ tool = list(self.storage.tool_add([self.metadata_tool]))[0]
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider1['id'],
tool['id'],
self.origin_metadata['metadata'])
- o_m2 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id2,
self.origin_metadata2['discovery_date'],
provider2['id'],
@@ -1821,18 +1810,18 @@
origin_metadata_get_by(
origin_id2,
provider_type))
+ for item in m_by_provider:
+ if 'id' in item:
+ del item['id']
expected_results = [{
'origin_id': origin_id2,
'discovery_date': datetime.datetime(
- 2017, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2017, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m2,
'provider_id': provider2['id'],
'provider_name': 'swMATH',
'provider_type': provider_type,
@@ -1843,8 +1832,6 @@
self.assertEqual(len(m_by_provider), 1)
self.assertEqual(m_by_provider, expected_results)
- self.assertEqual(m_by_provider[0]['id'], o_m2)
- self.assertIsNotNone(o_m1)
class TestLocalStorage(CommonTestStorage, StorageTestDbFixture,
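
The rewritten expected timestamps denote the same instants as before, just expressed in stdlib UTC instead of psycopg2's FixedOffsetTimezone (whose offset is in minutes, so offset=60 is UTC+01:00). A quick equivalence check:

    import datetime

    import psycopg2.tz

    old_style = datetime.datetime(
        2017, 1, 2, 0, 0,
        tzinfo=psycopg2.tz.FixedOffsetTimezone(offset=60, name=None))
    new_style = datetime.datetime(
        2017, 1, 1, 23, 0, tzinfo=datetime.timezone.utc)
    assert old_style == new_style  # aware datetimes compare by UTC instant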