D642.id2063.diff
diff --git a/swh/storage/__init__.py b/swh/storage/__init__.py
--- a/swh/storage/__init__.py
+++ b/swh/storage/__init__.py
@@ -8,6 +8,10 @@
Storage = storage.Storage
+class HashCollision(Exception):
+ pass
+
+
def get_storage(cls, args):
"""
Get a storage object of class `storage_class` with arguments
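
Note: the new HashCollision exception gives callers a storage-level error to catch instead of depending on psycopg2 directly. A minimal usage sketch, assuming a `storage` instance obtained from `get_storage` and two hypothetical content dicts `cont1`/`cont1b` that share the same sha1:

    from swh.storage import HashCollision

    try:
        storage.content_add([cont1, cont1b])
    except HashCollision as e:
        # args[0] carries the name of the colliding hash column,
        # e.g. 'sha1', 'sha1_git' or 'sha256'
        print('collision on', e.args[0])
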
diff --git a/swh/storage/db.py b/swh/storage/db.py
--- a/swh/storage/db.py
+++ b/swh/storage/db.py
@@ -938,7 +938,7 @@
return cur.fetchone()[0]
- origin_metadata_get_cols = ['id', 'origin_id', 'discovery_date',
+ origin_metadata_get_cols = ['origin_id', 'discovery_date',
'tool_id', 'metadata', 'provider_id',
'provider_name', 'provider_type',
'provider_url']
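
Dropping 'id' from origin_metadata_get_cols means rows returned by origin_metadata_get_by no longer expose the internal row id, which the test updates further down rely on. As a sketch, the resulting row shape is:

    # Keys of each row returned by origin_metadata_get_by after this change
    # (the internal metadata row id is no longer included):
    origin_metadata_keys = ['origin_id', 'discovery_date', 'tool_id',
                            'metadata', 'provider_id', 'provider_name',
                            'provider_type', 'provider_url']
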
diff --git a/swh/storage/storage.py b/swh/storage/storage.py
--- a/swh/storage/storage.py
+++ b/swh/storage/storage.py
@@ -157,7 +157,22 @@
db.content_get_metadata_keys, cur)
# move metadata in place
- db.content_add_from_temp(cur)
+ try:
+ db.content_add_from_temp(cur)
+ except psycopg2.IntegrityError as e:
+ from . import HashCollision
+ if e.diag.sqlstate == '23505' and \
+ e.diag.table_name == 'content':
+ constraint_to_hash_name = {
+ 'content_pkey': 'sha1',
+ 'content_sha1_git_idx': 'sha1_git',
+ 'content_sha256_idx': 'sha256',
+ }
+ colliding_hash_name = constraint_to_hash_name \
+ .get(e.diag.constraint_name)
+ raise HashCollision(colliding_hash_name)
+ else:
+ raise
if missing_skipped:
missing_filtered = (
@@ -1197,6 +1212,27 @@
return {k: v for (k, v) in db.stat_counters()}
@db_transaction()
+ def refresh_stat_counters(self, db=None, cur=None):
+ """Recomputes the statistics for `stat_counters`."""
+ keys = [
+ 'content',
+ 'directory',
+ 'directory_entry_dir',
+ 'directory_entry_file',
+ 'directory_entry_rev',
+ 'origin',
+ 'origin_visit',
+ 'person',
+ 'release',
+ 'revision',
+ 'revision_history',
+ 'skipped_content',
+ 'snapshot']
+
+ for key in keys:
+ cur.execute('select * from swh_update_counter(%s)', (key,))
+
+ @db_transaction()
def origin_metadata_add(self, origin_id, ts, provider, tool, metadata,
db=None, cur=None):
""" Add an origin_metadata for the origin at ts with provenance and
diff --git a/swh/storage/tests/test_api_client.py b/swh/storage/tests/test_api_client.py
--- a/swh/storage/tests/test_api_client.py
+++ b/swh/storage/tests/test_api_client.py
@@ -3,6 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import pytest
import shutil
import tempfile
import unittest
@@ -53,3 +54,7 @@
def tearDown(self):
super().tearDown()
shutil.rmtree(self.storage_base)
+
+ @pytest.mark.skip('refresh_stat_counters not available in the remote api.')
+ def test_stat_counters(self):
+ pass
diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py
--- a/swh/storage/tests/test_storage.py
+++ b/swh/storage/tests/test_storage.py
@@ -7,15 +7,14 @@
import datetime
import unittest
from collections import defaultdict
-from operator import itemgetter
from unittest.mock import Mock, patch
-import psycopg2
import pytest
from swh.model import from_disk, identifiers
from swh.model.hashutil import hash_to_bytes
from swh.storage.tests.storage_testing import StorageTestFixture
+from swh.storage import HashCollision
@pytest.mark.db
@@ -531,7 +530,6 @@
class twice.
"""
-
@staticmethod
def normalize_entity(entity):
entity = copy.deepcopy(entity)
@@ -570,9 +568,11 @@
sha256_array[0] += 1
cont1b['sha256'] = bytes(sha256_array)
- with self.assertRaises(psycopg2.IntegrityError):
+ with self.assertRaises(HashCollision) as cm:
self.storage.content_add([cont1, cont1b])
+ self.assertEqual(cm.exception.args[0], 'sha1')
+
def test_skipped_content_add(self):
cont = self.skipped_cont.copy()
cont2 = self.skipped_cont2.copy()
@@ -682,7 +682,7 @@
stored_data = list(self.storage.directory_ls(self.dir['id']))
data_to_store = []
- for ent in sorted(self.dir['entries'], key=itemgetter('name')):
+ for ent in self.dir['entries']:
data_to_store.append({
'dir_id': self.dir['id'],
'type': ent['type'],
@@ -696,7 +696,7 @@
'length': None,
})
- self.assertEqual(data_to_store, stored_data)
+ self.assertCountEqual(data_to_store, stored_data)
after_missing = list(self.storage.directory_missing([self.dir['id']]))
self.assertEqual([], after_missing)
@@ -885,7 +885,8 @@
# then
for actual_release in actual_releases:
- del actual_release['author']['id'] # hack: ids are generated
+ if 'id' in actual_release['author']:
+ del actual_release['author']['id'] # hack: ids are generated
self.assertEqual([self.normalize_entity(self.release),
self.normalize_entity(self.release2)],
@@ -1016,7 +1017,6 @@
# then
self.assertEqual(origin_visit1['origin'], origin_id)
self.assertIsNotNone(origin_visit1['visit'])
- self.assertTrue(origin_visit1['visit'] > 0)
actual_origin_visits = list(self.storage.origin_visit_get(origin_id))
self.assertEqual(actual_origin_visits,
@@ -1404,9 +1404,7 @@
expected_keys = ['content', 'directory', 'directory_entry_dir',
'origin', 'person', 'revision']
- for key in expected_keys:
- self.cursor.execute('select * from swh_update_counter(%s)', (key,))
- self.conn.commit()
+ self.storage.refresh_stat_counters()
counters = self.storage.stat_counters()
@@ -1681,10 +1679,9 @@
'provider_name': self.provider['name'],
'provider_url': self.provider['url']
})
- tool = self.storage.tool_get(self.metadata_tool)
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider['id'],
@@ -1692,7 +1689,6 @@
self.origin_metadata['metadata'])
actual_om1 = list(self.storage.origin_metadata_get_by(origin_id))
# then
- self.assertEqual(actual_om1[0]['id'], o_m1)
self.assertEqual(len(actual_om1), 1)
self.assertEqual(actual_om1[0]['origin_id'], origin_id)
@@ -1709,21 +1705,21 @@
'provider_name': self.provider['name'],
'provider_url': self.provider['url']
})
- tool = self.storage.tool_get(self.metadata_tool)
+ tool = list(self.storage.tool_add([self.metadata_tool]))[0]
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider['id'],
tool['id'],
self.origin_metadata['metadata'])
- o_m2 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id2,
self.origin_metadata2['discovery_date'],
provider['id'],
tool['id'],
self.origin_metadata2['metadata'])
- o_m3 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata2['discovery_date'],
provider['id'],
@@ -1735,15 +1731,12 @@
expected_results = [{
'origin_id': origin_id,
'discovery_date': datetime.datetime(
- 2017, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2017, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m3,
'provider_id': provider['id'],
'provider_name': 'hal',
'provider_type': 'deposit-client',
@@ -1752,15 +1745,12 @@
}, {
'origin_id': origin_id,
'discovery_date': datetime.datetime(
- 2015, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2015, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m1,
'provider_id': provider['id'],
'provider_name': 'hal',
'provider_type': 'deposit-client',
@@ -1771,8 +1761,7 @@
# then
self.assertEqual(len(all_metadatas), 2)
self.assertEqual(len(metadatas_for_origin2), 1)
- self.assertEqual(metadatas_for_origin2[0]['id'], o_m2)
- self.assertEqual(all_metadatas, expected_results)
+ self.assertCountEqual(all_metadatas, expected_results)
def test_origin_metadata_get_by_provider_type(self):
# given
@@ -1801,16 +1790,16 @@
# using the only tool now inserted in the data.sql, but for this
# provider should be a crawler tool (not yet implemented)
- tool = self.storage.tool_get(self.metadata_tool)
+ tool = list(self.storage.tool_add([self.metadata_tool]))[0]
# when adding for the same origin 2 metadatas
- o_m1 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id,
self.origin_metadata['discovery_date'],
provider1['id'],
tool['id'],
self.origin_metadata['metadata'])
- o_m2 = self.storage.origin_metadata_add(
+ self.storage.origin_metadata_add(
origin_id2,
self.origin_metadata2['discovery_date'],
provider2['id'],
@@ -1821,18 +1810,18 @@
origin_metadata_get_by(
origin_id2,
provider_type))
+ for item in m_by_provider:
+ if 'id' in item:
+ del item['id']
expected_results = [{
'origin_id': origin_id2,
'discovery_date': datetime.datetime(
- 2017, 1, 2, 0, 0,
- tzinfo=psycopg2.tz.FixedOffsetTimezone(
- offset=60,
- name=None)),
+ 2017, 1, 1, 23, 0,
+ tzinfo=datetime.timezone.utc),
'metadata': {
'name': 'test_origin_metadata',
'version': '0.0.1'
},
- 'id': o_m2,
'provider_id': provider2['id'],
'provider_name': 'swMATH',
'provider_type': provider_type,
@@ -1843,8 +1832,6 @@
self.assertEqual(len(m_by_provider), 1)
self.assertEqual(m_by_provider, expected_results)
- self.assertEqual(m_by_provider[0]['id'], o_m2)
- self.assertIsNotNone(o_m1)
class TestLocalStorage(CommonTestStorage, StorageTestDbFixture,
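
For context on the collision test above: it mutates one byte of cont1's sha256 while leaving its sha1 untouched, so inserting both rows violates the content_pkey (sha1) constraint. A condensed sketch, where cont1 is a hypothetical content dict with 'sha1' and 'sha256' keys:

    cont1b = cont1.copy()
    sha256_array = bytearray(cont1b['sha256'])
    sha256_array[0] += 1          # same sha1, different sha256
    cont1b['sha256'] = bytes(sha256_array)
    # content_add([cont1, cont1b]) now raises HashCollision('sha1')

The rewritten timestamp expectations are equivalent instants: psycopg2's FixedOffsetTimezone(offset=60) is a +60-minute offset, and 2017-01-02 00:00+01:00 denotes the same moment as 2017-01-01 23:00 UTC, so only the representation changes, not the expected value.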
