Changeset View
Standalone View
swh/storage/storage.py
Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines | def content_add(self, content): | ||||
content_filtered = (cont for cont in content_with_data | content_filtered = (cont for cont in content_with_data | ||||
if cont['sha1'] in missing_content) | if cont['sha1'] in missing_content) | ||||
db.copy_to(content_filtered, 'tmp_content', | db.copy_to(content_filtered, 'tmp_content', | ||||
db.content_get_metadata_keys, cur) | db.content_get_metadata_keys, cur) | ||||
# move metadata in place | # move metadata in place | ||||
try: | |||||
db.content_add_from_temp(cur) | db.content_add_from_temp(cur) | ||||
except psycopg2.IntegrityError as e: | |||||
from . import HashCollision | |||||
olasd: Can we extract the key that has collided? The `diag` attribute on the exception should have the info: http://initd.org/psycopg/docs/extensions.html#psycopg2.extensions.Diagnostics
Done Inline ActionsThe best info I can get from that attribute is the message_detail, which is 'Key (sha1)=(\\x34973274ccef6ab4dfaaf86599792fa9c3fe4689) already exists.'. I could parse it, but eww vlorentz: The best info I can get from that attribute is the `message_detail`, which is `'Key (sha1)=… | |||||
Not Done Inline ActionsYou can check: e.diag.sqlstate == '23505' (unique constraint failure - documented on https://www.postgresql.org/docs/10/errcodes-appendix.html)
We don't enforce distinct blake2s256 hashes (we should also probably drop the sha256 enforcement). Anything else should bubble up as the original IntegrityError exception. olasd: You can check:
`e.diag.sqlstate == '23505'` (unique constraint failure - documented on https… | |||||
Done Inline Actions
grep "_idx" swh/storage/sql/*.sql shows no result. vlorentz: > `content_XXX_idx` -> duplicate XXX (with XXX in sha1_git, sha256)
`grep "_idx"… | |||||
Done Inline Actionsnvm, I misunderstood vlorentz: nvm, I misunderstood | |||||
if e.diag.sqlstate == '23505' and \ | |||||
e.diag.table_name == 'content': | |||||
constaint_to_hash_name = { | |||||
'content_pkey': 'sha1', | |||||
'content_sha1_git_idx': 'sha1_git', | |||||
'content_sha256_idx': 'sha256', | |||||
} | |||||
colliding_hash_name = constaint_to_hash_name \ | |||||
.get(e.diag.constraint_name) | |||||
Not Done Inline Actionsmissing an else: raise ? olasd: missing an `else: raise` ? | |||||
raise HashCollision(colliding_hash_name) | |||||
else: | |||||
raise | |||||
if missing_skipped: | if missing_skipped: | ||||
missing_filtered = ( | missing_filtered = ( | ||||
cont for cont in content_without_data | cont for cont in content_without_data | ||||
if _unique_key(cont) in missing_skipped | if _unique_key(cont) in missing_skipped | ||||
) | ) | ||||
db.mktemp('skipped_content', cur) | db.mktemp('skipped_content', cur) | ||||
▲ Show 20 Lines • Show All 1,023 Lines • ▼ Show 20 Lines | def stat_counters(self, db=None, cur=None): | ||||
Returns: | Returns: | ||||
dict: a dictionary mapping textual labels (e.g., content) to | dict: a dictionary mapping textual labels (e.g., content) to | ||||
integer values (e.g., the number of tuples in table content) | integer values (e.g., the number of tuples in table content) | ||||
""" | """ | ||||
return {k: v for (k, v) in db.stat_counters()} | return {k: v for (k, v) in db.stat_counters()} | ||||
@db_transaction()
def refresh_stat_counters(self, db=None, cur=None):
    """Recompute the statistics returned by :meth:`stat_counters`.

    Calls the ``swh_update_counter`` stored procedure once per tracked
    table so that the cached object counts are brought up to date.

    Args:
        db: database connection handle (injected by ``@db_transaction``)
        cur: database cursor (injected by ``@db_transaction``)
    """
    # NOTE(review): per olasd, this list should match the tables handled
    # by the stat_counters stored procedure (content, skipped_content,
    # directory, revision, release, snapshot, origin, origin_visit, and
    # maybe person; directory_entry_* were reportedly removed as
    # meaningless) -- confirm against swh/storage/sql/ before trimming.
    keys = [
        'content',
        'directory',
        'directory_entry_dir',
        'directory_entry_file',
        'directory_entry_rev',
        'origin',
        'origin_visit',
        'person',
        'release',
        'revision',
        'revision_history',
        'skipped_content',
        'snapshot',
    ]
    for key in keys:
        # One stored-procedure call per table: recompute and persist
        # that table's object count.
        cur.execute('select * from swh_update_counter(%s)', (key,))
@db_transaction() | |||||
def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, | def origin_metadata_add(self, origin_id, ts, provider, tool, metadata, | ||||
db=None, cur=None): | db=None, cur=None): | ||||
""" Add an origin_metadata for the origin at ts with provenance and | """ Add an origin_metadata for the origin at ts with provenance and | ||||
metadata. | metadata. | ||||
Args: | Args: | ||||
origin_id (int): the origin's id for which the metadata is added | origin_id (int): the origin's id for which the metadata is added | ||||
ts (datetime): timestamp of the found metadata | ts (datetime): timestamp of the found metadata | ||||
▲ Show 20 Lines • Show All 189 Lines • Show Last 20 Lines |
Can we extract the key that has collided? The diag attribute on the exception should have the info
http://initd.org/psycopg/docs/extensions.html#psycopg2.extensions.Diagnostics