Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import select | import select | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.db_utils import stored_procedure, jsonize | from swh.core.db.db_utils import stored_procedure, jsonize | ||||
from swh.core.db.db_utils import execute_values_generator | from swh.core.db.db_utils import execute_values_generator | ||||
class Db(BaseDb): | class Db(BaseDb): | ||||
"""Proxy to the SWH DB, with wrappers around stored procedures | """Proxy to the SWH DB, with wrappers around stored procedures | ||||
""" | """ | ||||
def mktemp_dir_entry(self, entry_type, cur=None): | def mktemp_dir_entry(self, entry_type, cur=None): | ||||
self._cursor(cur).execute('SELECT swh_mktemp_dir_entry(%s)', | self._cursor(cur).execute('SELECT swh_mktemp_dir_entry(%s)', | ||||
(('directory_entry_%s' % entry_type),)) | (('directory_entry_%s' % entry_type),)) | ||||
@stored_procedure('swh_mktemp_revision') | @stored_procedure('swh_mktemp_revision') | ||||
def mktemp_revision(self, cur=None): pass | def mktemp_revision(self, cur=None): pass | ||||
@stored_procedure('swh_mktemp_release') | @stored_procedure('swh_mktemp_release') | ||||
▲ Show 20 Lines • Show All 187 Lines • ▼ Show 20 Lines | def content_find(self, sha1=None, sha1_git=None, sha256=None, | ||||
blake2s256: blake2s256 content | blake2s256: blake2s256 content | ||||
Returns: | Returns: | ||||
The tuple (sha1, sha1_git, sha256, blake2s256) if found or None. | The tuple (sha1, sha1_git, sha256, blake2s256) if found or None. | ||||
""" | """ | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
checksum_dict = {'sha1': sha1, 'sha1_git': sha1_git, | |||||
'sha256': sha256, 'blake2s256': blake2s256} | |||||
where_parts = [] | |||||
args = [] | |||||
# Adds only those keys which have value other than None | |||||
for algorithm in checksum_dict: | |||||
if checksum_dict[algorithm] is not None: | |||||
args.append(checksum_dict[algorithm]) | |||||
where_parts.append(algorithm + '= %s') | |||||
query = ' AND '.join(where_parts) | |||||
vlorentz: nitpick: you can rewrite it like this to be more readable:
```
where_parts = []
args = []
for… | |||||
cur.execute("""SELECT %s | cur.execute("""SELECT %s | ||||
Done Inline ActionsPlease, remove the print statement ;) ardumont: Please, remove the print statement ;) | |||||
FROM swh_content_find(%%s, %%s, %%s, %%s) | FROM content WHERE %s | ||||
Done Inline ActionsIf there is no longer a need for limit, then remove it. ardumont: If there is no longer a need for limit, then remove it. | |||||
LIMIT 1""" % ','.join(self.content_find_cols), | """ | ||||
(sha1, sha1_git, sha256, blake2s256)) | % (','.join(self.content_find_cols), query), | ||||
args) | |||||
content = cur.fetchone() | content = cur.fetchall() | ||||
if set(content) == {None}: | |||||
return None | |||||
else: | |||||
return content | return content | ||||
def directory_missing_from_list(self, directories, cur=None): | def directory_missing_from_list(self, directories, cur=None): | ||||
Done Inline ActionsYou can merge this for loop with the other one. vlorentz: You can merge this `for` loop with the other one. | |||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
Done Inline ActionsYour query build seems a bit complicated. It's simpler to read. ardumont: Your query build seems a bit complicated.
Can you please try and adapt more along the lines… | |||||
yield from execute_values_generator( | yield from execute_values_generator( | ||||
cur, """ | cur, """ | ||||
SELECT id FROM (VALUES %s) as t(id) | SELECT id FROM (VALUES %s) as t(id) | ||||
WHERE NOT EXISTS ( | WHERE NOT EXISTS ( | ||||
Done Inline ActionsRemove the limit if there is no need for it. (LIMIT ALL seems like no limit to me ;) ardumont: Remove the limit if there is no need for it.
(`LIMIT ALL` seems like no limit to me ;) | |||||
SELECT 1 FROM directory d WHERE d.id = t.id | SELECT 1 FROM directory d WHERE d.id = t.id | ||||
) | ) | ||||
""", ((id,) for id in directories)) | """, ((id,) for id in directories)) | ||||
directory_ls_cols = ['dir_id', 'type', 'target', 'name', 'perms', | directory_ls_cols = ['dir_id', 'type', 'target', 'name', 'perms', | ||||
'status', 'sha1', 'sha1_git', 'sha256', 'length'] | 'status', 'sha1', 'sha1_git', 'sha256', 'length'] | ||||
def directory_walk_one(self, directory, cur=None): | def directory_walk_one(self, directory, cur=None): | ||||
Done Inline ActionsThere is nothing wrong with returning an empty list. So always return content even if it's empty. vlorentz: There is nothing wrong with returning an empty list. So always return `content` even if it's… | |||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cols = ', '.join(self.directory_ls_cols) | cols = ', '.join(self.directory_ls_cols) | ||||
query = 'SELECT %s FROM swh_directory_walk_one(%%s)' % cols | query = 'SELECT %s FROM swh_directory_walk_one(%%s)' % cols | ||||
cur.execute(query, (directory,)) | cur.execute(query, (directory,)) | ||||
yield from cur | yield from cur | ||||
def directory_walk(self, directory, cur=None): | def directory_walk(self, directory, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
▲ Show 20 Lines • Show All 571 Lines • ▼ Show 20 Lines | def origin_metadata_get_by(self, origin_id, provider_type=None, cur=None): | ||||
"""Retrieve all origin_metadata entries for one origin_id | """Retrieve all origin_metadata entries for one origin_id | ||||
""" | """ | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
if not provider_type: | if not provider_type: | ||||
query = '''SELECT %s | query = '''SELECT %s | ||||
FROM swh_origin_metadata_get_by_origin( | FROM swh_origin_metadata_get_by_origin( | ||||
%%s)''' % (','.join( | %%s)''' % (','.join( | ||||
self.origin_metadata_get_cols)) | self.origin_metadata_get_cols)) | ||||
cur.execute(query, (origin_id, )) | cur.execute(query, (origin_id, )) | ||||
else: | else: | ||||
query = '''SELECT %s | query = '''SELECT %s | ||||
FROM swh_origin_metadata_get_by_provider_type( | FROM swh_origin_metadata_get_by_provider_type( | ||||
%%s, %%s)''' % (','.join( | %%s, %%s)''' % (','.join( | ||||
self.origin_metadata_get_cols)) | self.origin_metadata_get_cols)) | ||||
cur.execute(query, (origin_id, provider_type)) | cur.execute(query, (origin_id, provider_type)) | ||||
yield from cur | yield from cur | ||||
tool_cols = ['id', 'name', 'version', 'configuration'] | tool_cols = ['id', 'name', 'version', 'configuration'] | ||||
@stored_procedure('swh_mktemp_tool') | @stored_procedure('swh_mktemp_tool') | ||||
def mktemp_tool(self, cur=None): | def mktemp_tool(self, cur=None): | ||||
pass | pass | ||||
def tool_add_from_temp(self, cur=None): | def tool_add_from_temp(self, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute("SELECT %s from swh_tool_add()" % ( | cur.execute("SELECT %s from swh_tool_add()" % ( | ||||
','.join(self.tool_cols), )) | ','.join(self.tool_cols), )) | ||||
yield from cur | yield from cur | ||||
def tool_get(self, name, version, configuration, cur=None): | def tool_get(self, name, version, configuration, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute('''select %s | cur.execute('''select %s | ||||
from tool | from tool | ||||
where name=%%s and | where name=%%s and | ||||
version=%%s and | version=%%s and | ||||
configuration=%%s''' % ( | configuration=%%s''' % ( | ||||
','.join(self.tool_cols)), | ','.join(self.tool_cols)), | ||||
(name, version, configuration)) | (name, version, configuration)) | ||||
return cur.fetchone() | return cur.fetchone() | ||||
metadata_provider_cols = ['id', 'provider_name', 'provider_type', | metadata_provider_cols = ['id', 'provider_name', 'provider_type', | ||||
'provider_url', 'metadata'] | 'provider_url', 'metadata'] | ||||
def metadata_provider_add(self, provider_name, provider_type, | def metadata_provider_add(self, provider_name, provider_type, | ||||
provider_url, metadata, cur=None): | provider_url, metadata, cur=None): | ||||
"""Insert a new provider and return the new identifier.""" | """Insert a new provider and return the new identifier.""" | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
insert = """INSERT INTO metadata_provider (provider_name, provider_type, | insert = """INSERT INTO metadata_provider (provider_name, provider_type, | ||||
provider_url, metadata) values (%s, %s, %s, %s) | provider_url, metadata) values (%s, %s, %s, %s) | ||||
RETURNING id""" | RETURNING id""" | ||||
cur.execute(insert, (provider_name, provider_type, provider_url, | cur.execute(insert, (provider_name, provider_type, provider_url, | ||||
jsonize(metadata))) | jsonize(metadata))) | ||||
return cur.fetchone()[0] | return cur.fetchone()[0] | ||||
def metadata_provider_get(self, provider_id, cur=None): | def metadata_provider_get(self, provider_id, cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute('''select %s | cur.execute('''select %s | ||||
from metadata_provider | from metadata_provider | ||||
where id=%%s ''' % ( | where id=%%s ''' % ( | ||||
','.join(self.metadata_provider_cols)), | ','.join(self.metadata_provider_cols)), | ||||
(provider_id, )) | (provider_id, )) | ||||
return cur.fetchone() | return cur.fetchone() | ||||
def metadata_provider_get_by(self, provider_name, provider_url, | def metadata_provider_get_by(self, provider_name, provider_url, | ||||
cur=None): | cur=None): | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
cur.execute('''select %s | cur.execute('''select %s | ||||
from metadata_provider | from metadata_provider | ||||
where provider_name=%%s and | where provider_name=%%s and | ||||
provider_url=%%s''' % ( | provider_url=%%s''' % ( | ||||
','.join(self.metadata_provider_cols)), | ','.join(self.metadata_provider_cols)), | ||||
(provider_name, provider_url)) | (provider_name, provider_url)) | ||||
return cur.fetchone() | return cur.fetchone() |
nitpick: you can rewrite it like this to be more readable:
then use ' AND '.join(where_parts) below.