Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import random | import random | ||||
import select | import select | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.db_utils import stored_procedure, jsonize | from swh.core.db.db_utils import stored_procedure, jsonize | ||||
from swh.core.db.db_utils import execute_values_generator | from swh.core.db.db_utils import execute_values_generator | ||||
from swh.model.model import SHA1_SIZE | from swh.model.model import OriginVisit, SHA1_SIZE | ||||
class Db(BaseDb): | class Db(BaseDb): | ||||
"""Proxy to the SWH DB, with wrappers around stored procedures | """Proxy to the SWH DB, with wrappers around stored procedures | ||||
""" | """ | ||||
def mktemp_dir_entry(self, entry_type, cur=None): | def mktemp_dir_entry(self, entry_type, cur=None): | ||||
▲ Show 20 Lines • Show All 353 Lines • ▼ Show 20 Lines | def origin_visit_update(self, origin_id, visit_id, updates, cur=None): | ||||
SET {update_cols} | SET {update_cols} | ||||
FROM origin | FROM origin | ||||
WHERE {where}""".format(**{ | WHERE {where}""".format(**{ | ||||
'update_cols': ', '.join(update_cols), | 'update_cols': ', '.join(update_cols), | ||||
'where': ' AND '.join(where) | 'where': ' AND '.join(where) | ||||
}) | }) | ||||
cur.execute(query, (*values, *where_values)) | cur.execute(query, (*values, *where_values)) | ||||
def origin_visit_upsert(self, origin, visit, date, type, status, | def origin_visit_upsert(self, origin_visit: OriginVisit, cur=None) -> None: | ||||
vlorentz: Do we want swh-model objects in `db.py`? | |||||
Done Inline Actions — ardumont: Why not? | |||||
Not Done Inline Actions — vlorentz: Because we haven't done it so far, so this introduces an inconsistency. | |||||
Done Inline Actions — ardumont: But we are trying to fix that inconsistency everywhere by using model objects...
I'm trying to… | |||||
Done Inline Actions — ardumont: Also, you typed `cql` with base model objects.
This `cql` (cassandra) module is the equivalent of… | |||||
Not Done Inline Actions — vlorentz: Sure, but I would rather this single function be consistent with the rest of its module than with a completely different module. But if we want to make `db.py` use model objects, then sure. | |||||
Done Inline Actions — ardumont: Well, consistency is the goal.
But I'm doing that incrementally as I go along. | |||||
metadata, snapshot, cur=None): | |||||
# doing an extra query like this is way simpler than trying to join | # doing an extra query like this is way simpler than trying to join | ||||
# the origin id in the query below | # the origin id in the query below | ||||
origin_id = next(self.origin_id_get_by_url([origin])) | ov = origin_visit | ||||
origin_id = next(self.origin_id_get_by_url([ov.origin])) | |||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
query = """INSERT INTO origin_visit ({cols}) VALUES ({values}) | query = """INSERT INTO origin_visit ({cols}) VALUES ({values}) | ||||
ON CONFLICT ON CONSTRAINT origin_visit_pkey DO | ON CONFLICT ON CONSTRAINT origin_visit_pkey DO | ||||
UPDATE SET {updates}""".format( | UPDATE SET {updates}""".format( | ||||
cols=', '.join(self.origin_visit_get_cols), | cols=', '.join(self.origin_visit_get_cols), | ||||
values=', '.join('%s' for col in self.origin_visit_get_cols), | values=', '.join('%s' for col in self.origin_visit_get_cols), | ||||
updates=', '.join('{0}=excluded.{0}'.format(col) | updates=', '.join('{0}=excluded.{0}'.format(col) | ||||
for col in self.origin_visit_get_cols)) | for col in self.origin_visit_get_cols)) | ||||
cur.execute( | cur.execute( | ||||
query, (origin_id, visit, date, type, status, metadata, snapshot)) | query, (origin_id, ov.visit, ov.date, ov.type, ov.status, | ||||
ov.metadata, ov.snapshot)) | |||||
origin_visit_get_cols = [ | origin_visit_get_cols = [ | ||||
'origin', 'visit', 'date', 'type', | 'origin', 'visit', 'date', 'type', | ||||
'status', 'metadata', 'snapshot'] | 'status', 'metadata', 'snapshot'] | ||||
origin_visit_select_cols = [ | origin_visit_select_cols = [ | ||||
'origin.url AS origin', 'visit', 'date', 'origin_visit.type AS type', | 'origin.url AS origin', 'visit', 'date', 'origin_visit.type AS type', | ||||
'status', 'metadata', 'snapshot'] | 'status', 'metadata', 'snapshot'] | ||||
▲ Show 20 Lines • Show All 566 Lines • Show Last 20 Lines |
Do we want swh-model objects in db.py?