Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import random | import random | ||||
import select | import select | ||||
from typing import Any, Dict, List, Optional, Tuple | from typing import Any, Dict, Iterable, List, Optional, Tuple | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.db_utils import stored_procedure, jsonize | from swh.core.db.db_utils import stored_procedure, jsonize | ||||
from swh.core.db.db_utils import execute_values_generator | from swh.core.db.db_utils import execute_values_generator | ||||
from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | ||||
class Db(BaseDb): | class Db(BaseDb): | ||||
▲ Show 20 Lines • Show All 648 Lines • ▼ Show 20 Lines | def origin_visit_exists(self, origin_id, visit_id, cur=None): | ||||
query = "SELECT 1 FROM origin_visit where origin = %s AND visit = %s" | query = "SELECT 1 FROM origin_visit where origin = %s AND visit = %s" | ||||
cur.execute(query, (origin_id, visit_id)) | cur.execute(query, (origin_id, visit_id)) | ||||
return bool(cur.fetchone()) | return bool(cur.fetchone()) | ||||
def origin_visit_get_latest( | def origin_visit_get_latest( | ||||
self, origin_id: str, allowed_statuses=None, require_snapshot=False, cur=None | self, | ||||
origin_id: str, | |||||
type: Optional[str], | |||||
olasd: `= None` to avoid an api break? | |||||
ardumont (author, unsubmitted, done): Missed that one, yes, thanks. | |||||
vlorentz (unsubmitted, not done): That's db.py, not an API. | |||||
allowed_statuses: Optional[Iterable[str]] = None, | |||||
require_snapshot: bool = False, | |||||
vlorentz (unsubmitted, not done): These don't need to have defaults either. | |||||
ardumont (author, unsubmitted, done): Ah yes, we are in db, not in the API. So that means I can remove all the defaults, since it will be called from storage with them set (except for cur, maybe). | |||||
cur=None, | |||||
): | ): | ||||
"""Retrieve the most recent origin_visit of the given origin, | """Retrieve the most recent origin_visit of the given origin, | ||||
with optional filters. | with optional filters. | ||||
Args: | Args: | ||||
origin_id: the origin concerned | origin_id: the origin concerned | ||||
type: Optional visit type to filter on | |||||
allowed_statuses: the visit statuses allowed for the returned visit | allowed_statuses: the visit statuses allowed for the returned visit | ||||
require_snapshot (bool): If True, only a visit with a known | require_snapshot (bool): If True, only a visit with a known | ||||
snapshot will be returned. | snapshot will be returned. | ||||
Returns: | Returns: | ||||
The origin_visit information, or None if no visit matches. | The origin_visit information, or None if no visit matches. | ||||
""" | """ | ||||
cur = self._cursor(cur) | cur = self._cursor(cur) | ||||
query_parts = [ | query_parts = [ | ||||
"SELECT %s" % ", ".join(self.origin_visit_select_cols), | "SELECT %s" % ", ".join(self.origin_visit_select_cols), | ||||
"FROM origin_visit ov ", | "FROM origin_visit ov ", | ||||
"INNER JOIN origin o ON o.id = ov.origin", | "INNER JOIN origin o ON o.id = ov.origin", | ||||
"INNER JOIN origin_visit_status ovs ", | "INNER JOIN origin_visit_status ovs ", | ||||
"ON o.id = ovs.origin AND ov.visit = ovs.visit ", | "ON o.id = ovs.origin AND ov.visit = ovs.visit ", | ||||
] | ] | ||||
query_parts.append("WHERE o.url = %s") | query_parts.append("WHERE o.url = %s") | ||||
query_params: List[Any] = [origin_id] | query_params: List[Any] = [origin_id] | ||||
if type is not None: | |||||
query_parts.append("AND ov.type = %s") | |||||
query_params.append(type) | |||||
if require_snapshot: | if require_snapshot: | ||||
query_parts.append("AND ovs.snapshot is not null") | query_parts.append("AND ovs.snapshot is not null") | ||||
if allowed_statuses: | if allowed_statuses: | ||||
query_parts.append("AND ovs.status IN %s") | query_parts.append("AND ovs.status IN %s") | ||||
query_params.append(tuple(allowed_statuses)) | query_params.append(tuple(allowed_statuses)) | ||||
query_parts.append( | query_parts.append( | ||||
▲ Show 20 Lines • Show All 542 Lines • Show Last 20 Lines |
olasd: Add `= None` to avoid an API break?