Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import random | import random | ||||
import select | import select | ||||
from typing import Any, Dict, Optional, Tuple | from typing import Any, Dict, List, Optional, Tuple | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.db_utils import stored_procedure, jsonize | from swh.core.db.db_utils import stored_procedure, jsonize | ||||
from swh.core.db.db_utils import execute_values_generator | from swh.core.db.db_utils import execute_values_generator | ||||
from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | ||||
class Db(BaseDb): | class Db(BaseDb): | ||||
▲ Show 20 Lines • Show All 644 Lines • ▼ Show 20 Lines | def origin_visit_exists(self, origin_id, visit_id, cur=None): | ||||
query = "SELECT 1 FROM origin_visit where origin = %s AND visit = %s" | query = "SELECT 1 FROM origin_visit where origin = %s AND visit = %s" | ||||
cur.execute(query, (origin_id, visit_id)) | cur.execute(query, (origin_id, visit_id)) | ||||
return bool(cur.fetchone()) | return bool(cur.fetchone()) | ||||
def origin_visit_get_latest(
    self, origin_id: str, allowed_statuses=None, require_snapshot=False, cur=None
):
    """Retrieve the most recent origin_visit of the given origin,
    with optional filters.

    Args:
        origin_id: the origin concerned. Despite the name, the value is
            compared against ``origin.url`` in the query below.
        allowed_statuses: the visit statuses allowed for the returned visit.
            ``None`` or an empty collection disables the status filter.
        require_snapshot (bool): If True, only a visit with a known
            snapshot will be returned.
        cur: cursor to run the query on; it is resolved through
            ``self._cursor`` before use.

    Returns:
        The origin_visit information, or None if no visit matches.

    """
    cur = self._cursor(cur)

    query_parts = [
        "SELECT %s" % ", ".join(self.origin_visit_select_cols),
        "FROM origin_visit ov ",
        "INNER JOIN origin o ON o.id = ov.origin",
        "INNER JOIN origin_visit_status ovs ",
        "ON o.id = ovs.origin AND ov.visit = ovs.visit ",
        "WHERE o.url = %s",
    ]
    # Parameters are collected in the same order as their %s placeholders
    # appear in query_parts.
    query_params = [origin_id]

    if require_snapshot:
        query_parts.append("AND ovs.snapshot is not null")

    if allowed_statuses:
        # Bind the statuses as a query parameter instead of pre-rendering
        # them into the SQL text: a pre-rendered literal containing '%'
        # would be re-interpreted as a placeholder by the execute() call
        # below. Assumes a psycopg2-style cursor, which adapts a tuple to a
        # parenthesised SQL list suitable for IN -- TODO confirm.
        query_parts.append("AND ovs.status IN %s")
        query_params.append(tuple(allowed_statuses))

    # Latest visit first; for a given visit, its latest status first.
    query_parts.append(
        "ORDER BY ov.date DESC, ov.visit DESC, ovs.date DESC LIMIT 1"
    )

    cur.execute("\n".join(query_parts), tuple(query_params))

    # Normalise any falsy fetch result to None, as callers expect.
    return cur.fetchone() or None
def origin_visit_get_random(self, type, cur=None): | def origin_visit_get_random(self, type, cur=None): | ||||
"""Randomly select one origin visit that was full and in the last 3 | """Randomly select one origin visit that was full and in the last 3 | ||||
months | months | ||||
▲ Show 20 Lines • Show All 517 Lines • Show Last 20 Lines |