Changeset View
Changeset View
Standalone View
Standalone View
swh/scrubber/db.py
# Copyright (C) 2022 The Software Heritage developers | # Copyright (C) 2022 The Software Heritage developers | ||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||||||||
import dataclasses | import dataclasses | ||||||||||
import datetime | import datetime | ||||||||||
import functools | import functools | ||||||||||
from typing import Iterable, Iterator, List, Optional | from typing import Iterable, Iterator, List, Optional, Tuple | ||||||||||
import psycopg2 | import psycopg2 | ||||||||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||||||||
from swh.model.swhids import CoreSWHID | from swh.model.swhids import CoreSWHID | ||||||||||
@dataclasses.dataclass(frozen=True) | @dataclasses.dataclass(frozen=True) | ||||||||||
Show All 36 Lines | |||||||||||
class FixedObject: | class FixedObject: | ||||||||||
id: CoreSWHID | id: CoreSWHID | ||||||||||
object_: bytes | object_: bytes | ||||||||||
method: str | method: str | ||||||||||
recovery_date: Optional[datetime.datetime] = None | recovery_date: Optional[datetime.datetime] = None | ||||||||||
class ScrubberDb(BaseDb): | class ScrubberDb(BaseDb): | ||||||||||
current_version = 3 | current_version = 4 | ||||||||||
#################################### | #################################### | ||||||||||
# Shared tables | # Shared tables | ||||||||||
#################################### | #################################### | ||||||||||
@functools.lru_cache(1000) | @functools.lru_cache(1000) | ||||||||||
def datastore_get_or_add(self, datastore: Datastore) -> int: | def datastore_get_or_add(self, datastore: Datastore) -> int: | ||||||||||
"""Creates a datastore if it does not exist, and returns its id.""" | """Creates a datastore if it does not exist, and returns its id.""" | ||||||||||
Show All 22 Lines | def datastore_get_or_add(self, datastore: Datastore) -> int: | ||||||||||
(dataclasses.asdict(datastore)), | (dataclasses.asdict(datastore)), | ||||||||||
) | ) | ||||||||||
res = cur.fetchone() | res = cur.fetchone() | ||||||||||
assert res is not None | assert res is not None | ||||||||||
(id_,) = res | (id_,) = res | ||||||||||
return id_ | return id_ | ||||||||||
#################################### | #################################### | ||||||||||
# Checkpointing/progress tracking | |||||||||||
#################################### | |||||||||||
def checked_range_upsert(
    self,
    datastore: Datastore,
    range_start: CoreSWHID,
    range_end: CoreSWHID,
    date: datetime.datetime,
) -> None:
    """
    Records in the database that the given range was last checked at the
    given date.

    The ``checked_range`` table maps a range to a date (not a date to a
    range): rows are keyed on ``(datastore, range_start, range_end)``.
    A new row is inserted for a range seen for the first time; for an
    already-known range, ``last_date`` becomes the greater of the stored
    date and ``date``, so an older date never overwrites a newer one.
    """
    # Resolve (or create) the datastore row before opening the transaction.
    datastore_id = self.datastore_get_or_add(datastore)
    query = """
        INSERT INTO checked_range(datastore, range_start, range_end, last_date)
        VALUES (%s, %s, %s, %s)
        ON CONFLICT (datastore, range_start, range_end) DO UPDATE
            SET last_date = GREATEST(checked_range.last_date, EXCLUDED.last_date)
        """
    # Range boundaries are stored in their string form.
    params = (datastore_id, str(range_start), str(range_end), date)
    with self.transaction() as cur:
        cur.execute(query, params)
def checked_range_get_last_date(
    self, datastore: Datastore, range_start: CoreSWHID, range_end: CoreSWHID
) -> Optional[datetime.datetime]:
    """
    Returns the last date the given range was checked in the given datastore,
    or :const:`None` if it was never checked.

    Only a row whose boundaries are exactly ``range_start`` and
    ``range_end`` matches; ranges that contain the given one, or are
    contained by it, are not taken into account.
    """
    lookup_key = (
        self.datastore_get_or_add(datastore),
        str(range_start),
        str(range_end),
    )
    with self.transaction() as cur:
        cur.execute(
            """
            SELECT last_date
            FROM checked_range
            WHERE datastore=%s AND range_start=%s AND range_end=%s
            """,
            lookup_key,
        )
        row = cur.fetchone()
        if row is not None:
            # Single-column result: unpack the only value.
            (last_date,) = row
            return last_date
        return None
def checked_range_iter(
    self, datastore: Datastore
) -> Iterator[Tuple[CoreSWHID, CoreSWHID, datetime.datetime]]:
    """
    Yields a ``(range_start, range_end, last_date)`` tuple for every row of
    ``checked_range`` recorded for the given datastore.

    The range boundaries are parsed back from their stored string form
    into :class:`CoreSWHID` objects; ``last_date`` is passed through as
    returned by the cursor. Rows are yielded one at a time while iterating
    over the cursor.
    """
    datastore_id = self.datastore_get_or_add(datastore)
    parse_swhid = CoreSWHID.from_string
    with self.transaction() as cur:
        cur.execute(
            """
            SELECT range_start, range_end, last_date
            FROM checked_range
            WHERE datastore=%s
            """,
            (datastore_id,),
        )
        for row in cur:
            start, end, checked_at = row
            yield (parse_swhid(start), parse_swhid(end), checked_at)
#################################### | |||||||||||
# Inventory of objects with issues | # Inventory of objects with issues | ||||||||||
#################################### | #################################### | ||||||||||
def corrupt_object_add( | def corrupt_object_add( | ||||||||||
self, | self, | ||||||||||
id: CoreSWHID, | id: CoreSWHID, | ||||||||||
datastore: Datastore, | datastore: Datastore, | ||||||||||
serialized_object: bytes, | serialized_object: bytes, | ||||||||||
▲ Show 20 Lines • Show All 343 Lines • Show Last 20 Lines |