# Patch against swh/fuse/cache.py, restored to one patch line per text line.
#
# NOTE(review): the collapsed text this was rebuilt from had lost all leading
# whitespace, so the indentation of the context and changed lines below is
# reconstructed -- verify it against the target file before applying (or
# apply with whitespace tolerance).
#
# What the six hunks do:
#   1. Import datetime.datetime and sqlite3.
#   2. Pass detect_types=sqlite3.PARSE_DECLTYPES to aiosqlite.connect().
#   3. Declare metadata_cache.swhid and visits_cache.url as
#      "not null primary key", drop the explicit idx_metadata / idx_visits
#      indexes, and add an "itime timestamp" column to visits_cache.
#   4. get_visits() also selects itime and returns None when the cached row
#      is at least one day old, so the caller sees a cache miss.
#   5. set_visits() switches to "insert or replace" and stores
#      datetime.now() as the third column value.
#   6. Declare blob_cache.swhid as "not null primary key" and drop idx_blob.
#
# NOTE(review): the tables are created with "create table if not exists", so
# a cache file created before this patch presumably keeps the old
# visits_cache layout without itime -- confirm whether a migration or cache
# reset is required.
# NOTE(review): datetime.now() is naive local time on both the write and the
# read side -- confirm that is acceptable for the one-day staleness check.
diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py
--- a/swh/fuse/cache.py
+++ b/swh/fuse/cache.py
@@ -6,10 +6,12 @@
 from abc import ABC
 from collections import OrderedDict
 from dataclasses import dataclass, field
+from datetime import datetime
 import json
 import logging
 from pathlib import Path
 import re
+import sqlite3
 import sys
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 
@@ -98,7 +100,9 @@
             path = Path(self.conf["path"])
             path.parent.mkdir(parents=True, exist_ok=True)
             uri = False
-        self.conn = await aiosqlite.connect(path, uri=uri)
+        self.conn = await aiosqlite.connect(
+            path, uri=uri, detect_types=sqlite3.PARSE_DECLTYPES
+        )
         return self
 
     async def __aexit__(self, type=None, val=None, tb=None) -> None:
@@ -114,17 +118,16 @@
 
     DB_SCHEMA = """
         create table if not exists metadata_cache (
-            swhid text,
+            swhid text not null primary key,
             metadata blob,
             date text
         );
-        create index if not exists idx_metadata on metadata_cache(swhid);
 
         create table if not exists visits_cache (
-            url text,
-            metadata blob
+            url text not null primary key,
+            metadata blob,
+            itime timestamp -- insertion time
         );
-        create index if not exists idx_visits on visits_cache(url);
     """
 
     async def __aenter__(self):
@@ -146,11 +149,17 @@
 
     async def get_visits(self, url_encoded: str) -> Optional[List[Dict[str, Any]]]:
         cursor = await self.conn.execute(
-            "select metadata from visits_cache where url=?", (url_encoded,)
+            "select metadata, itime from visits_cache where url=?", (url_encoded,),
         )
         cache = await cursor.fetchone()
         if cache:
-            visits = json.loads(cache[0])
+            metadata, itime = cache[0], cache[1]
+            # Force-update cache with (potentially) new origin visits
+            diff = datetime.now() - itime
+            if diff.days >= 1:
+                return None
+
+            visits = json.loads(metadata)
             visits_typed = [typify_json(v, ORIGIN_VISIT) for v in visits]
             return visits_typed
         else:
@@ -173,7 +182,8 @@
 
     async def set_visits(self, url_encoded: str, visits: List[Dict[str, Any]]) -> None:
         await self.conn.execute(
-            "insert into visits_cache values (?, ?)", (url_encoded, json.dumps(visits)),
+            "insert or replace into visits_cache values (?, ?, ?)",
+            (url_encoded, json.dumps(visits), datetime.now()),
         )
         await self.conn.commit()
 
@@ -189,10 +199,9 @@
 
     DB_SCHEMA = """
         create table if not exists blob_cache (
-            swhid text,
+            swhid text not null primary key,
             blob blob
         );
-        create index if not exists idx_blob on blob_cache(swhid);
     """
 
     async def __aenter__(self):