diff --git a/swh/fuse/cache.py b/swh/fuse/cache.py
--- a/swh/fuse/cache.py
+++ b/swh/fuse/cache.py
@@ -6,10 +6,12 @@
 from abc import ABC
 from collections import OrderedDict
 from dataclasses import dataclass, field
+from datetime import datetime
 import json
 import logging
 from pathlib import Path
 import re
+import sqlite3
 import sys
 from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple
 
@@ -98,7 +100,9 @@
             path = Path(self.conf["path"])
             path.parent.mkdir(parents=True, exist_ok=True)
             uri = False
-        self.conn = await aiosqlite.connect(path, uri=uri)
+        self.conn = await aiosqlite.connect(
+            path, uri=uri, detect_types=sqlite3.PARSE_DECLTYPES
+        )
         return self
 
     async def __aexit__(self, type=None, val=None, tb=None) -> None:
@@ -114,15 +118,16 @@
 
     DB_SCHEMA = """
         create table if not exists metadata_cache (
-            swhid text,
+            swhid text not null primary key,
             metadata blob,
             date text
         );
         create index if not exists idx_metadata on metadata_cache(swhid);
 
         create table if not exists visits_cache (
-            url text,
-            metadata blob
+            url text not null primary key,
+            metadata blob,
+            last_fetched timestamp
         );
         create index if not exists idx_visits on visits_cache(url);
     """
@@ -144,13 +149,29 @@
         else:
             return None
 
-    async def get_visits(self, url_encoded: str) -> Optional[List[Dict[str, Any]]]:
+    async def get_visits(
+        self, url_encoded: str, check_timestamp: bool = False
+    ) -> Optional[List[Dict[str, Any]]]:
+        """Return the cached visits of an origin, or None on a cache miss.
+
+        If check_timestamp is set, an entry fetched a day or more ago (or
+        stored without a fetch date) counts as a miss so callers refresh it.
+        """
         cursor = await self.conn.execute(
-            "select metadata from visits_cache where url=?", (url_encoded,)
+            "select metadata, last_fetched from visits_cache where url=?",
+            (url_encoded,),
         )
         cache = await cursor.fetchone()
         if cache:
-            visits = json.loads(cache[0])
+            metadata, last_fetched = cache[0], cache[1]
+            # Force-update cache with (potentially) new origin visits. The age is
+            # only computed on request, and a missing timestamp counts as stale
+            if check_timestamp and (
+                last_fetched is None or (datetime.now() - last_fetched).days >= 1
+            ):
+                return None
+
+            visits = json.loads(metadata)
             visits_typed = [typify_json(v, ORIGIN_VISIT) for v in visits]
             return visits_typed
         else:
@@ -173,7 +194,8 @@
 
     async def set_visits(self, url_encoded: str, visits: List[Dict[str, Any]]) -> None:
         await self.conn.execute(
-            "insert into visits_cache values (?, ?)", (url_encoded, json.dumps(visits)),
+            "insert or replace into visits_cache values (?, ?, ?)",
+            (url_encoded, json.dumps(visits), datetime.now()),
         )
         await self.conn.commit()
 
@@ -189,7 +211,7 @@
 
     DB_SCHEMA = """
         create table if not exists blob_cache (
-            swhid text,
+            swhid text not null primary key,
             blob blob
         );
         create index if not exists idx_blob on blob_cache(swhid);
diff --git a/swh/fuse/fuse.py b/swh/fuse/fuse.py
--- a/swh/fuse/fuse.py
+++ b/swh/fuse/fuse.py
@@ -152,7 +152,7 @@
     async def get_visits(self, url_encoded: str) -> List[Dict[str, Any]]:
         """ Retrieve origin visits given an encoded-URL using Software Heritage API """
 
-        cache = await self.cache.metadata.get_visits(url_encoded)
+        cache = await self.cache.metadata.get_visits(url_encoded, check_timestamp=True)
         if cache:
             self.logger.debug(
                 "Found %d visits for origin '%s' in cache", len(cache), url_encoded,