Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/postgresql/archive.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from typing import Any, Dict, Iterable, List | from typing import Any, Dict, Iterable, List | ||||
from methodtools import lru_cache | from methodtools import lru_cache | ||||
import psycopg2.extensions | import psycopg2.extensions | ||||
from swh.core.statsd import statsd | |||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
class ArchivePostgreSQL: | class ArchivePostgreSQL: | ||||
def __init__(self, conn: psycopg2.extensions.connection) -> None:
    """Set up direct PostgreSQL access to the archive.

    A storage object is created through ``get_storage`` using the
    ``"postgresql"`` backend, the DSN of ``conn`` and an in-memory
    objstorage configuration; the connection itself is kept on the
    instance for the raw SQL queries issued by the other methods.

    Args:
        conn: open psycopg2 connection to the archive database.
    """
    # Objstorage configuration handed to get_storage: the "memory" class.
    objstorage_config = {"cls": "memory"}
    self.storage = get_storage(
        "postgresql",
        db=conn.dsn,
        objstorage=objstorage_config,
    )
    self.conn = conn
def directory_ls(self, id: Sha1Git) -> Iterable[Dict[str, Any]]:
    """Lazily iterate over the entries of directory ``id``.

    This is a thin generator around ``self._directory_ls``: the listing is
    only requested when the returned generator is first advanced, and the
    entries are then yielded one by one, in the order they were returned.

    Args:
        id: identifier of the directory to list.

    Yields:
        The entry dictionaries produced by ``self._directory_ls(id)``.
    """
    for entry in self._directory_ls(id):
        yield entry
@lru_cache(maxsize=100000) | @lru_cache(maxsize=100000) | ||||
@statsd.timed( | |||||
metric="swh_provenance_archive_direct_accesstime_seconds", | |||||
tags={"method": "directory_ls"}, | |||||
) | |||||
def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]: | def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]: | ||||
# TODO: add file size filtering | # TODO: add file size filtering | ||||
with self.conn.cursor() as cursor: | with self.conn.cursor() as cursor: | ||||
cursor.execute( | cursor.execute( | ||||
""" | """ | ||||
WITH | WITH | ||||
dir AS (SELECT id AS dir_id, dir_entries, file_entries, rev_entries | dir AS (SELECT id AS dir_id, dir_entries, file_entries, rev_entries | ||||
FROM directory WHERE id=%s), | FROM directory WHERE id=%s), | ||||
Show All 26 Lines | def _directory_ls(self, id: Sha1Git) -> List[Dict[str, Any]]: | ||||
) | ) | ||||
""", | """, | ||||
(id,), | (id,), | ||||
) | ) | ||||
return [ | return [ | ||||
{"type": row[0], "target": row[1], "name": row[2]} for row in cursor | {"type": row[0], "target": row[1], "name": row[2]} for row in cursor | ||||
] | ] | ||||
def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
    """Yield the parents of revision ``id``, ordered by parent rank.

    The parents are read from the ``revision_history`` table through the
    connection stored on the instance. Nothing is yielded when the query
    returns no row; several identifiers are yielded when the revision has
    more than one parent.

    Args:
        id: identifier of the revision whose parents are requested.

    Yields:
        The ``parent_id`` column of each matching row, in increasing
        ``parent_rank`` order.
    """
    # This method is a generator function, so wrapping it with
    # ``@statsd.timed`` would only time the creation of the generator
    # object, which happens before any SQL is executed. The timer is
    # therefore used as a context manager around the database access.
    with statsd.timed(
        metric="swh_provenance_archive_direct_accesstime_seconds",
        tags={"method": "revision_get_parents"},
    ):
        with self.conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT RH.parent_id::bytea
                FROM revision_history AS RH
                WHERE RH.id=%s
                ORDER BY RH.parent_rank
                """,
                (id,),
            )
            # Collect the rows while the timer is running so the metric
            # covers both the query and the fetch.
            parents = [row[0] for row in cursor]
    # Yield outside the timed section: time spent by the consumer between
    # two items must not be accounted as archive access time.
    yield from parents
@statsd.timed( | |||||
metric="swh_provenance_archive_direct_accesstime_seconds", | |||||
tags={"method": "snapshot_get_heads"}, | |||||
) | |||||
def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]: | def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]: | ||||
with self.conn.cursor() as cursor: | with self.conn.cursor() as cursor: | ||||
cursor.execute( | cursor.execute( | ||||
""" | """ | ||||
WITH | WITH | ||||
snaps AS (SELECT object_id FROM snapshot WHERE snapshot.id=%s), | snaps AS (SELECT object_id FROM snapshot WHERE snapshot.id=%s), | ||||
heads AS ((SELECT R.id, R.date | heads AS ((SELECT R.id, R.date | ||||
FROM snaps | FROM snaps | ||||
Show All 26 Lines |