Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/db.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import datetime | import datetime | ||||
import random | import random | ||||
import select | import select | ||||
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | from typing import Any, Dict, Iterable, List, Optional, Tuple | ||||
from swh.core.db import BaseDb | from swh.core.db import BaseDb | ||||
from swh.core.db.db_utils import stored_procedure, jsonize as _jsonize | from swh.core.db.db_utils import stored_procedure, jsonize as _jsonize | ||||
from swh.core.db.db_utils import execute_values_generator | from swh.core.db.db_utils import execute_values_generator | ||||
from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | from swh.model.model import OriginVisit, OriginVisitStatus, SHA1_SIZE | ||||
def jsonize(d): | def jsonize(d): | ||||
▲ Show 20 Lines • Show All 1,096 Lines • ▼ Show 20 Lines | _object_metadata_insert_query = f""" | ||||
INSERT INTO object_metadata | INSERT INTO object_metadata | ||||
({', '.join(_object_metadata_insert_cols)}) | ({', '.join(_object_metadata_insert_cols)}) | ||||
VALUES ({', '.join('%s' for _ in _object_metadata_insert_cols)}) | VALUES ({', '.join('%s' for _ in _object_metadata_insert_cols)}) | ||||
ON CONFLICT (id, authority_id, discovery_date, fetcher_id) | ON CONFLICT (id, authority_id, discovery_date, fetcher_id) | ||||
DO NOTHING | DO NOTHING | ||||
""" | """ | ||||
object_metadata_get_cols = [ | object_metadata_get_cols = [ | ||||
"id", | "object_metadata.id", | ||||
"object_metadata.type", | |||||
"discovery_date", | "discovery_date", | ||||
"metadata_authority.type", | "metadata_authority.type", | ||||
"metadata_authority.url", | "metadata_authority.url", | ||||
"metadata_fetcher.id", | "metadata_fetcher.id", | ||||
"metadata_fetcher.name", | "metadata_fetcher.name", | ||||
"metadata_fetcher.version", | "metadata_fetcher.version", | ||||
*_object_metadata_context_cols, | *_object_metadata_context_cols, | ||||
"format", | "format", | ||||
"metadata", | "object_metadata.metadata", | ||||
] | ] | ||||
"""List of columns of the object_metadata, metadata_authority, | """List of columns of the object_metadata, metadata_authority, | ||||
and metadata_fetcher tables, used when reading object metadata.""" | and metadata_fetcher tables, used when reading object metadata.""" | ||||
_object_metadata_select_query = f""" | _object_metadata_select_query = f""" | ||||
SELECT | SELECT | ||||
object_metadata.id AS id, | {', '.join(object_metadata_get_cols)} | ||||
{', '.join(object_metadata_get_cols[1:-1])}, | |||||
object_metadata.metadata AS metadata | |||||
FROM object_metadata | FROM object_metadata | ||||
INNER JOIN metadata_authority | INNER JOIN metadata_authority | ||||
ON (metadata_authority.id=authority_id) | ON (metadata_authority.id=authority_id) | ||||
INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id) | INNER JOIN metadata_fetcher ON (metadata_fetcher.id=fetcher_id) | ||||
WHERE object_metadata.id=%s AND authority_id=%s | WHERE object_metadata.id=%s AND authority_id=%s | ||||
""" | """ | ||||
def object_metadata_add( | def object_metadata_add( | ||||
self, | self, | ||||
object_type: str, | object_type: str, | ||||
id: str, | id: str, | ||||
context: Dict[str, Union[str, bytes, int]], | |||||
discovery_date: datetime.datetime, | discovery_date: datetime.datetime, | ||||
authority_id: int, | authority_id: int, | ||||
fetcher_id: int, | fetcher_id: int, | ||||
format: str, | format: str, | ||||
metadata: bytes, | metadata: bytes, | ||||
origin: Optional[str], | |||||
visit: Optional[int], | |||||
snapshot: Optional[str], | |||||
release: Optional[str], | |||||
revision: Optional[str], | |||||
path: Optional[bytes], | |||||
directory: Optional[str], | |||||
cur, | cur, | ||||
): | ): | ||||
query = self._object_metadata_insert_query | query = self._object_metadata_insert_query | ||||
args: Dict[str, Any] = dict( | args: Dict[str, Any] = dict( | ||||
type=object_type, | type=object_type, | ||||
id=id, | id=id, | ||||
authority_id=authority_id, | authority_id=authority_id, | ||||
fetcher_id=fetcher_id, | fetcher_id=fetcher_id, | ||||
discovery_date=discovery_date, | discovery_date=discovery_date, | ||||
format=format, | format=format, | ||||
metadata=metadata, | metadata=metadata, | ||||
origin=origin, | |||||
visit=visit, | |||||
snapshot=snapshot, | |||||
release=release, | |||||
revision=revision, | |||||
path=path, | |||||
directory=directory, | |||||
) | ) | ||||
for col in self._object_metadata_context_cols: | |||||
args[col] = context.get(col) | |||||
params = [args[col] for col in self._object_metadata_insert_cols] | params = [args[col] for col in self._object_metadata_insert_cols] | ||||
cur.execute(query, params) | cur.execute(query, params) | ||||
def object_metadata_get( | def object_metadata_get( | ||||
self, | self, | ||||
object_type: str, | object_type: str, | ||||
▲ Show 20 Lines • Show All 113 Lines • Show Last 20 Lines |