Changeset View
Standalone View
swh/storage/storage.py
# Copyright (C) 2015-2020 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import contextlib | import contextlib | ||||
import copy | |||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
import json | import json | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from typing import Any, Dict, Iterable, List, Optional, Union | from typing import Any, Dict, Iterable, List, Optional, Union | ||||
import attr | import attr | ||||
import dateutil.parser | import dateutil.parser | ||||
import psycopg2 | import psycopg2 | ||||
import psycopg2.pool | import psycopg2.pool | ||||
import psycopg2.errors | import psycopg2.errors | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
SkippedContent, Content, Directory, Revision, Release, | Content, Directory, Origin, OriginVisit, | ||||
Snapshot, Origin, SHA1_SIZE | Revision, Release, SkippedContent, Snapshot, SHA1_SIZE | ||||
) | ) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
from swh.storage.objstorage import ObjStorage | from swh.storage.objstorage import ObjStorage | ||||
from . import converters, HashCollision | from . import converters, HashCollision | ||||
from .common import db_transaction_generator, db_transaction | from .common import db_transaction_generator, db_transaction | ||||
from .db import Db | from .db import Db | ||||
from .exc import StorageArgumentException, StorageDBError | from .exc import StorageArgumentException, StorageDBError | ||||
▲ Show 20 Lines • Show All 831 Lines • ▼ Show 20 Lines | def origin_visit_update(self, origin: str, visit_id: int, | ||||
updated_visit = {**visit, **updates} | updated_visit = {**visit, **updates} | ||||
self.journal_writer.origin_visit_update(updated_visit) | self.journal_writer.origin_visit_update(updated_visit) | ||||
with convert_validation_exceptions(): | with convert_validation_exceptions(): | ||||
db.origin_visit_update(origin_url, visit_id, updates, cur) | db.origin_visit_update(origin_url, visit_id, updates, cur) | ||||
@timed | @timed | ||||
@db_transaction() | @db_transaction() | ||||
def origin_visit_upsert(self, visits, db=None, cur=None): | def origin_visit_upsert(self, visits: Iterable[OriginVisit], | ||||
visits = copy.deepcopy(visits) | db=None, cur=None) -> None: | ||||
for visit in visits: | |||||
if isinstance(visit['date'], str): | |||||
visit['date'] = dateutil.parser.parse(visit['date']) | |||||
if not isinstance(visit['origin'], str): | |||||
raise StorageArgumentException( | |||||
"visit['origin'] must be a string, not %r" | |||||
% (visit['origin'],)) | |||||
self.journal_writer.origin_visit_upsert(visits) | self.journal_writer.origin_visit_upsert(visits) | ||||
for visit in visits: | for visit in visits: | ||||
# TODO: upsert them all in a single query | # TODO: upsert them all in a single query | ||||
db.origin_visit_upsert(**visit, cur=cur) | db.origin_visit_upsert(visit, cur=cur) | ||||
@timed | @timed | ||||
@db_transaction_generator(statement_timeout=500) | @db_transaction_generator(statement_timeout=500) | ||||
def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | def origin_visit_get(self, origin, last_visit=None, limit=None, db=None, | ||||
cur=None): | cur=None): | ||||
for line in db.origin_visit_get_all( | for line in db.origin_visit_get_all( | ||||
origin, last_visit=last_visit, limit=limit, cur=cur): | origin, last_visit=last_visit, limit=limit, cur=cur): | ||||
data = dict(zip(db.origin_visit_get_cols, line)) | data = dict(zip(db.origin_visit_get_cols, line)) | ||||
▲ Show 20 Lines • Show All 265 Lines • Show Last 20 Lines |