Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/storage.py
# Copyright (C) 2015-2019 The Software Heritage developers | # Copyright (C) 2015-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import itertools | import itertools | ||||
import json | import json | ||||
from collections import defaultdict | from collections import defaultdict | ||||
from concurrent.futures import ThreadPoolExecutor | from concurrent.futures import ThreadPoolExecutor | ||||
from contextlib import contextmanager | from contextlib import contextmanager | ||||
from typing import Any, Dict, Mapping, Optional | from typing import Any, Dict, List, Mapping, Optional | ||||
import dateutil.parser | import dateutil.parser | ||||
import psycopg2 | import psycopg2 | ||||
import psycopg2.pool | import psycopg2.pool | ||||
from swh.core.api import remote_api_endpoint | from swh.core.api import remote_api_endpoint | ||||
from swh.model.model import SHA1_SIZE | from swh.model.model import SHA1_SIZE | ||||
from swh.model.hashutil import ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
▲ Show 20 Lines • Show All 509 Lines • ▼ Show 20 Lines | def content_get_partition( | ||||
'next_page_token': None, | 'next_page_token': None, | ||||
} | } | ||||
if result['next']: | if result['next']: | ||||
result2['next_page_token'] = hash_to_hex(result['next']) | result2['next_page_token'] = hash_to_hex(result['next']) | ||||
return result2 | return result2 | ||||
@remote_api_endpoint('content/metadata') | @remote_api_endpoint('content/metadata') | ||||
@timed | @timed | ||||
@db_transaction_generator(statement_timeout=500) | @db_transaction(statement_timeout=500) | ||||
def content_get_metadata(self, content, db=None, cur=None): | def content_get_metadata( | ||||
self, contents: List[bytes], | |||||
db=None, cur=None) -> Dict[bytes, List[Dict]]: | |||||
"""Retrieve content metadata in bulk | """Retrieve content metadata in bulk | ||||
Args: | Args: | ||||
content: iterable of content identifiers (sha1) | contents: list of content identifiers (sha1) | ||||
Returns: | Returns: | ||||
an iterable with content metadata corresponding to the given ids | a dict mapping each requested content's sha1 to the list of | ||||
metadata entries found for it, or to an empty list if the content | |||||
does not exist. | |||||
""" | """ | ||||
for metadata in db.content_get_metadata_from_sha1s(content, cur): | result: Dict[bytes, List[Dict]] = {sha1: [] for sha1 in contents} | ||||
yield dict(zip(db.content_get_metadata_keys, metadata)) | for row in db.content_get_metadata_from_sha1s(contents, cur): | ||||
content_meta = dict(zip(db.content_get_metadata_keys, row)) | |||||
result[content_meta['sha1']].append(content_meta) | |||||
return result | |||||
vlorentz: you can skip the conditionals by using an `inner join` and initializing `result` to `{sha1… | |||||
Not Done Inline Actions: Even better: you can initialize it to dict.fromkeys(contents)! vlorentz: Even better: you can initialize it to `dict.fromkeys(contents)`! | |||||
Done Inline Actions: Sorry, I had to change the type to something more appropriate. ardumont: Sorry, I had to change the type to something more appropriate.
I think your first proposal… | |||||
Not Done Inline Actions: Yeah, you can use {sha1: [] for sha1 in contents} instead. vlorentz: Yeah, you can use `{sha1: [] for sha1 in contents}` instead. | |||||
Done Inline Actions: Yes, it works! Thanks. ardumont: Yes, it works! Thanks. | |||||
@remote_api_endpoint('content/missing') | @remote_api_endpoint('content/missing') | ||||
@timed | @timed | ||||
@db_transaction_generator() | @db_transaction_generator() | ||||
def content_missing(self, content, key_hash='sha1', db=None, cur=None): | def content_missing(self, content, key_hash='sha1', db=None, cur=None): | ||||
"""List content missing from storage | """List content missing from storage | ||||
Args: | Args: | ||||
content ([dict]): iterable of dictionaries whose keys are | content ([dict]): iterable of dictionaries whose keys are | ||||
▲ Show 20 Lines • Show All 1,570 Lines • Show Last 20 Lines |
you can skip the conditionals by using an inner join and initializing result to {sha1: None for sha1 in contents}.