Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show All 19 Lines | from typing import ( | ||||
Generic, | Generic, | ||||
Hashable, | Hashable, | ||||
Iterable, | Iterable, | ||||
Iterator, | Iterator, | ||||
List, | List, | ||||
Optional, | Optional, | ||||
Tuple, | Tuple, | ||||
TypeVar, | TypeVar, | ||||
Union, | |||||
) | ) | ||||
import attr | import attr | ||||
from deprecated import deprecated | from deprecated import deprecated | ||||
from swh.core.api.serializers import msgpack_loads, msgpack_dumps | from swh.core.api.serializers import msgpack_loads, msgpack_dumps | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
BaseContent, | BaseContent, | ||||
Content, | Content, | ||||
SkippedContent, | SkippedContent, | ||||
Directory, | Directory, | ||||
Revision, | Revision, | ||||
Release, | Release, | ||||
Snapshot, | Snapshot, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
Origin, | Origin, | ||||
SHA1_SIZE, | SHA1_SIZE, | ||||
) | ) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
from swh.storage.objstorage import ObjStorage | from swh.storage.objstorage import ObjStorage | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from .exc import StorageArgumentException, HashCollision | |||||
from .converters import origin_url_to_sha1 | from .converters import origin_url_to_sha1 | ||||
from .exc import StorageArgumentException, HashCollision | |||||
from .extrinsic_metadata import check_extrinsic_metadata_context, CONTEXT_KEYS | |||||
from .utils import get_partition_bounds_bytes | from .utils import get_partition_bounds_bytes | ||||
from .writer import JournalWriter | from .writer import JournalWriter | ||||
# Max block size of contents to return | # Max block size of contents to return | ||||
BULK_BLOCK_CONTENT_LEN_MAX = 10000 | BULK_BLOCK_CONTENT_LEN_MAX = 10000 | ||||
SortedListItem = TypeVar("SortedListItem") | SortedListItem = TypeVar("SortedListItem") | ||||
▲ Show 20 Lines • Show All 949 Lines • ▼ Show 20 Lines | def stat_counters(self): | ||||
return stats | return stats | ||||
def refresh_stat_counters(self): | def refresh_stat_counters(self): | ||||
pass | pass | ||||
def content_metadata_add( | def content_metadata_add( | ||||
self, | self, | ||||
id: str, | id: str, | ||||
context: Dict[str, Union[str, bytes, int]], | |||||
discovery_date: datetime.datetime, | discovery_date: datetime.datetime, | ||||
authority: Dict[str, Any], | authority: Dict[str, Any], | ||||
fetcher: Dict[str, Any], | fetcher: Dict[str, Any], | ||||
format: str, | format: str, | ||||
metadata: bytes, | metadata: bytes, | ||||
) -> None: | ) -> None: | ||||
self._object_metadata_add( | self._object_metadata_add( | ||||
"content", id, discovery_date, authority, fetcher, format, metadata, | "content", | ||||
id, | |||||
discovery_date, | |||||
authority, | |||||
fetcher, | |||||
format, | |||||
metadata, | |||||
context, | |||||
) | ) | ||||
def origin_metadata_add( | def origin_metadata_add( | ||||
self, | self, | ||||
origin_url: str, | origin_url: str, | ||||
discovery_date: datetime.datetime, | discovery_date: datetime.datetime, | ||||
authority: Dict[str, Any], | authority: Dict[str, Any], | ||||
fetcher: Dict[str, Any], | fetcher: Dict[str, Any], | ||||
format: str, | format: str, | ||||
metadata: bytes, | metadata: bytes, | ||||
) -> None: | ) -> None: | ||||
if not isinstance(origin_url, str): | if not isinstance(origin_url, str): | ||||
raise StorageArgumentException( | raise StorageArgumentException( | ||||
"origin_url must be str, not %r" % (origin_url,) | "origin_url must be str, not %r" % (origin_url,) | ||||
) | ) | ||||
context: Dict[str, Union[str, bytes, int]] = {} # origins have no context | |||||
self._object_metadata_add( | self._object_metadata_add( | ||||
"origin", origin_url, discovery_date, authority, fetcher, format, metadata, | "origin", | ||||
origin_url, | |||||
discovery_date, | |||||
authority, | |||||
fetcher, | |||||
format, | |||||
metadata, | |||||
context, | |||||
) | ) | ||||
def _object_metadata_add( | def _object_metadata_add( | ||||
self, | self, | ||||
object_type: str, | object_type: str, | ||||
id: str, | id: str, | ||||
discovery_date: datetime.datetime, | discovery_date: datetime.datetime, | ||||
authority: Dict[str, Any], | authority: Dict[str, Any], | ||||
fetcher: Dict[str, Any], | fetcher: Dict[str, Any], | ||||
format: str, | format: str, | ||||
metadata: bytes, | metadata: bytes, | ||||
context: Dict[str, Union[str, bytes, int]], | |||||
) -> None: | ) -> None: | ||||
check_extrinsic_metadata_context(object_type, context) | |||||
if not isinstance(metadata, bytes): | if not isinstance(metadata, bytes): | ||||
raise StorageArgumentException( | raise StorageArgumentException( | ||||
"metadata must be bytes, not %r" % (metadata,) | "metadata must be bytes, not %r" % (metadata,) | ||||
) | ) | ||||
authority_key = self._metadata_authority_key(authority) | authority_key = self._metadata_authority_key(authority) | ||||
if authority_key not in self._metadata_authorities: | if authority_key not in self._metadata_authorities: | ||||
raise StorageArgumentException(f"Unknown authority {authority}") | raise StorageArgumentException(f"Unknown authority {authority}") | ||||
fetcher_key = self._metadata_fetcher_key(fetcher) | fetcher_key = self._metadata_fetcher_key(fetcher) | ||||
if fetcher_key not in self._metadata_fetchers: | if fetcher_key not in self._metadata_fetchers: | ||||
raise StorageArgumentException(f"Unknown fetcher {fetcher}") | raise StorageArgumentException(f"Unknown fetcher {fetcher}") | ||||
object_metadata_list = self._object_metadata[id][authority_key] | object_metadata_list = self._object_metadata[id][authority_key] | ||||
object_metadata: Dict[str, Any] = { | object_metadata: Dict[str, Any] = { | ||||
"id": id, | "id": id, | ||||
"discovery_date": discovery_date, | "discovery_date": discovery_date, | ||||
"authority": authority_key, | "authority": authority_key, | ||||
"fetcher": fetcher_key, | "fetcher": fetcher_key, | ||||
"format": format, | "format": format, | ||||
"metadata": metadata, | "metadata": metadata, | ||||
} | } | ||||
if CONTEXT_KEYS[object_type]: | |||||
object_metadata["context"] = context | |||||
for existing_object_metadata in object_metadata_list: | for existing_object_metadata in object_metadata_list: | ||||
if ( | if ( | ||||
existing_object_metadata["fetcher"] == fetcher_key | existing_object_metadata["fetcher"] == fetcher_key | ||||
and existing_object_metadata["discovery_date"] == discovery_date | and existing_object_metadata["discovery_date"] == discovery_date | ||||
): | ): | ||||
# Duplicate of an existing one; replace it. | # Duplicate of an existing one; replace it. | ||||
existing_object_metadata.update(object_metadata) | existing_object_metadata.update(object_metadata) | ||||
break | break | ||||
▲ Show 20 Lines • Show All 167 Lines • Show Last 20 Lines |