Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show All 16 Lines | |||||
from swh.model.model import \ | from swh.model.model import \ | ||||
Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin | Content, Directory, Revision, Release, Snapshot, OriginVisit, Origin | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS | from swh.model.hashutil import DEFAULT_ALGORITHMS | ||||
from swh.objstorage import get_objstorage | from swh.objstorage import get_objstorage | ||||
from swh.objstorage.exc import ObjNotFoundError | from swh.objstorage.exc import ObjNotFoundError | ||||
from .storage import get_journal_writer | from .storage import get_journal_writer | ||||
from .converters import origin_url_to_sha1 | |||||
# Max block size of contents to return | # Max block size of contents to return | ||||
BULK_BLOCK_CONTENT_LEN_MAX = 10000 | BULK_BLOCK_CONTENT_LEN_MAX = 10000 | ||||
def now():
    """Return the current time as a timezone-aware datetime in UTC."""
    return datetime.datetime.now(tz=datetime.timezone.utc)
Show All 14 Lines | class Storage: | ||||
def reset(self): | def reset(self): | ||||
self._directories = {} | self._directories = {} | ||||
self._revisions = {} | self._revisions = {} | ||||
self._releases = {} | self._releases = {} | ||||
self._snapshots = {} | self._snapshots = {} | ||||
self._origins = {} | self._origins = {} | ||||
self._origins_by_id = [] | self._origins_by_id = [] | ||||
self._origins_by_sha1 = {} | |||||
self._origin_visits = {} | self._origin_visits = {} | ||||
self._persons = [] | self._persons = [] | ||||
self._origin_metadata = defaultdict(list) | self._origin_metadata = defaultdict(list) | ||||
self._tools = {} | self._tools = {} | ||||
self._metadata_providers = {} | self._metadata_providers = {} | ||||
self._objects = defaultdict(list) | self._objects = defaultdict(list) | ||||
# ideally we would want a skip list for both fast inserts and searches | # ideally we would want a skip list for both fast inserts and searches | ||||
▲ Show 20 Lines • Show All 1,003 Lines • ▼ Show 20 Lines | def origin_get(self, origins): | ||||
results.append(self._convert_origin(result)) | results.append(self._convert_origin(result)) | ||||
if return_single: | if return_single: | ||||
assert len(results) == 1 | assert len(results) == 1 | ||||
return results[0] | return results[0] | ||||
else: | else: | ||||
return results | return results | ||||
def origin_get_by_sha1(self, sha1s):
    """Return origins, identified by the sha1 of their URLs.

    Args:
        sha1s (list[bytes]): a list of sha1s

    Returns:
        list: one entry per requested sha1, in the same order as
        ``sha1s``. Each entry is the result of ``self._convert_origin``
        applied to the origin stored in ``self._origins_by_sha1`` under
        that sha1, or None if no origin matches the sha1.

    """
    results = []
    for sha1 in sha1s:
        origin = self._origins_by_sha1.get(sha1)
        # Unknown sha1s map to None directly, so the documented contract
        # does not depend on how _convert_origin treats a missing origin.
        results.append(
            None if origin is None else self._convert_origin(origin))
    return results
def origin_get_range(self, origin_from=1, origin_count=100): | def origin_get_range(self, origin_from=1, origin_count=100): | ||||
"""Retrieve ``origin_count`` origins whose ids are greater | """Retrieve ``origin_count`` origins whose ids are greater | ||||
or equal than ``origin_from``. | or equal than ``origin_from``. | ||||
Origins are sorted by id before retrieving them. | Origins are sorted by id before retrieving them. | ||||
Args: | Args: | ||||
origin_from (int): the minimum id of origins to retrieve | origin_from (int): the minimum id of origins to retrieve | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | def origin_add_one(self, origin): | ||||
# generate an origin_id because it is needed by origin_get_range. | # generate an origin_id because it is needed by origin_get_range. | ||||
# TODO: remove this when we remove origin_get_range | # TODO: remove this when we remove origin_get_range | ||||
origin_id = len(self._origins) + 1 | origin_id = len(self._origins) + 1 | ||||
self._origins_by_id.append(origin.url) | self._origins_by_id.append(origin.url) | ||||
assert len(self._origins_by_id) == origin_id | assert len(self._origins_by_id) == origin_id | ||||
self._origins[origin.url] = origin | self._origins[origin.url] = origin | ||||
self._origins_by_sha1[origin_url_to_sha1(origin.url)] = origin | |||||
self._origin_visits[origin.url] = [] | self._origin_visits[origin.url] = [] | ||||
self._objects[origin.url].append(('origin', origin.url)) | self._objects[origin.url].append(('origin', origin.url)) | ||||
return origin.url | return origin.url | ||||
def origin_visit_add(self, origin, date, type): | def origin_visit_add(self, origin, date, type): | ||||
"""Add an origin_visit for the origin at date with status 'ongoing'. | """Add an origin_visit for the origin at date with status 'ongoing'. | ||||
▲ Show 20 Lines • Show All 483 Lines • Show Last 20 Lines |