Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 131 Lines • ▼ Show 20 Lines | def reset(self): | ||||
self._content_indexes = defaultdict(lambda: defaultdict(set)) | self._content_indexes = defaultdict(lambda: defaultdict(set)) | ||||
self._skipped_contents = {} | self._skipped_contents = {} | ||||
self._skipped_content_indexes = defaultdict(lambda: defaultdict(set)) | self._skipped_content_indexes = defaultdict(lambda: defaultdict(set)) | ||||
self._directories = {} | self._directories = {} | ||||
self._revisions = {} | self._revisions = {} | ||||
self._releases = {} | self._releases = {} | ||||
self._snapshots = {} | self._snapshots = {} | ||||
self._origins = {} | self._origins = {} | ||||
self._origins_by_id = [] | self._origins_by_id: Dict[int, str] = {} | ||||
self._origins_by_sha1 = {} | self._origins_by_sha1 = {} | ||||
self._origin_visits = {} | self._origin_visits = {} | ||||
self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {} | self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {} | ||||
self._persons = {} | self._persons = {} | ||||
# {object_type: {id: {authority: [metadata]}}} | # {object_type: {id: {authority: [metadata]}}} | ||||
self._raw_extrinsic_metadata: Dict[ | self._raw_extrinsic_metadata: Dict[ | ||||
MetadataTargetType, | MetadataTargetType, | ||||
▲ Show 20 Lines • Show All 530 Lines • ▼ Show 20 Lines | def origin_get(self, origins: List[str]) -> Iterable[Optional[Origin]]: | ||||
return [self.origin_get_one(origin_url) for origin_url in origins] | return [self.origin_get_one(origin_url) for origin_url in origins] | ||||
def origin_get_by_sha1(self, sha1s):
    """Look up origins by the keys of the ``_origins_by_sha1`` index.

    Args:
        sha1s: iterable of keys to look up in ``self._origins_by_sha1``.

    Returns:
        A list with one entry per item of ``sha1s``, in the same order.
        Each entry is ``self._convert_origin`` applied to the stored origin,
        or applied to ``None`` when the key is not in the index.
    """
    lookup = self._origins_by_sha1.get
    return [self._convert_origin(lookup(sha1)) for sha1 in sha1s]
def origin_list(
    self, page_token: Optional[str] = None, limit: int = 100
) -> PagedResult[Origin]:
    """Return one page of origins, walking ``self._origins_by_id`` by id.

    Args:
        page_token: decimal string holding the id of the first origin to
            return; when omitted (or empty) the listing starts at id 1.
        limit: maximum number of origins in the returned page.

    Returns:
        A ``PagedResult`` whose ``results`` holds at most ``limit`` origins
        and whose ``next_page_token`` is the id (as a string) of the first
        origin of the following page, or ``None`` when no further origin
        was found.
    """
    first_id = int(page_token) if page_token else 1

    origins = []
    next_page_token = None
    # Probe one id beyond the page: if that extra origin exists, its id is
    # the token for the next page and it is left out of the results.
    for origin_id in range(first_id, first_id + limit + 1):
        url = self._origins_by_id.get(origin_id)
        if url is None:
            # First id with no origin registered: nothing more to list.
            break
        if len(origins) == limit:
            next_page_token = str(origin_id)
            break
        origins.append(Origin(url=url))

    return PagedResult(results=origins, next_page_token=next_page_token)
def origin_search( | def origin_search( | ||||
self, | self, | ||||
url_pattern: str, | url_pattern: str, | ||||
page_token: Optional[str] = None, | page_token: Optional[str] = None, | ||||
limit: int = 50, | limit: int = 50, | ||||
regexp: bool = False, | regexp: bool = False, | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | def origin_add(self, origins: List[Origin]) -> Dict[str, int]: | ||||
self.origin_add_one(origin) | self.origin_add_one(origin) | ||||
added += 1 | added += 1 | ||||
return {"origin:add": added} | return {"origin:add": added} | ||||
def origin_add_one(self, origin: Origin) -> str:
    """Register ``origin`` unless its URL is already known; return the URL.

    For a new URL the origin is first sent to ``self.journal_writer``, then
    recorded in every in-memory index: by id, by URL, by the result of
    ``origin_url_to_sha1``, plus an empty visit list and an ``_objects``
    entry. For a URL already in ``self._origins`` nothing is written.

    Args:
        origin: the origin to add.

    Returns:
        ``origin.url``, whether or not the origin was newly added.
    """
    url = origin.url
    if url in self._origins:
        return url

    self.journal_writer.origin_add([origin])

    # generate an origin_id because it is needed by origin_list
    origin_id = len(self._origins) + 1
    self._origins_by_id[origin_id] = url
    # The id index and the URL index must grow in lockstep.
    assert len(self._origins_by_id) == origin_id

    self._origins[url] = origin
    self._origins_by_sha1[origin_url_to_sha1(url)] = origin
    self._origin_visits[url] = []
    self._objects[url].append(("origin", url))
    return url
▲ Show 20 Lines • Show All 495 Lines • Show Last 20 Lines |