diff --git a/swh/storage/in_memory.py b/swh/storage/in_memory.py --- a/swh/storage/in_memory.py +++ b/swh/storage/in_memory.py @@ -137,7 +137,7 @@ self._releases = {} self._snapshots = {} self._origins = {} - self._origins_by_id = [] + self._origins_by_id: Dict[int, str] = {} self._origins_by_sha1 = {} self._origin_visits = {} self._origin_visit_statuses: Dict[Tuple[str, int], List[OriginVisitStatus]] = {} @@ -684,16 +684,21 @@ def origin_list( self, page_token: Optional[str] = None, limit: int = 100 ) -> PagedResult[Origin]: - origin_urls = sorted(self._origins) - from_ = bisect.bisect_left(origin_urls, page_token) if page_token else 0 + from_ = int(page_token) if page_token else 1 next_page_token = None # Take one more origin so we can reuse it as the next page token if any - origins = [Origin(url=url) for url in origin_urls[from_ : from_ + limit + 1]] + origins = [] + for idx in range(from_, from_ + limit + 1): + last_id = idx + origin = self._origins_by_id.get(idx) + if not origin: + break + origins.append(Origin(url=self._origins_by_id[idx])) if len(origins) > limit: # last origin id is the next page token - next_page_token = str(origins[-1].url) + next_page_token = str(last_id) # excluding that origin from the result to respect the limit size origins = origins[:limit] @@ -766,10 +771,9 @@ def origin_add_one(self, origin: Origin) -> str: if origin.url not in self._origins: self.journal_writer.origin_add([origin]) - # generate an origin_id because it is needed by origin_get_range. - # TODO: remove this when we remove origin_get_range + # generate an origin_id because it is needed by origin_list origin_id = len(self._origins) + 1 - self._origins_by_id.append(origin.url) + self._origins_by_id[origin_id] = origin.url assert len(self._origins_by_id) == origin_id self._origins[origin.url] = origin