Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/in_memory.py
Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | from swh.model.model import ( | ||||
SHA1_SIZE, | SHA1_SIZE, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | from swh.model.hashutil import DEFAULT_ALGORITHMS, hash_to_bytes, hash_to_hex | ||||
from swh.storage.interface import PagedResult | |||||
from swh.storage.objstorage import ObjStorage | from swh.storage.objstorage import ObjStorage | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from .converters import origin_url_to_sha1 | from .converters import origin_url_to_sha1 | ||||
from .exc import StorageArgumentException, HashCollision | from .exc import StorageArgumentException, HashCollision | ||||
from .utils import get_partition_bounds_bytes | from .utils import get_partition_bounds_bytes | ||||
from .writer import JournalWriter | from .writer import JournalWriter | ||||
▲ Show 20 Lines • Show All 795 Lines • ▼ Show 20 Lines | def _origin_visit_get_updated(self, origin: str, visit_id: int) -> Dict[str, Any]: | ||||
**visit_update.to_dict(), | **visit_update.to_dict(), | ||||
# but keep the date of the creation of the origin visit | # but keep the date of the creation of the origin visit | ||||
"date": visit.date, | "date": visit.date, | ||||
} | } | ||||
def origin_visit_get( | def origin_visit_get( | ||||
self, | self, | ||||
origin: str, | origin: str, | ||||
last_visit: Optional[int] = None, | page_token: Optional[str] = None, | ||||
limit: Optional[int] = None, | |||||
order: str = "asc", | order: str = "asc", | ||||
) -> Iterable[Dict[str, Any]]: | limit: int = 10, | ||||
) -> PagedResult[OriginVisit]: | |||||
next_page_token = None | |||||
page_token = page_token or "0" | |||||
order = order.lower() | order = order.lower() | ||||
assert order in ["asc", "desc"] | allowed_orders = ["asc", "desc"] | ||||
if order not in allowed_orders: | |||||
raise StorageArgumentException( | |||||
f"order must be one of {', '.join(allowed_orders)}." | |||||
) | |||||
if not isinstance(page_token, str): | |||||
raise StorageArgumentException("page_token must be a string.") | |||||
visit_from = int(page_token) | |||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url in self._origin_visits: | extra_limit = limit + 1 | ||||
visits = self._origin_visits[origin_url] | visits = sorted( | ||||
visits = sorted(visits, key=lambda v: v.visit, reverse=(order == "desc")) | self._origin_visits.get(origin_url, []), | ||||
if last_visit is not None: | key=lambda v: v.visit, | ||||
if order == "asc": | reverse=(order == "desc"), | ||||
visits = [v for v in visits if v.visit > last_visit] | ) | ||||
else: | |||||
visits = [v for v in visits if v.visit < last_visit] | if visit_from > 0 and order == "asc": | ||||
if limit is not None: | visits = [v for v in visits if v.visit > visit_from] | ||||
elif visit_from > 0 and order == "desc": | |||||
visits = [v for v in visits if v.visit < visit_from] | |||||
ardumont: `visits = visits[:extra_limit]` should be enough? | |||||
Not Done (Inline Actions) vlorentz: Yes. I think I commented something about it yesterday. | |||||
Done (Inline Actions) ardumont: About `is None`: I thought I had dropped it entirely, but I messed it up. | |||||
visits = [v for v in visits if v][:extra_limit] | |||||
assert len(visits) <= extra_limit | |||||
if len(visits) == extra_limit: | |||||
last_visit = visits[limit] | |||||
visits = visits[:limit] | visits = visits[:limit] | ||||
for visit in visits: | assert last_visit is not None | ||||
if not visit: | next_page_token = str(last_visit.visit) | ||||
continue | |||||
visit_id = visit.visit | |||||
visit_update = self._origin_visit_get_updated(origin_url, visit_id) | return PagedResult(results=visits, next_page_token=next_page_token) | ||||
assert visit_update is not None | |||||
yield visit_update | |||||
def origin_visit_find_by_date( | def origin_visit_find_by_date( | ||||
self, origin: str, visit_date: datetime.datetime | self, origin: str, visit_date: datetime.datetime | ||||
) -> Optional[OriginVisit]: | ) -> Optional[OriginVisit]: | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url in self._origin_visits: | if origin_url in self._origin_visits: | ||||
visits = self._origin_visits[origin_url] | visits = self._origin_visits[origin_url] | ||||
visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit)) | visit = min(visits, key=lambda v: (abs(v.date - visit_date), -v.visit)) | ||||
return visit | return visit | ||||
return None | return None | ||||
def origin_visit_get_by(self, origin: str, visit: int) -> Optional[OriginVisit]: | def origin_visit_get_by(self, origin: str, visit: int) -> Optional[OriginVisit]: | ||||
origin_url = self._get_origin_url(origin) | origin_url = self._get_origin_url(origin) | ||||
if origin_url in self._origin_visits and visit <= len( | if origin_url in self._origin_visits and visit <= len( | ||||
Done (Inline Actions) vlorentz: Why `if v is not None`? Can `visits` really contain None objects? If so, we need this filtering sooner, because we access one of its attributes. | |||||
Done (Inline Actions) ardumont: I'm not really sure indeed.
I kept the logic from the left (line 883 with the skip when on the… | |||||
Done (Inline Actions) ardumont: s/unknown/None/ | |||||
self._origin_visits[origin_url] | self._origin_visits[origin_url] | ||||
): | ): | ||||
found_visit, _ = self._origin_visit_status_get_latest(origin, visit) | found_visit, _ = self._origin_visit_status_get_latest(origin, visit) | ||||
return found_visit | return found_visit | ||||
return None | return None | ||||
def origin_visit_get_latest( | def origin_visit_get_latest( | ||||
self, | self, | ||||
▲ Show 20 Lines • Show All 301 Lines • Show Last 20 Lines |
??
visits = visits[:extra_limit] should be enough
??