Differential D3594 Diff 12648 swh/storage/storage.py

Changeset View

Standalone View

swh/storage/storage.py

Show All 10 Lines
from contextlib import contextmanager		from contextlib import contextmanager
from typing import (		from typing import (
Any,		Any,
Counter,		Counter,
Dict,		Dict,
Iterable,		Iterable,
List,		List,
Optional,		Optional,
		Tuple,
Union,		Union,
)		)

import attr		import attr
import psycopg2		import psycopg2
import psycopg2.pool		import psycopg2.pool
import psycopg2.errors		import psycopg2.errors

▲ Show 20 Lines • Show All 932 Lines • ▼ Show 20 Lines	) -> Optional[Dict[str, Any]]:
visit = dict(zip(db.origin_visit_get_cols, row))		visit = dict(zip(db.origin_visit_get_cols, row))
return self._origin_visit_apply_update(visit, db)		return self._origin_visit_apply_update(visit, db)
return None		return None

@timed		@timed
@db_transaction()		@db_transaction()
def origin_visit_get_random(		def origin_visit_get_random(
self, type: str, db=None, cur=None		self, type: str, db=None, cur=None
) -> Optional[Dict[str, Any]]:		) -> Optional[Tuple[OriginVisit, OriginVisitStatus]]:
row = db.origin_visit_get_random(type, cur)		row = db.origin_visit_get_random(type, cur)
if row:		if row is not None:
visit = dict(zip(db.origin_visit_get_cols, row))		visit = OriginVisit(origin=row[0], visit=row[1], date=row[2], type=row[3],)
return self._origin_visit_apply_update(visit, db)		visit_status = OriginVisitStatus(
		origin=row[0],
		visit=row[1],
		date=row[2],
		status=row[4],
		metadata=row[5],
		snapshot=row[6],
		vlorentzUnsubmitted Not Done Inline Actions This violates the abstraction, as `storage.py` now needs to hardcode the map between indices and names. I recommend you keep the dict(zip()) and use dict access. Or alternatively, make `db.py` return namedtuples (like `cql.py` does with `Row`) vlorentz: This violates the abstraction, as `storage.py` now needs to hardcode the map between indices…
		ardumontAuthorUnsubmitted Done Inline Actions I was interested in "performance". I'm a bit wary of how many conversions we keep on doing everywhere... The alternative i did not comprehend fully yet interests me, i'll look into that ;) ardumont: I was interested in "performance". I'm a bit wary of how many conversions we keep on doing…
		vlorentzUnsubmitted Not Done Inline Actions Nothing to worry about performance-wise: In [1]: from swh.model.model import OriginVisitStatus, OriginVisit In [2]: import datetime In [3]: cols = ["origin", "visit", "date", "type", "status", "metadata", "snapshot"] In [4]: row = ("o", 42, datetime.datetime.now(), "t", "full", None, None) In [5]: %timeit d = dict(zip(cols, row)); OriginVisit(origin=d["origin"], visit=d["visit"], date=d["date"], type=d["type"]); OriginVisitStatus(origin=d["origin"], visit=d["visit"], date=d["date"], status=d["status"], metadata=d["metadata"], snapshot=d["snapshot"]) 100000 loops, best of 5: 16.6 µs per loop In [6]: %timeit OriginVisit(origin=row[0], visit=row[1], date=row[2], type=row[3]); OriginVisitStatus(origin=row[0], visit=row[1], date=row[2], status=row[4], metadata=row[5], snapshot=row[6]) 100000 loops, best of 5: 15.8 µs per loop Building the `OriginVisit` and `OriginVisitStatus` objects takes much longer (about 15µs) than the dict and tuple operations (respectively 100ns and 500ns in your version and mine) vlorentz: Nothing to worry about performance-wise: ``` In [1]: from swh.model.model import…
		vlorentzUnsubmitted Not Done Inline Actions not to mention the cost of network operations, which trump all this vlorentz: not to mention the cost of network operations, which trump all this
		ardumontAuthorUnsubmitted Done Inline Actions cool, thanks ;) ardumont: cool, thanks ;)
		)
		return visit, visit_status
return None		return None

@timed		@timed
@db_transaction(statement_timeout=2000)		@db_transaction(statement_timeout=2000)
def object_find_by_sha1_git(self, ids, db=None, cur=None):		def object_find_by_sha1_git(self, ids, db=None, cur=None):
ret = {id: [] for id in ids}		ret = {id: [] for id in ids}

for retval in db.object_find_by_sha1_git(ids, cur=cur):		for retval in db.object_find_by_sha1_git(ids, cur=cur):
▲ Show 20 Lines • Show All 363 Lines • Show Last 20 Lines