Changeset View
Standalone View
swh/provenance/postgresql/provenancedb.py
- This file was moved from swh/provenance/postgresql/provenancedb_base.py.
Show All 18 Lines | from ..interface import ( | ||||
EntityType, | EntityType, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
RevisionData, | RevisionData, | ||||
) | ) | ||||
class ProvenanceDBBase: | class ProvenanceDB: | ||||
def __init__( | def __init__( | ||||
self, conn: psycopg2.extensions.connection, raise_on_commit: bool = False | self, conn: psycopg2.extensions.connection, raise_on_commit: bool = False | ||||
): | ): | ||||
BaseDb.adapt_conn(conn) | BaseDb.adapt_conn(conn) | ||||
conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) | conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT) | ||||
conn.set_session(autocommit=True) | conn.set_session(autocommit=True) | ||||
self.conn = conn | self.conn = conn | ||||
self.cursor = self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) | self.cursor = self.conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) | ||||
Show All 13 Lines | def flavor(self) -> str: | ||||
return self._flavor | return self._flavor | ||||
def with_path(self) -> bool: | def with_path(self) -> bool: | ||||
return "with-path" in self.flavor | return "with-path" in self.flavor | ||||
@property | @property | ||||
def denormalized(self) -> bool: | def denormalized(self) -> bool: | ||||
return "denormalized" in self.flavor | return "denormalized" in self.flavor | ||||
def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | |||||
... | |||||
def content_find_all( | |||||
self, id: Sha1Git, limit: Optional[int] = None | |||||
) -> Generator[ProvenanceResult, None, None]: | |||||
... | |||||
def content_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | def content_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | ||||
aeviso: Why moving this up here? It was sorted in a way it was easy to find before | |||||
Done Inline Actions
To put it near other "feature flags" methods/properties (with-path and denormalized). douardda: > Why moving this up here? It was sorted in a way it was easy to find before
To put it near… | |||||
Not Done Inline ActionsAlso, I still believe the function should be in it's original place to make it easier to find ("private" methods are at the end of the class definition sorted in alphabetic order). aeviso: Also, I still believe the function should be in it's original place to make it easier to find… | |||||
Not Done Inline ActionsAlso, I don't think this is working as before. This is used for inserting/selecting on relations and path field should be skipped for the without-path flavor (see relation_add and _relation_get methods). aeviso: Also, I don't think this is working as before. This is used for inserting/selecting on… | |||||
Done Inline Actions
old code was: class ProvenanceWithoutPathDB(ProvenanceDBBase): def _relation_uses_location_table(self, relation: RelationType) -> bool: return False class ProvenanceWithPathDB(ProvenanceDBBase): def _relation_uses_location_table(self, relation: RelationType) -> bool: src, *_ = relation.value.split("_") return src in ("content", "directory") Looks exactly the same to me: if with_path is False, returns False (as ProvenanceWithoutPathDB), otherwise, it's the exact same logic as ProvenanceWithPathDB. Also note that the fact tests do pass just fine (without any modification in the code of the tests besides the test_provenance_flavor test is rather good clue that this code is still correct). douardda: > Also, I don't think this is working as before. This is used for inserting/selecting on… | |||||
Not Done Inline ActionsThe problem is when the function returns True for the without-path flavor. Then relation_add will insert in location (which shouldn't be there for that flavor in the first place) and _relation_get will return the paths. So I guess there are two options here, either: In either case I still believe the logic is not the same as before, and should be revisited. aeviso: The problem is when the function returns `True` for the `without-path` flavor. Then… | |||||
Done Inline ActionsI dont't understand what you say. The propsoed code is: def _relation_uses_location_table(self, relation: RelationType) -> bool: if self.with_path(): src = relation.value.split("_")[0] return src in ("content", "directory") return False so:
What is wrong with this logic? This
assertion of yours looks incorrect to me. It never returns True for the without-path flavor. Or do I miss something? douardda: I dont't understand what you say. The propsoed code is:
```
def… | |||||
Not Done Inline ActionsRight, I got confused by how the diff displayed the changes and I was thinking about the with-path implementation only aeviso: Right, I got confused by how the diff displayed the changes and I was thinking about the `with… | |||||
return self._entity_set_date("content", dates) | return self._entity_set_date("content", dates) | ||||
def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]: | def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]: | ||||
return self._entity_get_date("content", ids) | return self._entity_get_date("content", ids) | ||||
def directory_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | def directory_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | ||||
return self._entity_set_date("directory", dates) | return self._entity_set_date("directory", dates) | ||||
Show All 39 Lines | def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]: | ||||
""" | """ | ||||
self.cursor.execute(sql, sha1s) | self.cursor.execute(sql, sha1s) | ||||
urls.update((row["sha1"], row["url"]) for row in self.cursor.fetchall()) | urls.update((row["sha1"], row["url"]) for row in self.cursor.fetchall()) | ||||
return urls | return urls | ||||
def revision_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | def revision_set_date(self, dates: Dict[Sha1Git, datetime]) -> bool: | ||||
return self._entity_set_date("revision", dates) | return self._entity_set_date("revision", dates) | ||||
def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | |||||
Not Done Inline ActionsCapitalization on SQL strings: select -> SELECT, from -> FROM aeviso: Capitalization on SQL strings: select -> SELECT, from -> FROM | |||||
sql = "select * from swh_provenance_content_find_first(%s)" | |||||
self.cursor.execute(sql, (id,)) | |||||
row = self.cursor.fetchone() | |||||
return ProvenanceResult(**row) if row is not None else None | |||||
def content_find_all( | |||||
self, id: Sha1Git, limit: Optional[int] = None | |||||
) -> Generator[ProvenanceResult, None, None]: | |||||
Not Done Inline ActionsSame as above aeviso: Same as above | |||||
sql = "select * from swh_provenance_content_find_all(%s, %s)" | |||||
self.cursor.execute(sql, (id, limit)) | |||||
yield from (ProvenanceResult(**row) for row in self.cursor.fetchall()) | |||||
def revision_set_origin(self, origins: Dict[Sha1Git, Sha1Git]) -> bool: | def revision_set_origin(self, origins: Dict[Sha1Git, Sha1Git]) -> bool: | ||||
try: | try: | ||||
if origins: | if origins: | ||||
sql = """ | sql = """ | ||||
LOCK TABLE ONLY revision; | LOCK TABLE ONLY revision; | ||||
INSERT INTO revision(sha1, origin) | INSERT INTO revision(sha1, origin) | ||||
(SELECT V.rev AS sha1, O.id AS origin | (SELECT V.rev AS sha1, O.id AS origin | ||||
FROM (VALUES %s) AS V(rev, org) | FROM (VALUES %s) AS V(rev, org) | ||||
▲ Show 20 Lines • Show All 219 Lines • ▼ Show 20 Lines | ) -> Set[RelationData]: | ||||
if self._relation_uses_location_table(relation): | if self._relation_uses_location_table(relation): | ||||
sql += "INNER JOIN location AS L ON (L.id=CL.path)" | sql += "INNER JOIN location AS L ON (L.id=CL.path)" | ||||
self.cursor.execute(sql, sha1s) | self.cursor.execute(sql, sha1s) | ||||
result.update(RelationData(**row) for row in self.cursor.fetchall()) | result.update(RelationData(**row) for row in self.cursor.fetchall()) | ||||
return result | return result | ||||
def _relation_uses_location_table(self, relation: RelationType) -> bool: | def _relation_uses_location_table(self, relation: RelationType) -> bool: | ||||
... | if self.with_path(): | ||||
src = relation.value.split("_")[0] | |||||
return src in ("content", "directory") | |||||
return False |
Why moving this up here? It was sorted in a way it was easy to find before