Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/provenance.py
Show First 20 Lines • Show All 103 Lines • ▼ Show 20 Lines | class ProvenanceInterface(Protocol): | ||||
) -> None: | ) -> None: | ||||
... | ... | ||||
def revision_visited(self, revision: RevisionEntry) -> bool: | def revision_visited(self, revision: RevisionEntry) -> bool: | ||||
... | ... | ||||
class DatetimeCache(TypedDict): | class DatetimeCache(TypedDict): | ||||
data: Dict[Sha1Git, datetime] | data: Dict[Sha1Git, Optional[datetime]] | ||||
added: Set[Sha1Git] | added: Set[Sha1Git] | ||||
class OriginCache(TypedDict): | class OriginCache(TypedDict): | ||||
data: Dict[Sha1Git, str] | data: Dict[Sha1Git, str] | ||||
added: Set[Sha1Git] | added: Set[Sha1Git] | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | ) -> Optional[Tuple[Sha1Git, Sha1Git, datetime, bytes]]: | ||||
return self.storage.content_find_first(id) | return self.storage.content_find_first(id) | ||||
def content_find_all( | def content_find_all( | ||||
self, id: Sha1Git, limit: Optional[int] = None | self, id: Sha1Git, limit: Optional[int] = None | ||||
) -> Generator[Tuple[Sha1Git, Sha1Git, datetime, bytes], None, None]: | ) -> Generator[Tuple[Sha1Git, Sha1Git, datetime, bytes], None, None]: | ||||
yield from self.storage.content_find_all(id, limit=limit) | yield from self.storage.content_find_all(id, limit=limit) | ||||
def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]: | def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]: | ||||
return self.get_dates("content", [blob.id]).get(blob.id, None) | return self.get_dates("content", [blob.id]).get(blob.id) | ||||
def content_get_early_dates( | def content_get_early_dates( | ||||
self, blobs: Iterable[FileEntry] | self, blobs: Iterable[FileEntry] | ||||
) -> Dict[Sha1Git, datetime]: | ) -> Dict[Sha1Git, datetime]: | ||||
return self.get_dates("content", [blob.id for blob in blobs]) | return self.get_dates("content", [blob.id for blob in blobs]) | ||||
def content_set_early_date(self, blob: FileEntry, date: datetime): | def content_set_early_date(self, blob: FileEntry, date: datetime): | ||||
self.cache["content"]["data"][blob.id] = date | self.cache["content"]["data"][blob.id] = date | ||||
self.cache["content"]["added"].add(blob.id) | self.cache["content"]["added"].add(blob.id) | ||||
def directory_add_to_revision( | def directory_add_to_revision( | ||||
self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes | self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes | ||||
): | ): | ||||
self.cache["directory_in_revision"].add( | self.cache["directory_in_revision"].add( | ||||
(directory.id, revision.id, normalize(path)) | (directory.id, revision.id, normalize(path)) | ||||
) | ) | ||||
def directory_get_date_in_isochrone_frontier( | def directory_get_date_in_isochrone_frontier( | ||||
self, directory: DirectoryEntry | self, directory: DirectoryEntry | ||||
) -> Optional[datetime]: | ) -> Optional[datetime]: | ||||
return self.get_dates("directory", [directory.id]).get(directory.id, None) | return self.get_dates("directory", [directory.id]).get(directory.id) | ||||
def directory_get_dates_in_isochrone_frontier( | def directory_get_dates_in_isochrone_frontier( | ||||
self, dirs: Iterable[DirectoryEntry] | self, dirs: Iterable[DirectoryEntry] | ||||
) -> Dict[Sha1Git, datetime]: | ) -> Dict[Sha1Git, datetime]: | ||||
return self.get_dates("directory", [directory.id for directory in dirs]) | return self.get_dates("directory", [directory.id for directory in dirs]) | ||||
def directory_set_date_in_isochrone_frontier( | def directory_set_date_in_isochrone_frontier( | ||||
self, directory: DirectoryEntry, date: datetime | self, directory: DirectoryEntry, date: datetime | ||||
): | ): | ||||
self.cache["directory"]["data"][directory.id] = date | self.cache["directory"]["data"][directory.id] = date | ||||
self.cache["directory"]["added"].add(directory.id) | self.cache["directory"]["added"].add(directory.id) | ||||
def get_dates( | def get_dates( | ||||
self, entity: Literal["content", "revision", "directory"], ids: List[Sha1Git] | self, entity: Literal["content", "revision", "directory"], ids: List[Sha1Git] | ||||
) -> Dict[Sha1Git, datetime]: | ) -> Dict[Sha1Git, datetime]: | ||||
cache = self.cache[entity] | cache = self.cache[entity] | ||||
missing_ids = set(id for id in ids if id not in cache) | missing_ids = set(id for id in ids if id not in cache) | ||||
if missing_ids: | if missing_ids: | ||||
cache["data"].update(self.storage.get_dates(entity, list(missing_ids))) | cache["data"].update(self.storage.get_dates(entity, list(missing_ids))) | ||||
return {sha1: cache["data"][sha1] for sha1 in ids if sha1 in cache["data"]} | dates: Dict[Sha1Git, datetime] = {} | ||||
for sha1 in ids: | |||||
date = cache["data"].get(sha1) | |||||
if date is not None: | |||||
dates[sha1] = date | |||||
return dates | |||||
douardda: I don't see the point of looping on the whole cache here rather than the input list of sha1s. | |||||
Done Inline Actions Because the cache now holds Optional[datetime] as values and this method returns datetime. So this is mainly to please mypy aeviso: Because the cache now holds `Optional[datetime]` as values and this method returns `datetime`. So… | |||||
Done Inline Actions But does my proposal code make mypy unhappy? douardda: But does my proposal code make mypy unhappy? | |||||
Done Inline Actionsmy suggested code douardda: my //suggested// code | |||||
Done Inline Actions Yes, you are making the check against a call to .get(sha1) but adding to the dict the result of the indexing ([sha1]) operator... mypy won't acknowledge that these results are equal aeviso: Yes, you are making the check against a call to `.get(sha1)` but adding to the dict the result of… | |||||
def origin_add(self, origin: OriginEntry) -> None: | def origin_add(self, origin: OriginEntry) -> None: | ||||
self.cache["origin"]["data"][origin.id] = origin.url | self.cache["origin"]["data"][origin.id] = origin.url | ||||
self.cache["origin"]["added"].add(origin.id) | self.cache["origin"]["added"].add(origin.id) | ||||
def revision_add(self, revision: RevisionEntry): | def revision_add(self, revision: RevisionEntry): | ||||
# Add current revision to the compact DB | |||||
assert revision.date is not None | |||||
self.cache["revision"]["data"][revision.id] = revision.date | self.cache["revision"]["data"][revision.id] = revision.date | ||||
self.cache["revision"]["added"].add(revision.id) | self.cache["revision"]["added"].add(revision.id) | ||||
def revision_add_before_revision( | def revision_add_before_revision( | ||||
self, relative: RevisionEntry, revision: RevisionEntry | self, relative: RevisionEntry, revision: RevisionEntry | ||||
): | ): | ||||
self.cache["revision_before_revision"].setdefault(revision.id, set()).add( | self.cache["revision_before_revision"].setdefault(revision.id, set()).add( | ||||
relative.id | relative.id | ||||
) | ) | ||||
def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): | def revision_add_to_origin(self, origin: OriginEntry, revision: RevisionEntry): | ||||
self.cache["revision_in_origin"].add((revision.id, origin.id)) | self.cache["revision_in_origin"].add((revision.id, origin.id)) | ||||
def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]: | def revision_get_early_date(self, revision: RevisionEntry) -> Optional[datetime]: | ||||
return self.get_dates("revision", [revision.id]).get(revision.id, None) | return self.get_dates("revision", [revision.id]).get(revision.id) | ||||
def revision_get_preferred_origin( | def revision_get_preferred_origin( | ||||
self, revision: RevisionEntry | self, revision: RevisionEntry | ||||
) -> Optional[Sha1Git]: | ) -> Optional[Sha1Git]: | ||||
cache = self.cache["revision_origin"] | cache = self.cache["revision_origin"] | ||||
if revision.id not in cache: | if revision.id not in cache: | ||||
origin = self.storage.revision_get_preferred_origin(revision.id) | origin = self.storage.revision_get_preferred_origin(revision.id) | ||||
if origin is not None: | if origin is not None: | ||||
Show All 22 Lines |
I don't see the point of looping on the whole cache here rather than the input list of sha1s.
Why not keep the original implementation, adapted to check for None dates?
like