Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/provenance.py
Show All 17 Lines | from .interface import ( | ||||
ProvenanceInterface, | ProvenanceInterface, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
ProvenanceStorageInterface, | ProvenanceStorageInterface, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
RevisionData, | RevisionData, | ||||
) | ) | ||||
from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry | from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry | ||||
from .util import path_normalize | |||||
LOGGER = logging.getLogger(__name__) | LOGGER = logging.getLogger(__name__) | ||||
BACKEND_DURATION_METRIC = "swh_provenance_backend_duration_seconds" | BACKEND_DURATION_METRIC = "swh_provenance_backend_duration_seconds" | ||||
class DatetimeCache(TypedDict): | class DatetimeCache(TypedDict): | ||||
data: Dict[Sha1Git, Optional[datetime]] | data: Dict[Sha1Git, Optional[datetime]] | ||||
▲ Show 20 Lines • Show All 247 Lines • ▼ Show 20 Lines | def flush_revision_content_layer(self) -> None: | ||||
LOGGER.warning( | LOGGER.warning( | ||||
"Unable to write revision dates to the storage. Retrying..." | "Unable to write revision dates to the storage. Retrying..." | ||||
) | ) | ||||
def content_add_to_directory( | def content_add_to_directory( | ||||
self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes | self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes | ||||
) -> None: | ) -> None: | ||||
self.cache["content_in_directory"].add( | self.cache["content_in_directory"].add( | ||||
(blob.id, directory.id, normalize(os.path.join(prefix, blob.name))) | (blob.id, directory.id, path_normalize(os.path.join(prefix, blob.name))) | ||||
) | ) | ||||
def content_add_to_revision( | def content_add_to_revision( | ||||
self, revision: RevisionEntry, blob: FileEntry, prefix: bytes | self, revision: RevisionEntry, blob: FileEntry, prefix: bytes | ||||
) -> None: | ) -> None: | ||||
self.cache["content_in_revision"].add( | self.cache["content_in_revision"].add( | ||||
(blob.id, revision.id, normalize(os.path.join(prefix, blob.name))) | (blob.id, revision.id, path_normalize(os.path.join(prefix, blob.name))) | ||||
) | ) | ||||
def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: | ||||
return self.storage.content_find_first(id) | return self.storage.content_find_first(id) | ||||
def content_find_all( | def content_find_all( | ||||
self, id: Sha1Git, limit: Optional[int] = None | self, id: Sha1Git, limit: Optional[int] = None | ||||
) -> Generator[ProvenanceResult, None, None]: | ) -> Generator[ProvenanceResult, None, None]: | ||||
Show All 10 Lines | class Provenance: | ||||
def content_set_early_date(self, blob: FileEntry, date: datetime) -> None: | def content_set_early_date(self, blob: FileEntry, date: datetime) -> None: | ||||
self.cache["content"]["data"][blob.id] = date | self.cache["content"]["data"][blob.id] = date | ||||
self.cache["content"]["added"].add(blob.id) | self.cache["content"]["added"].add(blob.id) | ||||
def directory_add_to_revision( | def directory_add_to_revision( | ||||
self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes | self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes | ||||
) -> None: | ) -> None: | ||||
self.cache["directory_in_revision"].add( | self.cache["directory_in_revision"].add( | ||||
(directory.id, revision.id, normalize(path)) | (directory.id, revision.id, path_normalize(path)) | ||||
) | ) | ||||
def directory_get_date_in_isochrone_frontier( | def directory_get_date_in_isochrone_frontier( | ||||
self, directory: DirectoryEntry | self, directory: DirectoryEntry | ||||
) -> Optional[datetime]: | ) -> Optional[datetime]: | ||||
return self.get_dates("directory", [directory.id]).get(directory.id) | return self.get_dates("directory", [directory.id]).get(directory.id) | ||||
def directory_get_dates_in_isochrone_frontier( | def directory_get_dates_in_isochrone_frontier( | ||||
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | class Provenance: | ||||
) -> None: | ) -> None: | ||||
self.cache["revision_origin"]["data"][revision.id] = origin.id | self.cache["revision_origin"]["data"][revision.id] = origin.id | ||||
self.cache["revision_origin"]["added"].add(revision.id) | self.cache["revision_origin"]["added"].add(revision.id) | ||||
def revision_visited(self, revision: RevisionEntry) -> bool: | def revision_visited(self, revision: RevisionEntry) -> bool: | ||||
return revision.id in dict(self.cache["revision_in_origin"]) or bool( | return revision.id in dict(self.cache["revision_in_origin"]) or bool( | ||||
self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id]) | self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id]) | ||||
) | ) | ||||
def normalize(path: bytes) -> bytes: | |||||
return path[2:] if path.startswith(bytes("." + os.path.sep, "utf-8")) else path |