Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/provenance.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime | from datetime import datetime | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Dict, Generator, Iterable, Optional, Set, Tuple | from types import TracebackType | ||||
from typing import Dict, Generator, Iterable, Optional, Set, Tuple, Type | |||||
from typing_extensions import Literal, TypedDict | from typing_extensions import Literal, TypedDict | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from .interface import ( | from .interface import ( | ||||
ProvenanceInterface, | |||||
ProvenanceResult, | ProvenanceResult, | ||||
ProvenanceStorageInterface, | ProvenanceStorageInterface, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
RevisionData, | RevisionData, | ||||
) | ) | ||||
from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry | from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry | ||||
▲ Show 20 Lines • Show All 45 Lines • ▼ Show 20 Lines | def new_cache() -> ProvenanceCache: | ||||
) | ) | ||||
class Provenance: | class Provenance: | ||||
def __init__(self, storage: ProvenanceStorageInterface) -> None:
    """Bind this object to *storage* and start with an empty cache.

    Args:
        storage: backend object kept on ``self.storage``; the other
            methods of this class delegate ``open()``/``close()`` to it.
    """
    # Keep a handle on the backend first, then build a brand new cache.
    self.storage = storage
    self.cache = new_cache()
def __enter__(self) -> ProvenanceInterface:
    """Enter a ``with`` block: call ``open()`` and hand back this object."""
    self.open()
    # The object bound by ``with ... as x`` is this very instance.
    return self
def __exit__(
    self,
    exc_type: Optional[Type[BaseException]],
    exc_val: Optional[BaseException],
    exc_tb: Optional[TracebackType],
) -> None:
    """Leave a ``with`` block by calling ``close()``.

    The three exception arguments are accepted but not inspected. The
    method returns ``None``, so an exception raised inside the ``with``
    body is not suppressed.
    """
    self.close()
def clear_caches(self) -> None:
    """Discard the current cache by replacing it with a new one from ``new_cache()``."""
    # Rebinding (rather than emptying in place) leaves the old cache object
    # untouched for anyone still holding a reference to it.
    self.cache = new_cache()
def close(self) -> None:
    """Close the underlying storage by delegating to ``self.storage.close()``."""
    backend = self.storage
    backend.close()
def flush(self) -> None: | def flush(self) -> None: | ||||
# Revision-content layer insertions ############################################ | # Revision-content layer insertions ############################################ | ||||
# After relations, dates for the entities can be safely set, acknowledging that | # After relations, dates for the entities can be safely set, acknowledging that | ||||
# these entities won't need to be reprocessed in case of failure. | # these entities won't need to be reprocessed in case of failure. | ||||
cnts = { | cnts = { | ||||
src | src | ||||
for src, _, _ in self.cache["content_in_revision"] | for src, _, _ in self.cache["content_in_revision"] | ||||
▲ Show 20 Lines • Show All 243 Lines • ▼ Show 20 Lines | ) -> Dict[Sha1Git, datetime]: | ||||
cache["data"].update(updated) | cache["data"].update(updated) | ||||
dates: Dict[Sha1Git, datetime] = {} | dates: Dict[Sha1Git, datetime] = {} | ||||
for sha1 in ids: | for sha1 in ids: | ||||
date = cache["data"].setdefault(sha1, None) | date = cache["data"].setdefault(sha1, None) | ||||
if date is not None: | if date is not None: | ||||
dates[sha1] = date | dates[sha1] = date | ||||
return dates | return dates | ||||
def open(self) -> None:
    """Open the underlying storage by delegating to ``self.storage.open()``."""
    backend = self.storage
    backend.open()
def origin_add(self, origin: OriginEntry) -> None:
    """Register *origin* in the in-memory cache.

    Stores ``origin.url`` under ``origin.id`` in the ``"data"`` mapping of
    the ``"origin"`` cache and adds the id to that cache's ``"added"`` set.
    """
    origin_cache = self.cache["origin"]
    origin_cache["data"][origin.id] = origin.url
    origin_cache["added"].add(origin.id)
def revision_add(self, revision: RevisionEntry) -> None:
    """Register *revision* in the in-memory cache.

    Stores ``revision.date`` under ``revision.id`` in the ``"data"`` mapping
    of the ``"revision"`` cache and adds the id to that cache's ``"added"``
    set.
    """
    revision_cache = self.cache["revision"]
    revision_cache["data"][revision.id] = revision.date
    revision_cache["added"].add(revision.id)
▲ Show 20 Lines • Show All 46 Lines • Show Last 20 Lines |