Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/graph.py
from __future__ import annotations | |||||
vlorentz: This file is missing a copyright header, by the way. | |||||
aeviso (Author; Unsubmitted; Done): Which copyright header should go here? | |||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import logging | import logging | ||||
import os | import os | ||||
from typing import Dict, Optional, Set | from typing import Any, Dict, Optional, Set | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
from .model import DirectoryEntry, RevisionEntry | from .model import DirectoryEntry, RevisionEntry | ||||
from .provenance import ProvenanceInterface | from .provenance import ProvenanceInterface | ||||
UTCMIN = datetime.min.replace(tzinfo=timezone.utc) | UTCMIN = datetime.min.replace(tzinfo=timezone.utc) | ||||
class HistoryNode: | class HistoryNode: | ||||
def __init__( | def __init__( | ||||
self, entry: RevisionEntry, visited: bool = False, in_history: bool = False | self, entry: RevisionEntry, visited: bool = False, in_history: bool = False | ||||
): | ) -> None: | ||||
self.entry = entry | self.entry = entry | ||||
# A revision is `visited` if it is directly pointed by an origin (ie. a head | # A revision is `visited` if it is directly pointed by an origin (ie. a head | ||||
# revision for some snapshot) | # revision for some snapshot) | ||||
self.visited = visited | self.visited = visited | ||||
# A revision is `in_history` if it appears in the history graph of an already | # A revision is `in_history` if it appears in the history graph of an already | ||||
# processed revision in the provenance database | # processed revision in the provenance database | ||||
self.in_history = in_history | self.in_history = in_history | ||||
self.parents: Set[HistoryNode] = set() | self.parents: Set[HistoryNode] = set() | ||||
def add_parent( | def add_parent( | ||||
self, parent: RevisionEntry, visited: bool = False, in_history: bool = False | self, parent: RevisionEntry, visited: bool = False, in_history: bool = False | ||||
) -> "HistoryNode": | ) -> HistoryNode: | ||||
node = HistoryNode(parent, visited=visited, in_history=in_history) | node = HistoryNode(parent, visited=visited, in_history=in_history) | ||||
self.parents.add(node) | self.parents.add(node) | ||||
return node | return node | ||||
def __str__(self): | def __str__(self) -> str: | ||||
return ( | return ( | ||||
f"<{self.entry}: visited={self.visited}, in_history={self.in_history}, " | f"<{self.entry}: visited={self.visited}, in_history={self.in_history}, " | ||||
f"parents=[{', '.join(str(parent) for parent in self.parents)}]>" | f"parents=[{', '.join(str(parent) for parent in self.parents)}]>" | ||||
) | ) | ||||
def __eq__(self, other): | def __eq__(self, other: Any) -> bool: | ||||
return isinstance(other, HistoryNode) and self.__dict__ == other.__dict__ | return isinstance(other, HistoryNode) and self.__dict__ == other.__dict__ | ||||
def __hash__(self): | def __hash__(self) -> int: | ||||
return hash((self.entry, self.visited, self.in_history)) | return hash((self.entry, self.visited, self.in_history)) | ||||
def build_history_graph( | def build_history_graph( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
) -> HistoryNode: | ) -> HistoryNode: | ||||
Show All 28 Lines | |||||
class IsochroneNode: | class IsochroneNode: | ||||
def __init__( | def __init__( | ||||
self, | self, | ||||
entry: DirectoryEntry, | entry: DirectoryEntry, | ||||
dbdate: Optional[datetime] = None, | dbdate: Optional[datetime] = None, | ||||
depth: int = 0, | depth: int = 0, | ||||
prefix: bytes = b"", | prefix: bytes = b"", | ||||
): | ) -> None: | ||||
self.entry = entry | self.entry = entry | ||||
self.depth = depth | self.depth = depth | ||||
# dbdate is the maxdate for this node that comes from the DB | # dbdate is the maxdate for this node that comes from the DB | ||||
self._dbdate: Optional[datetime] = dbdate | self._dbdate: Optional[datetime] = dbdate | ||||
# maxdate is set by the maxdate computation algorithm | # maxdate is set by the maxdate computation algorithm | ||||
self.maxdate: Optional[datetime] = None | self.maxdate: Optional[datetime] = None | ||||
# known is True if this node is already known in the db; either because | # known is True if this node is already known in the db; either because | ||||
# the current directory actually exists in the database, or because all | # the current directory actually exists in the database, or because all | ||||
# the content of the current directory is known (subdirectories and files) | # the content of the current directory is known (subdirectories and files) | ||||
self.known = self.dbdate is not None | self.known = self.dbdate is not None | ||||
self.invalid = False | self.invalid = False | ||||
self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name | self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name | ||||
self.children: Set[IsochroneNode] = set() | self.children: Set[IsochroneNode] = set() | ||||
@property | @property | ||||
def dbdate(self): | def dbdate(self) -> Optional[datetime]: | ||||
# use a property to make this attribute (mostly) read-only | # use a property to make this attribute (mostly) read-only | ||||
return self._dbdate | return self._dbdate | ||||
def invalidate(self): | def invalidate(self) -> None: | ||||
self._dbdate = None | self._dbdate = None | ||||
self.maxdate = None | self.maxdate = None | ||||
self.known = False | self.known = False | ||||
self.invalid = True | self.invalid = True | ||||
def add_directory( | def add_directory( | ||||
self, child: DirectoryEntry, date: Optional[datetime] = None | self, child: DirectoryEntry, date: Optional[datetime] = None | ||||
) -> "IsochroneNode": | ) -> IsochroneNode: | ||||
# we should not be processing this node (ie add subdirectories or files) if it's | # we should not be processing this node (ie add subdirectories or files) if it's | ||||
# actually known by the provenance DB | # actually known by the provenance DB | ||||
assert self.dbdate is None | assert self.dbdate is None | ||||
node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) | node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) | ||||
self.children.add(node) | self.children.add(node) | ||||
return node | return node | ||||
def __str__(self): | def __str__(self) -> str: | ||||
return ( | return ( | ||||
f"<{self.entry}: depth={self.depth}, " | f"<{self.entry}: depth={self.depth}, " | ||||
f"dbdate={self.dbdate}, maxdate={self.maxdate}, " | f"dbdate={self.dbdate}, maxdate={self.maxdate}, " | ||||
f"known={self.known}, invalid={self.invalid}, path={self.path}, " | f"known={self.known}, invalid={self.invalid}, path={self.path!r}, " | ||||
f"children=[{', '.join(str(child) for child in self.children)}]>" | f"children=[{', '.join(str(child) for child in self.children)}]>" | ||||
) | ) | ||||
def __eq__(self, other): | def __eq__(self, other: Any) -> bool: | ||||
return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | ||||
def __hash__(self): | def __hash__(self) -> int: | ||||
# only immutable attributes are considered to compute hash | # only immutable attributes are considered to compute hash | ||||
return hash((self.entry, self.depth, self.path)) | return hash((self.entry, self.depth, self.path)) | ||||
def build_isochrone_graph( | def build_isochrone_graph( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
▲ Show 20 Lines • Show All 111 Lines • Show Last 20 Lines |
vlorentz: This file is missing a copyright header, by the way.