Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/graph.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from __future__ import annotations | from __future__ import annotations | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import os | import os | ||||
from typing import Any, Dict, Optional, Set | from typing import Any, Dict, Optional, Set | ||||
from swh.core.statsd import statsd | |||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
from .interface import ProvenanceInterface | from .interface import ProvenanceInterface | ||||
from .model import DirectoryEntry, RevisionEntry | from .model import DirectoryEntry, RevisionEntry | ||||
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total" | |||||
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds" | |||||
UTCMIN = datetime.min.replace(tzinfo=timezone.utc) | UTCMIN = datetime.min.replace(tzinfo=timezone.utc) | ||||
class HistoryNode: | class HistoryNode: | ||||
def __init__( | def __init__( | ||||
self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False | self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False | ||||
) -> None: | ) -> None: | ||||
self.entry = entry | self.entry = entry | ||||
Show All 15 Lines | def as_dict(self) -> Dict[str, Any]: | ||||
return { | return { | ||||
"rev": hash_to_hex(self.entry.id), | "rev": hash_to_hex(self.entry.id), | ||||
"is_head": self.is_head, | "is_head": self.is_head, | ||||
"in_history": self.in_history, | "in_history": self.in_history, | ||||
} | } | ||||
class HistoryGraph: | class HistoryGraph: | ||||
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"}) | |||||
def __init__( | def __init__( | ||||
douardda: Is this the right 'method' name? | |||||
Done Inline Actions — Well, that's how we want StatsD to identify this method. Isn't it the way to do it? aeviso: Well, that's how we want StatsD to identify this method. Isn't it the way to do it? | |||||
Done Inline Actions — IMHO the "method" tag should just name the wrapped object (a method or, here, a class), so I'd prefer using "HistoryGraph" here (not a blocker, just my preference). douardda: IMHO the "method" tag should just name the wrapped object (method or, here, class).
So I'd… | |||||
Done Inline Actions — This method is actually the counterpart of `build_isochrone_graph` below, but for the origin-revision layer. We used to have a `build_history_graph` function, which was removed when HistoryGraph was refactored. From the metrics point of view, tags do not need to match function names; we just need a clear idea of what is being measured. aeviso: This method is actually the counterpart of `build_isochrone_graph` below but for the origin… | |||||
self, | self, | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
) -> None: | ) -> None: | ||||
self._head = HistoryNode( | self._head = HistoryNode( | ||||
revision, | revision, | ||||
is_head=provenance.revision_visited(revision), | is_head=provenance.revision_visited(revision), | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | ) -> None: | ||||
self.children: Set[IsochroneNode] = set() | self.children: Set[IsochroneNode] = set() | ||||
@property | @property | ||||
def dbdate(self) -> Optional[datetime]: | def dbdate(self) -> Optional[datetime]: | ||||
# use a property to make this attribute (mostly) read-only | # use a property to make this attribute (mostly) read-only | ||||
return self._dbdate | return self._dbdate | ||||
def invalidate(self) -> None: | def invalidate(self) -> None: | ||||
statsd.increment( | |||||
metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"} | |||||
) | |||||
self._dbdate = None | self._dbdate = None | ||||
self.maxdate = None | self.maxdate = None | ||||
self.known = False | self.known = False | ||||
self.invalid = True | self.invalid = True | ||||
def add_directory( | def add_directory( | ||||
self, child: DirectoryEntry, date: Optional[datetime] = None | self, child: DirectoryEntry, date: Optional[datetime] = None | ||||
) -> IsochroneNode: | ) -> IsochroneNode: | ||||
Show All 15 Lines | class IsochroneNode: | ||||
def __eq__(self, other: Any) -> bool: | def __eq__(self, other: Any) -> bool: | ||||
return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | ||||
def __hash__(self) -> int: | def __hash__(self) -> int: | ||||
# only immutable attributes are considered to compute hash | # only immutable attributes are considered to compute hash | ||||
return hash((self.entry, self.depth, self.path)) | return hash((self.entry, self.depth, self.path)) | ||||
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_isochrone_graph"}) | |||||
def build_isochrone_graph( | def build_isochrone_graph( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
directory: DirectoryEntry, | directory: DirectoryEntry, | ||||
) -> IsochroneNode: | ) -> IsochroneNode: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
assert revision.root == directory.id | assert revision.root == directory.id | ||||
▲ Show 20 Lines • Show All 93 Lines • Show Last 20 Lines |
Is this the right 'method' name?