Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/graph.py
Show First 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | ) -> None: | ||||
self.depth = depth | self.depth = depth | ||||
# dbdate is the maxdate for this node that comes from the DB | # dbdate is the maxdate for this node that comes from the DB | ||||
self._dbdate: Optional[datetime] = dbdate | self._dbdate: Optional[datetime] = dbdate | ||||
# maxdate is set by the maxdate computation algorithm | # maxdate is set by the maxdate computation algorithm | ||||
self.maxdate: Optional[datetime] = None | self.maxdate: Optional[datetime] = None | ||||
# known is True if this node is already known in the db; either because | |||||
# the current directory actually exists in the database, or because all | |||||
# the content of the current directory is known (subdirectories and files) | |||||
self.known = self.dbdate is not None | |||||
self.invalid = False | self.invalid = False | ||||
self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name | self.path = os.path.join(prefix, self.entry.name) if prefix else self.entry.name | ||||
self.children: Set[IsochroneNode] = set() | self.children: Set[IsochroneNode] = set() | ||||
@property | @property | ||||
def dbdate(self) -> Optional[datetime]: | def dbdate(self) -> Optional[datetime]: | ||||
# use a property to make this attribute (mostly) read-only | # use a property to make this attribute (mostly) read-only | ||||
return self._dbdate | return self._dbdate | ||||
def invalidate(self) -> None: | def invalidate(self) -> None: | ||||
statsd.increment( | statsd.increment( | ||||
metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"} | metric=GRAPH_OPERATIONS_METRIC, tags={"method": "invalidate_frontier"} | ||||
) | ) | ||||
self._dbdate = None | self._dbdate = None | ||||
self.maxdate = None | self.maxdate = None | ||||
self.known = False | |||||
self.invalid = True | self.invalid = True | ||||
def add_directory( | def add_directory( | ||||
self, child: DirectoryEntry, date: Optional[datetime] = None | self, child: DirectoryEntry, date: Optional[datetime] = None | ||||
) -> IsochroneNode: | ) -> IsochroneNode: | ||||
# we should not be processing this node (ie add subdirectories or files) if it's | # we should not be processing this node (ie add subdirectories or files) if it's | ||||
# actually known by the provenance DB | # actually known by the provenance DB | ||||
assert self.dbdate is None | assert self.dbdate is None | ||||
node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) | node = IsochroneNode(child, dbdate=date, depth=self.depth + 1, prefix=self.path) | ||||
self.children.add(node) | self.children.add(node) | ||||
return node | return node | ||||
def __str__(self) -> str: | def __str__(self) -> str: | ||||
return ( | return ( | ||||
f"<{self.entry}: depth={self.depth}, " | f"<{self.entry}: depth={self.depth}, dbdate={self.dbdate}, " | ||||
f"dbdate={self.dbdate}, maxdate={self.maxdate}, " | f"maxdate={self.maxdate}, invalid={self.invalid}, path={self.path!r}, " | ||||
f"known={self.known}, invalid={self.invalid}, path={self.path!r}, " | |||||
f"children=[{', '.join(str(child) for child in self.children)}]>" | f"children=[{', '.join(str(child) for child in self.children)}]>" | ||||
) | ) | ||||
def __eq__(self, other: Any) -> bool: | def __eq__(self, other: Any) -> bool: | ||||
return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | return isinstance(other, IsochroneNode) and self.__dict__ == other.__dict__ | ||||
def __hash__(self) -> int: | def __hash__(self) -> int: | ||||
# only immutable attributes are considered to compute hash | # only immutable attributes are considered to compute hash | ||||
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | ) -> IsochroneNode: | ||||
# Precalculate max known date for each node in the graph (only directory nodes are | # Precalculate max known date for each node in the graph (only directory nodes are | ||||
# pushed to the stack). | # pushed to the stack). | ||||
stack = [root] | stack = [root] | ||||
while stack: | while stack: | ||||
current = stack.pop() | current = stack.pop() | ||||
# Current directory node is known if it already has an assigned date (ie. it was | # Current directory node is known if it already has an assigned date (ie. it was | ||||
# already seen as an isochrone frontier). | # already seen as an isochrone frontier). | ||||
if current.known: | if current.dbdate is not None: | ||||
assert current.maxdate is None | assert current.maxdate is None | ||||
current.maxdate = current.dbdate | current.maxdate = current.dbdate | ||||
else: | else: | ||||
if any(x.maxdate is None for x in current.children): | if any(x.maxdate is None for x in current.children): | ||||
# at least one child of current has no maxdate yet | # at least one child of current has no maxdate yet | ||||
# Current node needs to be analysed again after its children. | # Current node needs to be analysed again after its children. | ||||
stack.append(current) | stack.append(current) | ||||
for child in current.children: | for child in current.children: | ||||
Show All 12 Lines | while stack: | ||||
for child in current.children | for child in current.children | ||||
if child.maxdate is not None # unnecessary, but needed for mypy | if child.maxdate is not None # unnecessary, but needed for mypy | ||||
] | ] | ||||
+ [ | + [ | ||||
fdates.get(file.id, revision.date) | fdates.get(file.id, revision.date) | ||||
for file in current.entry.files | for file in current.entry.files | ||||
] | ] | ||||
) | ) | ||||
if current.maxdate <= revision.date: | |||||
current.known = ( | |||||
# true if all subdirectories are known | |||||
all(child.known for child in current.children) | |||||
# true if all files are in fdates, i.e. if all files were known | |||||
# *before building this isochrone graph node* | |||||
# Note: the 'all()' is lazy: will stop iterating as soon as | |||||
# possible | |||||
and all((file.id in fdates) for file in current.entry.files) | |||||
) | |||||
else: | |||||
# at least one content is being processed out-of-order, then current | |||||
# node should be treated as unknown | |||||
current.maxdate = revision.date | |||||
current.known = False | |||||
return root | return root |