Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
Show First 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | while stack: | ||||
) | ) | ||||
else: | else: | ||||
assert current.maxdate is not None | assert current.maxdate is not None | ||||
# Current directory is not an outer isochrone frontier for any previous | # Current directory is not an outer isochrone frontier for any previous | ||||
# revision. It might be eligible for this one. | # revision. It might be eligible for this one. | ||||
if is_new_frontier( | if is_new_frontier( | ||||
current, | current, | ||||
revision=revision, | revision=revision, | ||||
trackall=trackall, | |||||
lower=lower, | lower=lower, | ||||
mindepth=mindepth, | mindepth=mindepth, | ||||
): | ): | ||||
# Outer frontier should be moved to current position in the isochrone | # Outer frontier should be moved to current position in the isochrone | ||||
# graph. This is the first time this directory is found in the isochrone | # graph. This is the first time this directory is found in the isochrone | ||||
# frontier. | # frontier. | ||||
provenance.directory_set_date_in_isochrone_frontier( | provenance.directory_set_date_in_isochrone_frontier( | ||||
current.entry, current.maxdate | current.entry, current.maxdate | ||||
Show All 24 Lines | while stack: | ||||
provenance.content_add_to_revision(revision, blob, current.path) | provenance.content_add_to_revision(revision, blob, current.path) | ||||
for child in current.children: | for child in current.children: | ||||
stack.append(child) | stack.append(child) | ||||
def is_new_frontier( | def is_new_frontier( | ||||
node: IsochroneNode, | node: IsochroneNode, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
trackall: bool = True, | |||||
lower: bool = True, | lower: bool = True, | ||||
mindepth: int = 1, | mindepth: int = 1, | ||||
) -> bool: | ) -> bool: | ||||
assert node.maxdate is not None # for mypy | assert node.maxdate is not None # for mypy | ||||
assert revision.date is not None # idem | assert revision.date is not None # idem | ||||
if trackall: | # We want to ensure that all first occurrences end up in the content_early_in_rev | ||||
# The only real condition for a directory to be a frontier is that its content | # relation. Thus, we force every blob outside a frontier to have a strictly | ||||
# is already known and its maxdate is less than (or equal to) the current revision's | # earlier date. | ||||
# date. Checking mindepth is meant to skip root directories (or any arbitrary | |||||
# depth) to improve the result. The option lower tries to maximize the reuse | |||||
# rate of previously defined frontiers by keeping them low in the directory | |||||
# tree. | |||||
return ( | |||||
node.known | |||||
and node.maxdate <= revision.date # all content is earlier than revision | |||||
and node.depth | |||||
>= mindepth # current node is deeper than the min allowed depth | |||||
and (has_blobs(node) if lower else True) # there is at least one blob in it | |||||
) | |||||
else: | |||||
# If we are only tracking first occurrences, we want to ensure that all first | |||||
# occurrences end up in the content_early_in_rev relation. Thus, we force | |||||
# every blob outside a frontier to have a strictly earlier date. | |||||
return ( | return ( | ||||
node.maxdate < revision.date # all content is earlier than revision | node.maxdate < revision.date # all content is earlier than revision | ||||
and node.depth >= mindepth # deeper than the min allowed depth | and node.depth >= mindepth # deeper than the min allowed depth | ||||
and (has_blobs(node) if lower else True) # there is at least one blob | and (has_blobs(node) if lower else True) # there is at least one blob | ||||
) | ) | ||||
def has_blobs(node: IsochroneNode) -> bool: | def has_blobs(node: IsochroneNode) -> bool: | ||||
# We may want to look for files in different ways to decide whether to define a | # We may want to look for files in different ways to decide whether to define a | ||||
# frontier or not: | # frontier or not: | ||||
# 1. Only files in current node: | # 1. Only files in current node: | ||||
return any(node.entry.files) | return any(node.entry.files) | ||||
# 2. Files anywhere in the isochrone graph | # 2. Files anywhere in the isochrone graph | ||||
# stack = [node] | # stack = [node] | ||||
# while stack: | # while stack: | ||||
# current = stack.pop() | # current = stack.pop() | ||||
# if any( | # if any(current.entry.files): | ||||
# map(lambda child: isinstance(child.entry, FileEntry), current.children)): | |||||
# return True | # return True | ||||
# else: | # else: | ||||
# # All children are directory entries. | # # All children are directory entries. | ||||
# stack.extend(current.children) | # stack.extend(current.children) | ||||
# return False | # return False | ||||
# 3. Files in the intermediate directories between current node and any previously | # 3. Files in the intermediate directories between current node and any previously | ||||
# defined frontier: | # defined frontier: | ||||
# TODO: complete this case! | # TODO: complete this case! | ||||
# return any( | |||||
# map(lambda child: isinstance(child.entry, FileEntry), node.children) | |||||
# ) or all( | |||||
# map( | |||||
# lambda child: ( | |||||
# not (isinstance(child.entry, DirectoryEntry) and child.date is None) | |||||
# ) | |||||
# or has_blobs(child), | |||||
# node.children, | |||||
# ) | |||||
# ) |