Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/graph.py
Show First 20 Lines • Show All 182 Lines • ▼ Show 20 Lines | ) -> IsochroneNode: | ||||
# | # | ||||
# 2. compute the maxdate for each node of the tree that was not found in the DB. | # 2. compute the maxdate for each node of the tree that was not found in the DB. | ||||
# Build the nodes structure | # Build the nodes structure | ||||
root_date = provenance.directory_get_date_in_isochrone_frontier(directory) | root_date = provenance.directory_get_date_in_isochrone_frontier(directory) | ||||
root = IsochroneNode(directory, dbdate=root_date) | root = IsochroneNode(directory, dbdate=root_date) | ||||
stack = [root] | stack = [root] | ||||
logging.debug( | logging.debug( | ||||
f"Recursively creating isochrone graph for revision {revision.id.hex()}..." | "Recursively creating isochrone graph for revision %s...", revision.id.hex() | ||||
) | ) | ||||
fdates: Dict[Sha1Git, datetime] = {} # map {file_id: date} | fdates: Dict[Sha1Git, datetime] = {} # map {file_id: date} | ||||
while stack: | while stack: | ||||
current = stack.pop() | current = stack.pop() | ||||
if current.dbdate is None or current.dbdate > revision.date: | if current.dbdate is None or current.dbdate > revision.date: | ||||
# If current directory has an associated date in the isochrone frontier that | # If current directory has an associated date in the isochrone frontier that | ||||
# is greater or equal to the current revision's one, it should be ignored as | # is greater or equal to the current revision's one, it should be ignored as | ||||
# the revision is being processed out of order. | # the revision is being processed out of order. | ||||
if current.dbdate is not None and current.dbdate > revision.date: | if current.dbdate is not None and current.dbdate > revision.date: | ||||
logging.debug( | logging.debug( | ||||
f"Invalidating frontier on {current.entry.id.hex()}" | "Invalidating frontier on %s (date %s) " | ||||
f" (date {current.dbdate})" | "when processing revision %s (date %s)", | ||||
f" when processing revision {revision.id.hex()}" | current.entry.id.hex(), | ||||
f" (date {revision.date})" | current.dbdate, | ||||
revision.id.hex(), | |||||
revision.date, | |||||
) | ) | ||||
current.invalidate() | current.invalidate() | ||||
# Pre-query all known dates for directories in the current directory | # Pre-query all known dates for directories in the current directory | ||||
# for the provenance object to have them cached and (potentially) improve | # for the provenance object to have them cached and (potentially) improve | ||||
# performance. | # performance. | ||||
current.entry.retrieve_children(archive) | current.entry.retrieve_children(archive) | ||||
ddates = provenance.directory_get_dates_in_isochrone_frontier( | ddates = provenance.directory_get_dates_in_isochrone_frontier( | ||||
current.entry.dirs | current.entry.dirs | ||||
) | ) | ||||
for dir in current.entry.dirs: | for dir in current.entry.dirs: | ||||
# Recursively analyse subdirectory nodes | # Recursively analyse subdirectory nodes | ||||
node = current.add_directory(dir, date=ddates.get(dir.id, None)) | node = current.add_directory(dir, date=ddates.get(dir.id, None)) | ||||
stack.append(node) | stack.append(node) | ||||
fdates.update(provenance.content_get_early_dates(current.entry.files)) | fdates.update(provenance.content_get_early_dates(current.entry.files)) | ||||
logging.debug( | logging.debug( | ||||
f"Isochrone graph for revision {revision.id.hex()} successfully created!" | "Isochrone graph for revision %s successfully created!", revision.id.hex() | ||||
) | ) | ||||
# Precalculate max known date for each node in the graph (only directory nodes are | # Precalculate max known date for each node in the graph (only directory nodes are | ||||
# pushed to the stack). | # pushed to the stack). | ||||
logging.debug(f"Computing maxdates for revision {revision.id.hex()}...") | logging.debug("Computing maxdates for revision %s...", revision.id.hex()) | ||||
stack = [root] | stack = [root] | ||||
while stack: | while stack: | ||||
current = stack.pop() | current = stack.pop() | ||||
# Current directory node is known if it already has an assigned date (ie. it was | # Current directory node is known if it already has an assigned date (ie. it was | ||||
# already seen as an isochrone frontier). | # already seen as an isochrone frontier). | ||||
if current.known: | if current.known: | ||||
assert current.maxdate is None | assert current.maxdate is None | ||||
Show All 34 Lines | while stack: | ||||
# possible | # possible | ||||
and all((file.id in fdates) for file in current.entry.files) | and all((file.id in fdates) for file in current.entry.files) | ||||
) | ) | ||||
else: | else: | ||||
# at least one content is being processed out-of-order, then current | # at least one content is being processed out-of-order, then current | ||||
# node should be treated as unknown | # node should be treated as unknown | ||||
current.maxdate = revision.date | current.maxdate = revision.date | ||||
current.known = False | current.known = False | ||||
logging.debug(f"Maxdates for revision {revision.id.hex()} successfully computed!") | logging.debug("Maxdates for revision %s successfully computed!", revision.id.hex()) | ||||
return root | return root |