diff --git a/swh/provenance/revisited.gr.py b/swh/provenance/revisited.gr.py new file mode 100644 index 0000000..acbe35d --- /dev/null +++ b/swh/provenance/revisited.gr.py @@ -0,0 +1,106 @@ +def revision_process_content( + provenance: ProvenanceInterface, + revision: RevisionEntry, + directory: DirectoryEntry +): + stack = [(directory, directory.name)] + + while stack: + # Get next directory to process and query its date right before + # processing to be sure we get the most recently updated value. + current, path = stack.pop() + date = provenance.directory_get_date_in_isochrone_frontier(current) + + if date is None: + # The directory has never been seen on the isochrone graph of a + # revision. Its children should be checked. + blobs = [child for child in iter(current) if isinstance(child, FileEntry)] + dirs = [child for child in iter(current) if isinstance(child, DirectoryEntry)] + + blobdates = provenance.content_get_early_dates(blobs) + dirdates = provenance.directory_get_early_dates(dirs) + + # Get the list of ids with no duplicates to ensure we have + # available dates for all the elements. This prevents takign a + # wrong decision when a blob occurres more than once in the same + # directory. + ids = list(dict.fromkeys([child.id for child in blobs + dirs])) + if ids: + dates = list(blobdates.values()) + list(dirdates.values()) + + if len(dates) == len(ids): + # All child nodes of current directory are already known. + maxdate = max(dates) < revision.date + # GR : ok all childs have a known date + + if maxdate < revision.date: + # The directory belongs to the isochrone frontier of the + # current revision, and this is the first time it appears + # as such. + # FIXME GR : GR do not agree. + # : it means that this directory is outside the isochrone frontier + # : but may be not at the frontier if its parent directory is such that maxdate