Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
Show First 20 Lines • Show All 63 Lines • ▼ Show 20 Lines | |||||
) -> None: | ) -> None: | ||||
for revision in revisions: | for revision in revisions: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
assert revision.root is not None | assert revision.root is not None | ||||
# Processed content starting from the revision's root directory. | # Processed content starting from the revision's root directory. | ||||
date = provenance.revision_get_date(revision) | date = provenance.revision_get_date(revision) | ||||
if date is None or revision.date < date: | if date is None or revision.date < date: | ||||
graph = build_isochrone_graph( | graph = build_isochrone_graph( | ||||
archive, | |||||
provenance, | provenance, | ||||
archive, | |||||
revision, | revision, | ||||
DirectoryEntry(revision.root), | DirectoryEntry(revision.root), | ||||
minsize=minsize, | minsize=minsize, | ||||
) | ) | ||||
revision_process_content( | revision_process_content( | ||||
archive, | |||||
provenance, | provenance, | ||||
archive, | |||||
revision, | revision, | ||||
graph, | graph, | ||||
trackall=trackall, | trackall=trackall, | ||||
lower=lower, | lower=lower, | ||||
mindepth=mindepth, | mindepth=mindepth, | ||||
minsize=minsize, | minsize=minsize, | ||||
) | ) | ||||
if commit: | if commit: | ||||
provenance.flush() | provenance.flush() | ||||
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | ||||
def revision_process_content( | def revision_process_content( | ||||
archive: ArchiveInterface, | |||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
archive: ArchiveInterface, | |||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
graph: IsochroneNode, | graph: IsochroneNode, | ||||
trackall: bool = True, | trackall: bool = True, | ||||
lower: bool = True, | lower: bool = True, | ||||
mindepth: int = 1, | mindepth: int = 1, | ||||
minsize: int = 0, | minsize: int = 0, | ||||
) -> None: | ) -> None: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
Show All 27 Lines | while stack: | ||||
provenance.directory_set_date_in_isochrone_frontier( | provenance.directory_set_date_in_isochrone_frontier( | ||||
current.entry, current.maxdate | current.entry, current.maxdate | ||||
) | ) | ||||
if trackall: | if trackall: | ||||
provenance.directory_add_to_revision( | provenance.directory_add_to_revision( | ||||
revision, current.entry, current.path | revision, current.entry, current.path | ||||
) | ) | ||||
flatten_directory( | flatten_directory( | ||||
archive, provenance, current.entry, minsize=minsize | provenance, archive, current.entry, minsize=minsize | ||||
) | ) | ||||
else: | else: | ||||
# If current node is an invalidated frontier, update its date for future | # If current node is an invalidated frontier, update its date for future | ||||
# revisions to get the proper value. | # revisions to get the proper value. | ||||
if current.invalid: | if current.invalid: | ||||
provenance.directory_set_date_in_isochrone_frontier( | provenance.directory_set_date_in_isochrone_frontier( | ||||
current.entry, current.maxdate | current.entry, current.maxdate | ||||
) | ) | ||||
# No point moving the frontier here. Either there are no files or they | # No point moving the frontier here. Either there are no files or they | ||||
# are being seen for the first time here. Add all blobs to current | # are being seen for the first time here. Add all blobs to current | ||||
# revision updating date if necessary, and recursively analyse | # revision updating date if necessary, and recursively analyse | ||||
# subdirectories as candidates to the outer frontier. | # subdirectories as candidates to the outer frontier. | ||||
for blob in current.entry.files: | for blob in current.entry.files: | ||||
date = provenance.content_get_early_date(blob) | date = provenance.content_get_early_date(blob) | ||||
if date is None or revision.date < date: | if date is None or revision.date < date: | ||||
provenance.content_set_early_date(blob, revision.date) | provenance.content_set_early_date(blob, revision.date) | ||||
provenance.content_add_to_revision(revision, blob, current.path) | provenance.content_add_to_revision(revision, blob, current.path) | ||||
for child in current.children: | for child in current.children: | ||||
stack.append(child) | stack.append(child) | ||||
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"}) | @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"}) | ||||
def flatten_directory( | def flatten_directory( | ||||
archive: ArchiveInterface, | |||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
archive: ArchiveInterface, | |||||
directory: DirectoryEntry, | directory: DirectoryEntry, | ||||
minsize: int = 0, | minsize: int = 0, | ||||
) -> None: | ) -> None: | ||||
"""Recursively retrieve all the files of 'directory' and insert them in the | """Recursively retrieve all the files of 'directory' and insert them in the | ||||
'provenance' database in the 'content_to_directory' table. | 'provenance' database in the 'content_to_directory' table. | ||||
""" | """ | ||||
stack = [(directory, b"")] | stack = [(directory, b"")] | ||||
while stack: | while stack: | ||||
▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines |