Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
Show First 20 Lines • Show All 53 Lines • ▼ Show 20 Lines | |||||
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "main"})
def revision_add(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    revisions: List[RevisionEntry],
    trackall: bool = True,
    lower: bool = True,
    mindepth: int = 1,
    minsize: int = 0,
    commit: bool = True,
) -> None:
    """Process a batch of revisions and record them in 'provenance'.

    A revision is processed only when 'provenance' has no date for it yet, or
    when the revision's own date is earlier than the recorded one. Processing
    builds the isochrone graph rooted at the revision's root directory and
    hands it to 'revision_process_content'.

    'trackall', 'lower' and 'mindepth' are forwarded untouched to
    'revision_process_content'; 'minsize' is forwarded both to
    'build_isochrone_graph' and to 'revision_process_content'. When 'commit'
    is true, 'provenance.flush()' is called.
    """
    for revision in revisions:
        assert revision.date is not None
        assert revision.root is not None

        # Skip revisions already known with an equal or earlier date.
        known_date = provenance.revision_get_date(revision)
        needs_processing = known_date is None or revision.date < known_date
        if not needs_processing:
            continue

        # Process content starting from the revision's root directory.
        root_entry = DirectoryEntry(revision.root)
        isochrone_graph = build_isochrone_graph(
            archive,
            provenance,
            revision,
            root_entry,
            minsize=minsize,
        )
        revision_process_content(
            archive,
            provenance,
            revision,
            isochrone_graph,
            trackall=trackall,
            lower=lower,
            mindepth=mindepth,
            minsize=minsize,
        )

    # NOTE(review): the nesting level of this flush is not recoverable from
    # the diff view this was read from; it is assumed to run once, after the
    # whole batch -- confirm against the repository.
    if commit:
        provenance.flush()
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | ||||
def revision_process_content( | def revision_process_content( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
graph: IsochroneNode, | graph: IsochroneNode, | ||||
trackall: bool = True, | trackall: bool = True, | ||||
lower: bool = True, | lower: bool = True, | ||||
mindepth: int = 1, | mindepth: int = 1, | ||||
minsize: int = 0, | |||||
) -> None: | ) -> None: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
provenance.revision_add(revision) | provenance.revision_add(revision) | ||||
stack = [graph] | stack = [graph] | ||||
while stack: | while stack: | ||||
current = stack.pop() | current = stack.pop() | ||||
if current.dbdate is not None: | if current.dbdate is not None: | ||||
Show All 20 Lines | while stack: | ||||
# frontier. | # frontier. | ||||
provenance.directory_set_date_in_isochrone_frontier( | provenance.directory_set_date_in_isochrone_frontier( | ||||
current.entry, current.maxdate | current.entry, current.maxdate | ||||
) | ) | ||||
if trackall: | if trackall: | ||||
provenance.directory_add_to_revision( | provenance.directory_add_to_revision( | ||||
revision, current.entry, current.path | revision, current.entry, current.path | ||||
) | ) | ||||
flatten_directory(archive, provenance, current.entry) | flatten_directory( | ||||
archive, provenance, current.entry, minsize=minsize | |||||
) | |||||
else: | else: | ||||
# If current node is an invalidated frontier, update its date for future | # If current node is an invalidated frontier, update its date for future | ||||
# revisions to get the proper value. | # revisions to get the proper value. | ||||
if current.invalid: | if current.invalid: | ||||
provenance.directory_set_date_in_isochrone_frontier( | provenance.directory_set_date_in_isochrone_frontier( | ||||
current.entry, current.maxdate | current.entry, current.maxdate | ||||
) | ) | ||||
# No point moving the frontier here. Either there are no files or they | # No point moving the frontier here. Either there are no files or they | ||||
Show All 9 Lines | while stack: | ||||
stack.append(child) | stack.append(child) | ||||
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "flatten_directory"})
def flatten_directory(
    archive: ArchiveInterface,
    provenance: ProvenanceInterface,
    directory: DirectoryEntry,
    minsize: int = 0,
) -> None:
    """Walk the whole tree under 'directory' and register each file found in
    the 'provenance' database ('content_to_directory' table).

    Every file is attached to the top-level 'directory' together with the
    path prefix (relative to 'directory') of the sub-directory holding it.
    'minsize' is forwarded to 'retrieve_children' (presumably a lower bound on
    file size -- confirm against DirectoryEntry).
    """
    # Explicit stack of (entry, prefix relative to 'directory') pairs.
    pending = [(directory, b"")]
    while pending:
        node, path_prefix = pending.pop()
        node.retrieve_children(archive, minsize=minsize)

        # Files are always recorded against the root 'directory', not 'node'.
        for file_entry in node.files:
            provenance.content_add_to_directory(directory, file_entry, path_prefix)

        # Queue sub-directories with their prefix extended by their own name.
        pending.extend(
            (subdir, os.path.join(path_prefix, subdir.name)) for subdir in node.dirs
        )
▲ Show 20 Lines • Show All 64 Lines • Show Last 20 Lines |