Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
Show First 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | def revision_add( | ||||
commit: bool = True, | commit: bool = True, | ||||
) -> None: | ) -> None: | ||||
for revision in revisions: | for revision in revisions: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
assert revision.root is not None | assert revision.root is not None | ||||
# Processed content starting from the revision's root directory. | # Processed content starting from the revision's root directory. | ||||
date = provenance.revision_get_date(revision) | date = provenance.revision_get_date(revision) | ||||
if date is None or revision.date < date: | if date is None or revision.date < date: | ||||
with statsd.timed("swh.loader.graph-building-time"): | |||||
graph = build_isochrone_graph( | graph = build_isochrone_graph( | ||||
archive, | archive, | ||||
provenance, | provenance, | ||||
revision, | revision, | ||||
DirectoryEntry(revision.root), | DirectoryEntry(revision.root), | ||||
) | ) | ||||
# TODO: add file size filtering | statsd.gauge("provenance.graph-total-compexity", graph.size) | ||||
if graph.size > 1: | |||||
# logging big revisions | |||||
with open("skipped-revs-1.txt", "a") as f: | |||||
# send them to separate queue | |||||
f.write(str(revision) + "\n") | |||||
statsd.increment("swh.loader.revision-skipped") | |||||
continue | |||||
statsd.increment("swh.loader.revision-total") | |||||
with statsd.timed("swh.loader.revision-process-time"): | |||||
revision_process_content( | revision_process_content( | ||||
archive, | archive, | ||||
provenance, | provenance, | ||||
revision, | revision, | ||||
graph, | graph, | ||||
trackall=trackall, | trackall=trackall, | ||||
lower=lower, | lower=lower, | ||||
mindepth=mindepth, | mindepth=mindepth, | ||||
) | ) | ||||
if commit: | if commit: | ||||
provenance.flush() | provenance.flush() | ||||
@statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | @statsd.timed(metric=REVISION_DURATION_METRIC, tags={"method": "process_content"}) | ||||
def revision_process_content( | def revision_process_content( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines |