Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
from itertools import islice | from itertools import islice | ||||
import logging | import logging | ||||
import os | import os | ||||
import time | import time | ||||
from typing import Iterable, Iterator, List, Optional, Tuple | from typing import Iterable, Iterator, List, Optional, Tuple | ||||
import iso8601 | import iso8601 | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
from .graph import IsochroneNode, build_isochrone_graph | from .graph import IsochroneNode, build_isochrone_graph | ||||
from .model import DirectoryEntry, RevisionEntry | from .model import DirectoryEntry, RevisionEntry | ||||
from .provenance import ProvenanceInterface | from .provenance import ProvenanceInterface | ||||
class CSVRevisionIterator: | class CSVRevisionIterator: | ||||
▲ Show 20 Lines • Show All 47 Lines • ▼ Show 20 Lines | ) -> None: | ||||
start = time.time() | start = time.time() | ||||
for revision in revisions: | for revision in revisions: | ||||
assert revision.date is not None | assert revision.date is not None | ||||
assert revision.root is not None | assert revision.root is not None | ||||
# Processed content starting from the revision's root directory. | # Processed content starting from the revision's root directory. | ||||
date = provenance.revision_get_early_date(revision) | date = provenance.revision_get_early_date(revision) | ||||
if date is None or revision.date < date: | if date is None or revision.date < date: | ||||
logging.debug( | logging.debug( | ||||
f"Processing revisions {hash_to_hex(revision.id)}" | f"Processing revisions {revision.id.hex()}" | ||||
f" (known date {date} / revision date {revision.date})..." | f" (known date {date} / revision date {revision.date})..." | ||||
) | ) | ||||
graph = build_isochrone_graph( | graph = build_isochrone_graph( | ||||
archive, | archive, | ||||
provenance, | provenance, | ||||
revision, | revision, | ||||
DirectoryEntry(revision.root), | DirectoryEntry(revision.root), | ||||
) | ) | ||||
# TODO: add file size filtering | # TODO: add file size filtering | ||||
revision_process_content( | revision_process_content( | ||||
archive, | archive, | ||||
provenance, | provenance, | ||||
revision, | revision, | ||||
graph, | graph, | ||||
trackall=trackall, | trackall=trackall, | ||||
lower=lower, | lower=lower, | ||||
mindepth=mindepth, | mindepth=mindepth, | ||||
) | ) | ||||
done = time.time() | done = time.time() | ||||
if commit: | if commit: | ||||
provenance.commit() | provenance.commit() | ||||
stop = time.time() | stop = time.time() | ||||
logging.debug( | logging.debug( | ||||
f"Revisions {';'.join([hash_to_hex(revision.id) for revision in revisions])} " | f"Revisions {';'.join([revision.id.hex() for revision in revisions])} " | ||||
f" were processed in {stop - start} secs (commit took {stop - done} secs)!" | f" were processed in {stop - start} secs (commit took {stop - done} secs)!" | ||||
) | ) | ||||
# logging.critical( | # logging.critical( | ||||
# ";".join([hash_to_hex(revision.id) for revision in revisions]) | # ";".join([revision.id.hex() for revision in revisions]) | ||||
# + f",{stop - start},{stop - done}" | # + f",{stop - start},{stop - done}" | ||||
# ) | # ) | ||||
def revision_process_content( | def revision_process_content( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
▲ Show 20 Lines • Show All 145 Lines • Show Last 20 Lines |