Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/revision.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import logging | |||||
import os | import os | ||||
import time | |||||
from typing import Generator, Iterable, Iterator, List, Optional, Tuple | from typing import Generator, Iterable, Iterator, List, Optional, Tuple | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
from .graph import IsochroneNode, build_isochrone_graph | from .graph import IsochroneNode, build_isochrone_graph | ||||
from .interface import ProvenanceInterface | from .interface import ProvenanceInterface | ||||
from .model import DirectoryEntry, RevisionEntry | from .model import DirectoryEntry, RevisionEntry | ||||
def revision_add(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    revisions: List[RevisionEntry],
    trackall: bool = True,
    lower: bool = True,
    mindepth: int = 1,
    commit: bool = True,
) -> None:
    """Process a batch of revisions and optionally flush the provenance storage.

    For every revision, the date currently recorded in ``provenance`` is
    looked up. The revision is (re)processed only when no date is recorded
    yet, or when the revision's own date is earlier than the recorded one.
    Processing builds the isochrone graph rooted at the revision's root
    directory and hands it to ``revision_process_content``.

    Args:
        provenance: storage queried for known revision dates and flushed at
            the end when ``commit`` is true.
        archive: archive handed to the graph builder and content processor.
        revisions: revisions to process; each must have a date and a root.
        trackall: forwarded unchanged to ``revision_process_content``.
        lower: forwarded unchanged to ``revision_process_content``.
        mindepth: forwarded unchanged to ``revision_process_content``.
        commit: when true, ``provenance.flush()`` is called once after all
            revisions have been handled.

    Raises:
        AssertionError: if a revision has no date or no root directory.
    """
    # Monotonic clock: elapsed-time measurements must not be affected by
    # wall-clock adjustments (which could even yield negative durations).
    start = time.monotonic()
    for revision in revisions:
        assert revision.date is not None
        assert revision.root is not None
        # Processed content starting from the revision's root directory.
        date = provenance.revision_get_date(revision)
        if date is None or revision.date < date:
            # Lazy %-style arguments: nothing is formatted unless DEBUG is on.
            logging.debug(
                "Processing revisions %s (known date %s / revision date %s)...",
                revision.id.hex(),
                date,
                revision.date,
            )
            graph = build_isochrone_graph(
                archive,
                provenance,
                revision,
                DirectoryEntry(revision.root),
            )
            # TODO: add file size filtering
            revision_process_content(
                archive,
                provenance,
                revision,
                graph,
                trackall=trackall,
                lower=lower,
                mindepth=mindepth,
            )
    # Taken after the loop so that `stop - done` isolates the flush duration.
    done = time.monotonic()
    if commit:
        provenance.flush()
    stop = time.monotonic()
    # Joining every revision id is only worth doing when the summary will
    # actually be emitted. The message text (double space before "were"
    # included) is kept identical to what was logged before.
    if logging.getLogger().isEnabledFor(logging.DEBUG):
        logging.debug(
            "Revisions %s  were processed in %s secs (commit took %s secs)!",
            ";".join([revision.id.hex() for revision in revisions]),
            stop - start,
            stop - done,
        )
def revision_process_content( | def revision_process_content( | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
revision: RevisionEntry, | revision: RevisionEntry, | ||||
graph: IsochroneNode, | graph: IsochroneNode, | ||||
trackall: bool = True, | trackall: bool = True, | ||||
▲ Show 20 Lines • Show All 143 Lines • Show Last 20 Lines |