Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/origin.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from itertools import islice | from itertools import islice | ||||
import logging | |||||
import time | |||||
from typing import Generator, Iterable, Iterator, List, Optional, Tuple | from typing import Generator, Iterable, Iterator, List, Optional, Tuple | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from .archive import ArchiveInterface | from .archive import ArchiveInterface | ||||
from .graph import HistoryGraph | from .graph import HistoryGraph | ||||
from .interface import ProvenanceInterface | from .interface import ProvenanceInterface | ||||
from .model import OriginEntry, RevisionEntry | from .model import OriginEntry, RevisionEntry | ||||
def __iter__(self) -> Generator[OriginEntry, None, None]:
    """Return a generator producing one ``OriginEntry`` per stored status."""
    # Each item of ``self.statuses`` is unpacked into a (url, snapshot) pair.
    entries = (
        OriginEntry(origin_url, snapshot_id)
        for origin_url, snapshot_id in self.statuses
    )
    return entries
def origin_add(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
    origins: List[OriginEntry],
) -> None:
    """Add the given origins and their revisions to provenance, then flush.

    For each origin, in order: the origin is added to ``provenance``, its
    revisions are retrieved through ``archive``, and every revision is
    processed by ``origin_add_revision`` with a ``HistoryGraph`` built for it.
    After all origins are handled, ``provenance.flush()`` is called and the
    elapsed times are logged at debug level.

    Args:
        provenance: object receiving the origins and flushed at the end.
        archive: object used to retrieve revisions and build history graphs.
        origins: origins to process.
    """
    # Monotonic clock: durations must not be affected by system clock changes.
    start = time.monotonic()
    for origin in origins:
        provenance.origin_add(origin)
        origin.retrieve_revisions(archive)
        for revision in origin.revisions:
            graph = HistoryGraph(archive, provenance, revision)
            origin_add_revision(provenance, origin, graph)
    done = time.monotonic()
    provenance.flush()
    stop = time.monotonic()

    # The separator must be ";" alone. Writing `"Origins " ";".join(...)`
    # concatenates the two adjacent literals first and uses "Origins ;" as the
    # separator, dropping the prefix from the message.
    processed = ";".join(
        f"{origin.id.hex()}:{origin.snapshot.hex()}" for origin in origins
    )
    logging.debug(
        "Origins %s were processed in %s secs (commit took %s secs)!",
        processed,
        stop - start,
        stop - done,
    )
def origin_add_revision( | def origin_add_revision( | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
origin: OriginEntry, | origin: OriginEntry, | ||||
graph: HistoryGraph, | graph: HistoryGraph, | ||||
) -> None: | ) -> None: | ||||
# XXX: simplified version of the origin-revision algorithm. This is generating flat | # XXX: simplified version of the origin-revision algorithm. This is generating flat | ||||
Show All 33 Lines |