Page Menu | Home | Software Heritage

D8593.diff
No One | Temporary

D8593.diff

diff --git a/swh/provenance/algos/__init__.py b/swh/provenance/algos/__init__.py
new file mode 100644
diff --git a/swh/provenance/directory.py b/swh/provenance/algos/directory.py
rename from swh/provenance/directory.py
rename to swh/provenance/algos/directory.py
--- a/swh/provenance/directory.py
+++ b/swh/provenance/algos/directory.py
@@ -8,10 +8,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry
REVISION_DURATION_METRIC = "swh_provenance_directory_duration_seconds"
diff --git a/swh/provenance/graph.py b/swh/provenance/algos/isochrone_graph.py
rename from swh/provenance/graph.py
rename to swh/provenance/algos/isochrone_graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/algos/isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
@@ -26,50 +25,6 @@
pass
-class HistoryGraph:
- @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
- def __init__(
- self,
- archive: ArchiveInterface,
- revision: RevisionEntry,
- ) -> None:
- self.head_id = revision.id
- self._nodes: Set[Sha1Git] = set()
- # rev -> set(parents)
- self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
-
- stack = {self.head_id}
- while stack:
- current = stack.pop()
-
- if current not in self._nodes:
- self._nodes.add(current)
- self._edges.setdefault(current, set())
- for rev, parent in archive.revision_get_some_outbound_edges(current):
- self._nodes.add(rev)
- self._edges.setdefault(rev, set()).add(parent)
- stack.add(parent)
-
- # don't process nodes for which we've already retrieved outbound edges
- stack -= self._nodes
-
- def parent_ids(self) -> Set[Sha1Git]:
- """Get all the known parent ids in the current graph"""
- return self._nodes - {self.head_id}
-
- def __str__(self) -> str:
- return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "head": self.head_id.hex(),
- "graph": {
- node.hex(): sorted(parent.hex() for parent in parents)
- for node, parents in self._edges.items()
- },
- }
-
-
class IsochroneNode:
def __init__(
self,
diff --git a/swh/provenance/origin.py b/swh/provenance/algos/origin.py
rename from swh/provenance/origin.py
rename to swh/provenance/algos/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/algos/origin.py
@@ -6,17 +6,15 @@
from datetime import datetime
from itertools import islice
import logging
-from typing import Generator, Iterable, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, Tuple
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
-from .archive import ArchiveInterface
-from .graph import HistoryGraph
-from .interface import ProvenanceInterface
-from .model import OriginEntry
-
-ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_duration_seconds"
LOG_FORMAT = (
"%(levelname) -10s %(asctime)s %(name) -30s %(funcName) "
@@ -26,6 +24,50 @@
LOGGER = logging.getLogger(__name__)
+class HistoryGraph:
+ @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "HistoryGraph"})
+ def __init__(
+ self,
+ archive: ArchiveInterface,
+ revision: RevisionEntry,
+ ) -> None:
+ self.head_id = revision.id
+ self._nodes: Set[Sha1Git] = set()
+ # rev -> set(parents)
+ self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
+
+ stack = {self.head_id}
+ while stack:
+ current = stack.pop()
+
+ if current not in self._nodes:
+ self._nodes.add(current)
+ self._edges.setdefault(current, set())
+ for rev, parent in archive.revision_get_some_outbound_edges(current):
+ self._nodes.add(rev)
+ self._edges.setdefault(rev, set()).add(parent)
+ stack.add(parent)
+
+ # don't process nodes for which we've already retrieved outbound edges
+ stack -= self._nodes
+
+ def parent_ids(self) -> Set[Sha1Git]:
+ """Get all the known parent ids in the current graph"""
+ return self._nodes - {self.head_id}
+
+ def __str__(self) -> str:
+ return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
+
+ def as_dict(self) -> Dict[str, Any]:
+ return {
+ "head": self.head_id.hex(),
+ "graph": {
+ node.hex(): sorted(parent.hex() for parent in parents)
+ for node, parents in self._edges.items()
+ },
+ }
+
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
file.
diff --git a/swh/provenance/revision.py b/swh/provenance/algos/revision.py
rename from swh/provenance/revision.py
rename to swh/provenance/algos/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/algos/revision.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,14 +9,14 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
-from .archive import ArchiveInterface
from .directory import directory_flatten
-from .graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from .isochrone_graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+REVISION_DURATION_METRIC = "swh_provenance_revision_duration_seconds"
logger = logging.getLogger(__name__)
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -164,7 +164,7 @@
)
@click.pass_context
def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]):
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -298,7 +298,7 @@
min_size: int,
max_directory_size: int,
) -> None:
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -456,7 +456,7 @@
)
@click.pass_context
def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size):
- from swh.provenance.directory import directory_flatten_range
+ from swh.provenance.algos.directory import directory_flatten_range
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -495,9 +495,9 @@
min_size: int,
) -> None:
"""Process a provided list of directories in the isochrone frontier."""
- from . import get_provenance
- from .archive import get_archive
- from .directory import CSVDirectoryIterator, directory_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.directory import CSVDirectoryIterator, directory_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
directories_provider = generate_directory_ids(filename)
@@ -579,9 +579,9 @@
min_size: int,
) -> None:
"""Process a provided list of revisions."""
- from . import get_provenance
- from .archive import get_archive
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
revisions_provider = generate_revision_tuples(filename)
@@ -626,9 +626,9 @@
@deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead")
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None:
"""Process a provided list of origins."""
- from . import get_provenance
- from .archive import get_archive
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
origins_provider = generate_origin_tuples(filename)
diff --git a/swh/provenance/journal_client.py b/swh/provenance/journal_client.py
--- a/swh/provenance/journal_client.py
+++ b/swh/provenance/journal_client.py
@@ -13,11 +13,11 @@
import sentry_sdk
from swh.model.model import TimestampWithTimezone
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.revision import revision_add
EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
diff --git a/swh/provenance/tests/test_consistency.py b/swh/provenance/tests/test_consistency.py
--- a/swh/provenance/tests/test_consistency.py
+++ b/swh/provenance/tests/test_consistency.py
@@ -4,10 +4,10 @@
# See top-level LICENSE file for more information
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import DirectoryData, ProvenanceResult
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -8,8 +8,8 @@
from typing import Tuple
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.directory import directory_add, directory_flatten_range
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add, directory_flatten_range
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, FileEntry
from swh.provenance.storage.interface import DirectoryData, RelationData, RelationType
diff --git a/swh/provenance/tests/test_directory_iterator.py b/swh/provenance/tests/test_directory_iterator.py
--- a/swh/provenance/tests/test_directory_iterator.py
+++ b/swh/provenance/tests/test_directory_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.directory import CSVDirectoryIterator
+from swh.provenance.algos.directory import CSVDirectoryIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -7,11 +7,10 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.origin import HistoryGraph, origin_add_revision
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import HistoryGraph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add_revision
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -11,11 +11,15 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.isochrone_graph import (
+ DirectoryTooLarge,
+ IsochroneNode,
+ build_isochrone_graph,
+)
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_origin_iterator.py b/swh/provenance/tests/test_origin_iterator.py
--- a/swh/provenance/tests/test_origin_iterator.py
+++ b/swh/provenance/tests/test_origin_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.origin import CSVOriginIterator
+from swh.provenance.algos.origin import CSVOriginIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.algos.origin import (
iter_origin_visit_statuses,
diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py
--- a/swh/provenance/tests/test_origin_revision_layer.py
+++ b/swh/provenance/tests/test_origin_revision_layer.py
@@ -11,10 +11,10 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.origin import origin_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry
-from swh.provenance.origin import origin_add
from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -12,12 +12,12 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Origin, Sha1Git
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
from swh.provenance.provenance import Provenance
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py
--- a/swh/provenance/tests/test_revision_content_layer.py
+++ b/swh/provenance/tests/test_revision_content_layer.py
@@ -11,11 +11,11 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.directory import directory_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import (
fill_storage,
diff --git a/swh/provenance/tests/test_revision_iterator.py b/swh/provenance/tests/test_revision_iterator.py
--- a/swh/provenance/tests/test_revision_iterator.py
+++ b/swh/provenance/tests/test_revision_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.revision import CSVRevisionIterator
+from swh.provenance.algos.revision import CSVRevisionIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
from swh.storage.interface import StorageInterface

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 1:51 PM (3 d, 4 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227539

Event Timeline