Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123068
D8593.id31045.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
D8593.id31045.diff
View Options
diff --git a/swh/provenance/algos/__init__.py b/swh/provenance/algos/__init__.py
new file mode 100644
diff --git a/swh/provenance/directory.py b/swh/provenance/algos/directory.py
rename from swh/provenance/directory.py
rename to swh/provenance/algos/directory.py
--- a/swh/provenance/directory.py
+++ b/swh/provenance/algos/directory.py
@@ -8,10 +8,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry
REVISION_DURATION_METRIC = "swh_provenance_directory_duration_seconds"
diff --git a/swh/provenance/graph.py b/swh/provenance/algos/isochrone_graph.py
rename from swh/provenance/graph.py
rename to swh/provenance/algos/isochrone_graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/algos/isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
@@ -26,50 +25,6 @@
pass
-class HistoryGraph:
- @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
- def __init__(
- self,
- archive: ArchiveInterface,
- revision: RevisionEntry,
- ) -> None:
- self.head_id = revision.id
- self._nodes: Set[Sha1Git] = set()
- # rev -> set(parents)
- self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
-
- stack = {self.head_id}
- while stack:
- current = stack.pop()
-
- if current not in self._nodes:
- self._nodes.add(current)
- self._edges.setdefault(current, set())
- for rev, parent in archive.revision_get_some_outbound_edges(current):
- self._nodes.add(rev)
- self._edges.setdefault(rev, set()).add(parent)
- stack.add(parent)
-
- # don't process nodes for which we've already retrieved outbound edges
- stack -= self._nodes
-
- def parent_ids(self) -> Set[Sha1Git]:
- """Get all the known parent ids in the current graph"""
- return self._nodes - {self.head_id}
-
- def __str__(self) -> str:
- return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "head": self.head_id.hex(),
- "graph": {
- node.hex(): sorted(parent.hex() for parent in parents)
- for node, parents in self._edges.items()
- },
- }
-
-
class IsochroneNode:
def __init__(
self,
diff --git a/swh/provenance/origin.py b/swh/provenance/algos/origin.py
rename from swh/provenance/origin.py
rename to swh/provenance/algos/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/algos/origin.py
@@ -6,17 +6,15 @@
from datetime import datetime
from itertools import islice
import logging
-from typing import Generator, Iterable, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, Tuple
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
-from .archive import ArchiveInterface
-from .graph import HistoryGraph
-from .interface import ProvenanceInterface
-from .model import OriginEntry
-
-ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_duration_seconds"
LOG_FORMAT = (
"%(levelname) -10s %(asctime)s %(name) -30s %(funcName) "
@@ -26,6 +24,50 @@
LOGGER = logging.getLogger(__name__)
+class HistoryGraph:
+ @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "HistoryGraph"})
+ def __init__(
+ self,
+ archive: ArchiveInterface,
+ revision: RevisionEntry,
+ ) -> None:
+ self.head_id = revision.id
+ self._nodes: Set[Sha1Git] = set()
+ # rev -> set(parents)
+ self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
+
+ stack = {self.head_id}
+ while stack:
+ current = stack.pop()
+
+ if current not in self._nodes:
+ self._nodes.add(current)
+ self._edges.setdefault(current, set())
+ for rev, parent in archive.revision_get_some_outbound_edges(current):
+ self._nodes.add(rev)
+ self._edges.setdefault(rev, set()).add(parent)
+ stack.add(parent)
+
+ # don't process nodes for which we've already retrieved outbound edges
+ stack -= self._nodes
+
+ def parent_ids(self) -> Set[Sha1Git]:
+ """Get all the known parent ids in the current graph"""
+ return self._nodes - {self.head_id}
+
+ def __str__(self) -> str:
+ return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
+
+ def as_dict(self) -> Dict[str, Any]:
+ return {
+ "head": self.head_id.hex(),
+ "graph": {
+ node.hex(): sorted(parent.hex() for parent in parents)
+ for node, parents in self._edges.items()
+ },
+ }
+
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
file.
diff --git a/swh/provenance/revision.py b/swh/provenance/algos/revision.py
rename from swh/provenance/revision.py
rename to swh/provenance/algos/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/algos/revision.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,14 +9,14 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
-from .archive import ArchiveInterface
from .directory import directory_flatten
-from .graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from .isochrone_graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+REVISION_DURATION_METRIC = "swh_provenance_revision_duration_seconds"
logger = logging.getLogger(__name__)
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -164,7 +164,7 @@
)
@click.pass_context
def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]):
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -298,7 +298,7 @@
min_size: int,
max_directory_size: int,
) -> None:
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -456,7 +456,7 @@
)
@click.pass_context
def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size):
- from swh.provenance.directory import directory_flatten_range
+ from swh.provenance.algos.directory import directory_flatten_range
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -495,9 +495,9 @@
min_size: int,
) -> None:
"""Process a provided list of directories in the isochrone frontier."""
- from . import get_provenance
- from .archive import get_archive
- from .directory import CSVDirectoryIterator, directory_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.directory import CSVDirectoryIterator, directory_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
directories_provider = generate_directory_ids(filename)
@@ -579,9 +579,9 @@
min_size: int,
) -> None:
"""Process a provided list of revisions."""
- from . import get_provenance
- from .archive import get_archive
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
revisions_provider = generate_revision_tuples(filename)
@@ -626,9 +626,9 @@
@deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead")
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None:
"""Process a provided list of origins."""
- from . import get_provenance
- from .archive import get_archive
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
origins_provider = generate_origin_tuples(filename)
diff --git a/swh/provenance/journal_client.py b/swh/provenance/journal_client.py
--- a/swh/provenance/journal_client.py
+++ b/swh/provenance/journal_client.py
@@ -13,11 +13,11 @@
import sentry_sdk
from swh.model.model import TimestampWithTimezone
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.revision import revision_add
EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
diff --git a/swh/provenance/tests/test_consistency.py b/swh/provenance/tests/test_consistency.py
--- a/swh/provenance/tests/test_consistency.py
+++ b/swh/provenance/tests/test_consistency.py
@@ -4,10 +4,10 @@
# See top-level LICENSE file for more information
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import DirectoryData, ProvenanceResult
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -8,8 +8,8 @@
from typing import Tuple
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.directory import directory_add, directory_flatten_range
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add, directory_flatten_range
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, FileEntry
from swh.provenance.storage.interface import DirectoryData, RelationData, RelationType
diff --git a/swh/provenance/tests/test_directory_iterator.py b/swh/provenance/tests/test_directory_iterator.py
--- a/swh/provenance/tests/test_directory_iterator.py
+++ b/swh/provenance/tests/test_directory_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.directory import CSVDirectoryIterator
+from swh.provenance.algos.directory import CSVDirectoryIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -7,11 +7,10 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.origin import HistoryGraph, origin_add_revision
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import HistoryGraph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add_revision
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -11,11 +11,15 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.isochrone_graph import (
+ DirectoryTooLarge,
+ IsochroneNode,
+ build_isochrone_graph,
+)
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_origin_iterator.py b/swh/provenance/tests/test_origin_iterator.py
--- a/swh/provenance/tests/test_origin_iterator.py
+++ b/swh/provenance/tests/test_origin_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.origin import CSVOriginIterator
+from swh.provenance.algos.origin import CSVOriginIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.algos.origin import (
iter_origin_visit_statuses,
diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py
--- a/swh/provenance/tests/test_origin_revision_layer.py
+++ b/swh/provenance/tests/test_origin_revision_layer.py
@@ -11,10 +11,10 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.origin import origin_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry
-from swh.provenance.origin import origin_add
from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -12,12 +12,12 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Origin, Sha1Git
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
from swh.provenance.provenance import Provenance
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py
--- a/swh/provenance/tests/test_revision_content_layer.py
+++ b/swh/provenance/tests/test_revision_content_layer.py
@@ -11,11 +11,11 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.directory import directory_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import (
fill_storage,
diff --git a/swh/provenance/tests/test_revision_iterator.py b/swh/provenance/tests/test_revision_iterator.py
--- a/swh/provenance/tests/test_revision_iterator.py
+++ b/swh/provenance/tests/test_revision_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.revision import CSVRevisionIterator
+from swh.provenance.algos.revision import CSVRevisionIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
from swh.storage.interface import StorageInterface
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Dec 17, 7:02 PM (1 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227539
Attached To
D8593: Reorganize the code
Event Timeline
Log In to Comment