Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123682
D8593.id31038.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
62 KB
Subscribers
None
D8593.id31038.diff
View Options
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -5,66 +5,8 @@
from __future__ import annotations
-from typing import TYPE_CHECKING
-import warnings
-
-if TYPE_CHECKING:
- from .archive import ArchiveInterface
- from .interface import ProvenanceInterface, ProvenanceStorageInterface
-
-
-def get_archive(cls: str, **kwargs) -> ArchiveInterface:
- """Get an archive object of class ``cls`` with arguments ``args``.
-
- Args:
- cls: archive's class, either 'api', 'direct' or 'graph'
- args: dictionary of arguments passed to the archive class constructor
-
- Returns:
- an instance of archive object (either using swh.storage API or direct
- queries to the archive's database)
-
- Raises:
- :cls:`ValueError` if passed an unknown archive class.
- """
- if cls == "api":
- from swh.storage import get_storage
-
- from .storage.archive import ArchiveStorage
-
- return ArchiveStorage(get_storage(**kwargs["storage"]))
-
- elif cls == "direct":
- from swh.core.db import BaseDb
-
- from .postgresql.archive import ArchivePostgreSQL
-
- return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
-
- elif cls == "graph":
- try:
- from swh.storage import get_storage
-
- from .swhgraph.archive import ArchiveGraph
-
- return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"]))
-
- except ModuleNotFoundError:
- raise EnvironmentError(
- "Graph configuration required but module is not installed."
- )
- elif cls == "multiplexer":
-
- from .multiplexer.archive import ArchiveMultiplexed
-
- archives = []
- for ctr, archive in enumerate(kwargs["archives"]):
- name = archive.pop("name", f"backend_{ctr}")
- archives.append((name, get_archive(**archive)))
-
- return ArchiveMultiplexed(archives)
- else:
- raise ValueError
+from .interface import ProvenanceInterface
+from .storage import get_provenance_storage
def get_provenance(**kwargs) -> ProvenanceInterface:
@@ -82,43 +24,4 @@
return Provenance(get_provenance_storage(**kwargs))
-def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface:
- """Get an archive object of class ``cls`` with arguments ``args``.
-
- Args:
- cls: storage's class, only 'local' is currently supported
- args: dictionary of arguments passed to the storage class constructor
-
- Returns:
- an instance of storage object
-
- Raises:
- :cls:`ValueError` if passed an unknown archive class.
- """
- if cls in ["local", "postgresql"]:
- from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
-
- if cls == "local":
- warnings.warn(
- '"local" class is deprecated for provenance storage, please '
- 'use "postgresql" class instead.',
- DeprecationWarning,
- )
-
- raise_on_commit = kwargs.get("raise_on_commit", False)
- return ProvenanceStoragePostgreSql(
- raise_on_commit=raise_on_commit, **kwargs["db"]
- )
-
- elif cls == "rabbitmq":
- from .api.client import ProvenanceStorageRabbitMQClient
-
- rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs)
- if TYPE_CHECKING:
- assert isinstance(rmq_storage, ProvenanceStorageInterface)
- return rmq_storage
-
- raise ValueError
-
-
get_datastore = get_provenance_storage
diff --git a/swh/provenance/api/__init__.py b/swh/provenance/algos/__init__.py
rename from swh/provenance/api/__init__.py
rename to swh/provenance/algos/__init__.py
diff --git a/swh/provenance/directory.py b/swh/provenance/algos/directory.py
rename from swh/provenance/directory.py
rename to swh/provenance/algos/directory.py
--- a/swh/provenance/directory.py
+++ b/swh/provenance/algos/directory.py
@@ -8,10 +8,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry
REVISION_DURATION_METRIC = "swh_provenance_directory_duration_seconds"
diff --git a/swh/provenance/graph.py b/swh/provenance/algos/isochrone_graph.py
rename from swh/provenance/graph.py
rename to swh/provenance/algos/isochrone_graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/algos/isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
@@ -26,50 +25,6 @@
pass
-class HistoryGraph:
- @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
- def __init__(
- self,
- archive: ArchiveInterface,
- revision: RevisionEntry,
- ) -> None:
- self.head_id = revision.id
- self._nodes: Set[Sha1Git] = set()
- # rev -> set(parents)
- self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
-
- stack = {self.head_id}
- while stack:
- current = stack.pop()
-
- if current not in self._nodes:
- self._nodes.add(current)
- self._edges.setdefault(current, set())
- for rev, parent in archive.revision_get_some_outbound_edges(current):
- self._nodes.add(rev)
- self._edges.setdefault(rev, set()).add(parent)
- stack.add(parent)
-
- # don't process nodes for which we've already retrieved outbound edges
- stack -= self._nodes
-
- def parent_ids(self) -> Set[Sha1Git]:
- """Get all the known parent ids in the current graph"""
- return self._nodes - {self.head_id}
-
- def __str__(self) -> str:
- return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "head": self.head_id.hex(),
- "graph": {
- node.hex(): sorted(parent.hex() for parent in parents)
- for node, parents in self._edges.items()
- },
- }
-
-
class IsochroneNode:
def __init__(
self,
diff --git a/swh/provenance/origin.py b/swh/provenance/algos/origin.py
rename from swh/provenance/origin.py
rename to swh/provenance/algos/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/algos/origin.py
@@ -6,17 +6,15 @@
from datetime import datetime
from itertools import islice
import logging
-from typing import Generator, Iterable, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, Tuple
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
-from .archive import ArchiveInterface
-from .graph import HistoryGraph
-from .interface import ProvenanceInterface
-from .model import OriginEntry
-
-ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_duration_seconds"
LOG_FORMAT = (
"%(levelname) -10s %(asctime)s %(name) -30s %(funcName) "
@@ -26,6 +24,50 @@
LOGGER = logging.getLogger(__name__)
+class HistoryGraph:
+ @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "HistoryGraph"})
+ def __init__(
+ self,
+ archive: ArchiveInterface,
+ revision: RevisionEntry,
+ ) -> None:
+ self.head_id = revision.id
+ self._nodes: Set[Sha1Git] = set()
+ # rev -> set(parents)
+ self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
+
+ stack = {self.head_id}
+ while stack:
+ current = stack.pop()
+
+ if current not in self._nodes:
+ self._nodes.add(current)
+ self._edges.setdefault(current, set())
+ for rev, parent in archive.revision_get_some_outbound_edges(current):
+ self._nodes.add(rev)
+ self._edges.setdefault(rev, set()).add(parent)
+ stack.add(parent)
+
+ # don't process nodes for which we've already retrieved outbound edges
+ stack -= self._nodes
+
+ def parent_ids(self) -> Set[Sha1Git]:
+ """Get all the known parent ids in the current graph"""
+ return self._nodes - {self.head_id}
+
+ def __str__(self) -> str:
+ return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
+
+ def as_dict(self) -> Dict[str, Any]:
+ return {
+ "head": self.head_id.hex(),
+ "graph": {
+ node.hex(): sorted(parent.hex() for parent in parents)
+ for node, parents in self._edges.items()
+ },
+ }
+
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
file.
diff --git a/swh/provenance/revision.py b/swh/provenance/algos/revision.py
rename from swh/provenance/revision.py
rename to swh/provenance/algos/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/algos/revision.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,14 +9,14 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
-from .archive import ArchiveInterface
from .directory import directory_flatten
-from .graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from .isochrone_graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+REVISION_DURATION_METRIC = "swh_provenance_revision_duration_seconds"
logger = logging.getLogger(__name__)
diff --git a/swh/provenance/archive/__init__.py b/swh/provenance/archive/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/archive/__init__.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from __future__ import annotations
+
+from .interface import ArchiveInterface
+
+
+def get_archive(cls: str, **kwargs) -> ArchiveInterface:
+ """Get an ArchiveInterface-like object of class ``cls`` with arguments ``args``.
+
+ Args:
+ cls: archive's class, either 'api', 'direct', 'graph' or 'multiplexer'
+ args: dictionary of arguments passed to the archive class constructor
+
+ Returns:
+ an instance of archive object (either using swh.storage API or direct
+ queries to the archive's database)
+
+ Raises:
+ :exc:`ValueError` if passed an unknown archive class.
+ """
+ if cls == "api":
+ from swh.storage import get_storage
+
+ from .storage import ArchiveStorage
+
+ return ArchiveStorage(get_storage(**kwargs["storage"]))
+
+ elif cls == "direct":
+ from swh.core.db import BaseDb
+
+ from .postgresql import ArchivePostgreSQL
+
+ return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
+
+ elif cls == "graph":
+ try:
+ from swh.storage import get_storage
+
+ from .swhgraph import ArchiveGraph
+
+ return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"]))
+
+ except ModuleNotFoundError:
+ raise EnvironmentError(
+ "Graph configuration required but module is not installed."
+ )
+ elif cls == "multiplexer":
+
+ from .multiplexer import ArchiveMultiplexed
+
+ archives = []
+ for ctr, archive in enumerate(kwargs["archives"]):
+ name = archive.pop("name", f"backend_{ctr}")
+ archives.append((name, get_archive(**archive)))
+
+ return ArchiveMultiplexed(archives)
+ else:
+ raise ValueError
diff --git a/swh/provenance/archive.py b/swh/provenance/archive/interface.py
rename from swh/provenance/archive.py
rename to swh/provenance/archive/interface.py
diff --git a/swh/provenance/multiplexer/archive.py b/swh/provenance/archive/multiplexer.py
rename from swh/provenance/multiplexer/archive.py
rename to swh/provenance/archive/multiplexer.py
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/archive/postgresql.py
rename from swh/provenance/postgresql/archive.py
rename to swh/provenance/archive/postgresql.py
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/archive/storage.py
rename from swh/provenance/storage/archive.py
rename to swh/provenance/archive/storage.py
diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/archive/swhgraph.py
rename from swh/provenance/swhgraph/archive.py
rename to swh/provenance/archive/swhgraph.py
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -144,7 +144,8 @@
@cli.group(name="origin")
@click.pass_context
def origin(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -163,7 +164,7 @@
)
@click.pass_context
def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]):
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -222,7 +223,8 @@
@cli.group(name="revision")
@click.pass_context
def revision(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -296,7 +298,7 @@
min_size: int,
max_directory_size: int,
) -> None:
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -427,7 +429,8 @@
@cli.group(name="directory")
@click.pass_context
def directory(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -453,7 +456,7 @@
)
@click.pass_context
def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size):
- from swh.provenance.directory import directory_flatten_range
+ from swh.provenance.algos.directory import directory_flatten_range
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -492,8 +495,9 @@
min_size: int,
) -> None:
"""Process a provided list of directories in the isochrone frontier."""
- from . import get_archive, get_provenance
- from .directory import CSVDirectoryIterator, directory_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.directory import CSVDirectoryIterator, directory_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
directories_provider = generate_directory_ids(filename)
@@ -575,8 +579,9 @@
min_size: int,
) -> None:
"""Process a provided list of revisions."""
- from . import get_archive, get_provenance
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
revisions_provider = generate_revision_tuples(filename)
@@ -621,8 +626,9 @@
@deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead")
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None:
"""Process a provided list of origins."""
- from . import get_archive, get_provenance
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
origins_provider = generate_origin_tuples(filename)
diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -5,232 +5,16 @@
from __future__ import annotations
-from dataclasses import dataclass
from datetime import datetime
-import enum
from types import TracebackType
-from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union
+from typing import Dict, Generator, Iterable, Optional, Type
from typing_extensions import Protocol, runtime_checkable
-from swh.core.api import remote_api_endpoint
from swh.model.model import Sha1Git
from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
-
-
-class EntityType(enum.Enum):
- CONTENT = "content"
- DIRECTORY = "directory"
- REVISION = "revision"
- ORIGIN = "origin"
-
-
-class RelationType(enum.Enum):
- CNT_EARLY_IN_REV = "content_in_revision"
- CNT_IN_DIR = "content_in_directory"
- DIR_IN_REV = "directory_in_revision"
- REV_IN_ORG = "revision_in_origin"
- REV_BEFORE_REV = "revision_before_revision"
-
-
-@dataclass(eq=True, frozen=True)
-class ProvenanceResult:
- content: Sha1Git
- revision: Sha1Git
- date: datetime
- origin: Optional[str]
- path: bytes
-
-
-@dataclass(eq=True, frozen=True)
-class DirectoryData:
- """Object representing the data associated to a directory in the provenance model,
- where `date` is the date of the directory in the isochrone frontier, and `flat` is a
- flag acknowledging that a flat model for the elements outside the frontier has
- already been created.
- """
-
- date: datetime
- flat: bool
-
-
-@dataclass(eq=True, frozen=True)
-class RevisionData:
- """Object representing the data associated to a revision in the provenance model,
- where `date` is the optional date of the revision (specifying it acknowledges that
- the revision was already processed by the revision-content algorithm); and `origin`
- identifies the preferred origin for the revision, if any.
- """
-
- date: Optional[datetime]
- origin: Optional[Sha1Git]
-
-
-@dataclass(eq=True, frozen=True)
-class RelationData:
- """Object representing a relation entry in the provenance model, where `src` and
- `dst` are the sha1 ids of the entities being related, and `path` is optional
- depending on the relation being represented.
- """
-
- dst: Sha1Git
- path: Optional[bytes]
-
-
-@runtime_checkable
-class ProvenanceStorageInterface(Protocol):
- def __enter__(self) -> ProvenanceStorageInterface:
- ...
-
- def __exit__(
- self,
- exc_type: Optional[Type[BaseException]],
- exc_val: Optional[BaseException],
- exc_tb: Optional[TracebackType],
- ) -> None:
- ...
-
- @remote_api_endpoint("close")
- def close(self) -> None:
- """Close connection to the storage and release resources."""
- ...
-
- @remote_api_endpoint("content_add")
- def content_add(self, cnts: Dict[Sha1Git, datetime]) -> bool:
- """Add blobs identified by sha1 ids, with an associated date (as paired in
- `cnts`) to the provenance storage. Return a boolean stating whether the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("content_find_first")
- def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]:
- """Retrieve the first occurrence of the blob identified by `id`."""
- ...
-
- @remote_api_endpoint("content_find_all")
- def content_find_all(
- self, id: Sha1Git, limit: Optional[int] = None
- ) -> Generator[ProvenanceResult, None, None]:
- """Retrieve all the occurrences of the blob identified by `id`."""
- ...
-
- @remote_api_endpoint("content_get")
- def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
- """Retrieve the associated date for each blob sha1 in `ids`."""
- ...
-
- @remote_api_endpoint("directory_add")
- def directory_add(self, dirs: Dict[Sha1Git, DirectoryData]) -> bool:
- """Add directories identified by sha1 ids, with associated date and (optional)
- flatten flag (as paired in `dirs`) to the provenance storage. If the flatten
- flag is set to None, the previous value present in the storage is preserved.
- Return a boolean stating if the information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("directory_get")
- def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, DirectoryData]:
- """Retrieve the associated date and (optional) flatten flag for each directory
- sha1 in `ids`. If some directories has no associated date, it is not present in
- the resulting dictionary.
- """
- ...
-
- @remote_api_endpoint("directory_iter_not_flattenned")
- def directory_iter_not_flattenned(
- self, limit: int, start_id: Sha1Git
- ) -> List[Sha1Git]:
- """Retrieve the unflattenned directories after ``start_id`` up to ``limit`` entries."""
- ...
-
- @remote_api_endpoint("entity_get_all")
- def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
- """Retrieve all sha1 ids for entities of type `entity` present in the provenance
- model. This method is used only in tests.
- """
- ...
-
- @remote_api_endpoint("location_add")
- def location_add(self, paths: Iterable[bytes]) -> bool:
- """Register the given `paths` in the storage."""
- ...
-
- @remote_api_endpoint("location_get_all")
- def location_get_all(self) -> Set[bytes]:
- """Retrieve all paths present in the provenance model.
- This method is used only in tests."""
- ...
-
- @remote_api_endpoint("open")
- def open(self) -> None:
- """Open connection to the storage and allocate necessary resources."""
- ...
-
- @remote_api_endpoint("origin_add")
- def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool:
- """Add origins identified by sha1 ids, with their corresponding url (as paired
- in `orgs`) to the provenance storage. Return a boolean stating if the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("origin_get")
- def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]:
- """Retrieve the associated url for each origin sha1 in `ids`."""
- ...
-
- @remote_api_endpoint("revision_add")
- def revision_add(
- self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]]
- ) -> bool:
- """Add revisions identified by sha1 ids, with optional associated date or origin
- (as paired in `revs`) to the provenance storage. Return a boolean stating if the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("revision_get")
- def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]:
- """Retrieve the associated date and origin for each revision sha1 in `ids`. If
- some revision has no associated date nor origin, it is not present in the
- resulting dictionary.
- """
- ...
-
- @remote_api_endpoint("relation_add")
- def relation_add(
- self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]]
- ) -> bool:
- """Add entries in the selected `relation`. This method assumes all entities
- being related are already registered in the storage. See `content_add`,
- `directory_add`, `origin_add`, and `revision_add`.
- """
- ...
-
- @remote_api_endpoint("relation_get")
- def relation_get(
- self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False
- ) -> Dict[Sha1Git, Set[RelationData]]:
- """Retrieve all entries in the selected `relation` whose source entities are
- identified by some sha1 id in `ids`. If `reverse` is set, destination entities
- are matched instead.
- """
- ...
-
- @remote_api_endpoint("relation_get_all")
- def relation_get_all(
- self, relation: RelationType
- ) -> Dict[Sha1Git, Set[RelationData]]:
- """Retrieve all entries in the selected `relation` that are present in the
- provenance model. This method is used only in tests.
- """
- ...
-
- @remote_api_endpoint("with_path")
- def with_path(self) -> bool:
- ...
+from .storage.interface import ProvenanceResult, ProvenanceStorageInterface
@runtime_checkable
diff --git a/swh/provenance/journal_client.py b/swh/provenance/journal_client.py
--- a/swh/provenance/journal_client.py
+++ b/swh/provenance/journal_client.py
@@ -13,11 +13,11 @@
import sentry_sdk
from swh.model.model import TimestampWithTimezone
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.revision import revision_add
EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
diff --git a/swh/provenance/postgresql/__init__.py b/swh/provenance/postgresql/__init__.py
deleted file mode 100644
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -14,16 +14,16 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-from .interface import (
+from .interface import ProvenanceInterface
+from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
+from .storage.interface import (
DirectoryData,
- ProvenanceInterface,
ProvenanceResult,
ProvenanceStorageInterface,
RelationData,
RelationType,
RevisionData,
)
-from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
from .util import path_normalize
LOGGER = logging.getLogger(__name__)
diff --git a/swh/provenance/storage/__init__.py b/swh/provenance/storage/__init__.py
--- a/swh/provenance/storage/__init__.py
+++ b/swh/provenance/storage/__init__.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+import warnings
+
+from .interface import ProvenanceStorageInterface
+
+
+def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface:
+ """Get a provenance storage object of class ``cls`` with arguments ``args``.
+
+ Args:
+ cls: storage's class, either 'postgresql', 'rabbitmq' or 'local' (deprecated)
+ args: dictionary of arguments passed to the storage class constructor
+
+ Returns:
+ an instance of storage object
+
+ Raises:
+ :exc:`ValueError` if passed an unknown storage class.
+ """
+ if cls in ["local", "postgresql"]:
+ from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
+
+ if cls == "local":
+ warnings.warn(
+ '"local" class is deprecated for provenance storage, please '
+ 'use "postgresql" class instead.',
+ DeprecationWarning,
+ )
+
+ raise_on_commit = kwargs.get("raise_on_commit", False)
+ return ProvenanceStoragePostgreSql(
+ raise_on_commit=raise_on_commit, **kwargs["db"]
+ )
+
+ elif cls == "rabbitmq":
+ from swh.provenance.storage.rabbitmq.client import (
+ ProvenanceStorageRabbitMQClient,
+ )
+
+ rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs)
+ if TYPE_CHECKING:
+ assert isinstance(rmq_storage, ProvenanceStorageInterface)
+ return rmq_storage
+
+ raise ValueError
diff --git a/swh/provenance/interface.py b/swh/provenance/storage/interface.py
copy from swh/provenance/interface.py
copy to swh/provenance/storage/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/storage/interface.py
@@ -16,8 +16,6 @@
from swh.core.api import remote_api_endpoint
from swh.model.model import Sha1Git
-from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
-
class EntityType(enum.Enum):
CONTENT = "content"
@@ -231,170 +229,3 @@
@remote_api_endpoint("with_path")
def with_path(self) -> bool:
...
-
-
-@runtime_checkable
-class ProvenanceInterface(Protocol):
- storage: ProvenanceStorageInterface
-
- def __enter__(self) -> ProvenanceInterface:
- ...
-
- def __exit__(
- self,
- exc_type: Optional[Type[BaseException]],
- exc_val: Optional[BaseException],
- exc_tb: Optional[TracebackType],
- ) -> None:
- ...
-
- def close(self) -> None:
- """Close connection to the underlying `storage` and release resources."""
- ...
-
- def flush(self) -> None:
- """Flush internal cache to the underlying `storage`."""
- ...
-
- def flush_if_necessary(self) -> bool:
- """Flush internal cache to the underlying `storage`, if the cache reached
- a threshold (MAX_CACHE_ELEMENTS).
- Return True if the cache is flushed, false otherwise.
- """
- ...
-
- def content_add_to_directory(
- self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
- ) -> None:
- """Associate `blob` with `directory` in the provenance model. `prefix` is the
- relative path from `directory` to `blob` (excluding `blob`'s name).
- """
- ...
-
- def content_add_to_revision(
- self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
- ) -> None:
- """Associate `blob` with `revision` in the provenance model. `prefix` is the
- absolute path from `revision`'s root directory to `blob` (excluding `blob`'s
- name).
- """
- ...
-
- def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]:
- """Retrieve the first occurrence of the blob identified by `id`."""
- ...
-
- def content_find_all(
- self, id: Sha1Git, limit: Optional[int] = None
- ) -> Generator[ProvenanceResult, None, None]:
- """Retrieve all the occurrences of the blob identified by `id`."""
- ...
-
- def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]:
- """Retrieve the earliest known date of `blob`."""
- ...
-
- def content_get_early_dates(
- self, blobs: Iterable[FileEntry]
- ) -> Dict[Sha1Git, datetime]:
- """Retrieve the earliest known date for each blob in `blobs`. If some blob has
- no associated date, it is not present in the resulting dictionary.
- """
- ...
-
- def content_set_early_date(self, blob: FileEntry, date: datetime) -> None:
- """Associate `date` to `blob` as it's earliest known date."""
- ...
-
- def directory_add_to_revision(
- self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
- ) -> None:
- """Associate `directory` with `revision` in the provenance model. `path` is the
- absolute path from `revision`'s root directory to `directory` (including
- `directory`'s name).
- """
- ...
-
- def directory_already_flattenned(self, directory: DirectoryEntry) -> Optional[bool]:
- """Check if the directory is already flattenned in the provenance model. If the
- directory is unknown for the model, the methods returns None.
- """
- ...
-
- def directory_flag_as_flattenned(self, directory: DirectoryEntry) -> None:
- """Mark the directory as flattenned in the provenance model. If the
- directory is unknown for the model, this method has no effect.
- """
- ...
-
- def directory_get_date_in_isochrone_frontier(
- self, directory: DirectoryEntry
- ) -> Optional[datetime]:
- """Retrieve the earliest known date of `directory` as an isochrone frontier in
- the provenance model.
- """
- ...
-
- def directory_get_dates_in_isochrone_frontier(
- self, dirs: Iterable[DirectoryEntry]
- ) -> Dict[Sha1Git, datetime]:
- """Retrieve the earliest known date for each directory in `dirs` as isochrone
- frontiers provenance model. If some directory has no associated date, it is not
- present in the resulting dictionary.
- """
- ...
-
- def directory_set_date_in_isochrone_frontier(
- self, directory: DirectoryEntry, date: datetime
- ) -> None:
- """Associate `date` to `directory` as it's earliest known date as an isochrone
- frontier in the provenance model.
- """
- ...
-
- def open(self) -> None:
- """Open connection to the underlying `storage` and allocate necessary
- resources.
- """
- ...
-
- def origin_add(self, origin: OriginEntry) -> None:
- """Add `origin` to the provenance model."""
- ...
-
- def revision_add(self, revision: RevisionEntry) -> None:
- """Add `revision` to the provenance model. This implies storing `revision`'s
- date in the model, thus `revision.date` must be a valid date.
- """
- ...
-
- def revision_add_before_revision(
- self, head_id: Sha1Git, revision_id: Sha1Git
- ) -> None:
- """Associate `revision_id` to `head_id` as an ancestor of the latter."""
- ...
-
- def revision_add_to_origin(
- self, origin: OriginEntry, revision: RevisionEntry
- ) -> None:
- """Associate `revision` to `origin` as a head revision of the latter (ie. the
- target of an snapshot for `origin` in the archive)."""
- ...
-
- def revision_is_head(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is associated as a head revision for some origin."""
- ...
-
- def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]:
- """Retrieve the date associated to `revision`."""
- ...
-
- def revision_get_preferred_origin(self, revision_id: Sha1Git) -> Optional[Sha1Git]:
- """Retrieve the preferred origin associated to `revision`."""
- ...
-
- def revision_set_preferred_origin(
- self, origin: OriginEntry, revision_id: Sha1Git
- ) -> None:
- """Associate `origin` as the preferred origin for `revision`."""
- ...
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/storage/postgresql.py
rename from swh/provenance/postgresql/provenance.py
rename to swh/provenance/storage/postgresql.py
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/storage/postgresql.py
@@ -19,8 +19,7 @@
from swh.core.db import BaseDb
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from ..interface import (
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
ProvenanceResult,
diff --git a/swh/provenance/multiplexer/__init__.py b/swh/provenance/storage/rabbitmq/__init__.py
rename from swh/provenance/multiplexer/__init__.py
rename to swh/provenance/storage/rabbitmq/__init__.py
diff --git a/swh/provenance/api/client.py b/swh/provenance/storage/rabbitmq/client.py
rename from swh/provenance/api/client.py
rename to swh/provenance/storage/rabbitmq/client.py
--- a/swh/provenance/api/client.py
+++ b/swh/provenance/storage/rabbitmq/client.py
@@ -24,9 +24,13 @@
from swh.core.api.serializers import encode_data_client as encode_data
from swh.core.api.serializers import msgpack_loads as decode_data
from swh.core.statsd import statsd
+from swh.provenance.storage import get_provenance_storage
+from swh.provenance.storage.interface import (
+ ProvenanceStorageInterface,
+ RelationData,
+ RelationType,
+)
-from .. import get_provenance_storage
-from ..interface import ProvenanceStorageInterface, RelationData, RelationType
from .serializers import DECODERS, ENCODERS
from .server import ProvenanceStorageRabbitMQServer
diff --git a/swh/provenance/api/serializers.py b/swh/provenance/storage/rabbitmq/serializers.py
rename from swh/provenance/api/serializers.py
rename to swh/provenance/storage/rabbitmq/serializers.py
diff --git a/swh/provenance/api/server.py b/swh/provenance/storage/rabbitmq/server.py
rename from swh/provenance/api/server.py
rename to swh/provenance/storage/rabbitmq/server.py
--- a/swh/provenance/api/server.py
+++ b/swh/provenance/storage/rabbitmq/server.py
@@ -14,7 +14,7 @@
import threading
from typing import Any, Callable
from typing import Counter as TCounter
-from typing import Dict, Generator, Iterable, List, Optional, Set, Tuple, Union, cast
+from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union, cast
import pika
import pika.channel
@@ -29,15 +29,15 @@
from swh.core.api.serializers import msgpack_loads as decode_data
from swh.model.hashutil import hash_to_hex
from swh.model.model import Sha1Git
-
-from ..interface import (
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
RelationData,
RelationType,
RevisionData,
)
-from ..util import path_id
+from swh.provenance.util import path_id
+
from .serializers import DECODERS, ENCODERS
LOG_FORMAT = (
@@ -540,6 +540,7 @@
:param int prefetch_count: Prefetch value for the RabbitMQ connection when
receiving messages
+
"""
self._workers: List[ProvenanceStorageRabbitMQWorker] = []
for exchange in ProvenanceStorageRabbitMQServer.get_exchanges():
@@ -582,7 +583,7 @@
self._running = False
@staticmethod
- def get_binding_keys(exchange: str, range: int) -> Generator[str, None, None]:
+ def get_binding_keys(exchange: str, range: int) -> Iterator[str]:
for meth_name, relation in ProvenanceStorageRabbitMQServer.get_meth_names(
exchange
):
@@ -611,7 +612,7 @@
return exchange
@staticmethod
- def get_exchanges() -> Generator[str, None, None]:
+ def get_exchanges() -> Iterator[str]:
yield from [entity.value for entity in EntityType] + ["location"]
@staticmethod
@@ -624,7 +625,7 @@
@staticmethod
def get_meth_names(
exchange: str,
- ) -> Generator[Tuple[str, Optional[RelationType]], None, None]:
+ ) -> Iterator[Tuple[str, Optional[RelationType]]]:
if exchange == EntityType.CONTENT.value:
yield from [
("content_add", None),
@@ -648,7 +649,7 @@
yield "location_add", None
@staticmethod
- def get_ranges(unused_exchange: str) -> Generator[int, None, None]:
+ def get_ranges(unused_exchange: str) -> Iterator[int]:
# XXX: we might want to have a different range per exchange
yield from range(ProvenanceStorageRabbitMQServer.queue_count)
diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py
deleted file mode 100644
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -22,11 +22,13 @@
from swh.graph.http_rpc_server import make_app
from swh.journal.serializers import msgpack_ext_hook
from swh.model.model import BaseModel, TimestampWithTimezone
-from swh.provenance import get_provenance, get_provenance_storage
-from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import ProvenanceInterface, ProvenanceStorageInterface
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
-from swh.provenance.storage.archive import ArchiveStorage
+from swh.provenance import get_provenance
+from swh.provenance.archive.interface import ArchiveInterface
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.storage import get_provenance_storage
+from swh.provenance.storage.interface import ProvenanceStorageInterface
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from swh.storage.interface import StorageInterface
from swh.storage.replay import OBJECT_CONVERTERS, OBJECT_FIXERS, process_replay_objects
diff --git a/swh/provenance/tests/test_archive_interface.py b/swh/provenance/tests/test_archive_interface.py
--- a/swh/provenance/tests/test_archive_interface.py
+++ b/swh/provenance/tests/test_archive_interface.py
@@ -30,10 +30,10 @@
)
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.multiplexer.archive import ArchiveMultiplexed
-from swh.provenance.postgresql.archive import ArchivePostgreSQL
-from swh.provenance.storage.archive import ArchiveStorage
-from swh.provenance.swhgraph.archive import ArchiveGraph
+from swh.provenance.archive.multiplexer import ArchiveMultiplexed
+from swh.provenance.archive.postgresql import ArchivePostgreSQL
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.archive.swhgraph import ArchiveGraph
from swh.provenance.tests.conftest import fill_storage, grpc_server, load_repo_data
from swh.storage.interface import StorageInterface
from swh.storage.postgresql.storage import Storage
diff --git a/swh/provenance/tests/test_conflict_resolution.py b/swh/provenance/tests/test_conflict_resolution.py
--- a/swh/provenance/tests/test_conflict_resolution.py
+++ b/swh/provenance/tests/test_conflict_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,13 +8,13 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
-from swh.provenance.api.server import (
+from swh.provenance.storage.interface import DirectoryData, RelationData, RevisionData
+from swh.provenance.storage.rabbitmq.server import (
resolve_dates,
resolve_directory,
resolve_relation,
resolve_revision,
)
-from swh.provenance.interface import DirectoryData, RelationData, RevisionData
def test_resolve_dates() -> None:
diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py
--- a/swh/provenance/tests/test_conftest.py
+++ b/swh/provenance/tests/test_conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/provenance/tests/test_consistency.py b/swh/provenance/tests/test_consistency.py
--- a/swh/provenance/tests/test_consistency.py
+++ b/swh/provenance/tests/test_consistency.py
@@ -1,17 +1,14 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import (
- DirectoryData,
- ProvenanceInterface,
- ProvenanceResult,
-)
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import RevisionEntry
-from swh.provenance.revision import revision_add
+from swh.provenance.storage.interface import DirectoryData, ProvenanceResult
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,15 +8,11 @@
from typing import Tuple
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.directory import directory_add, directory_flatten_range
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add, directory_flatten_range
-from swh.provenance.interface import (
- DirectoryData,
- ProvenanceInterface,
- RelationData,
- RelationType,
-)
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, FileEntry
+from swh.provenance.storage.interface import DirectoryData, RelationData, RelationType
from swh.provenance.tests.conftest import fill_storage, load_repo_data
diff --git a/swh/provenance/tests/test_directory_iterator.py b/swh/provenance/tests/test_directory_iterator.py
--- a/swh/provenance/tests/test_directory_iterator.py
+++ b/swh/provenance/tests/test_directory_iterator.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
-from swh.provenance.directory import CSVDirectoryIterator
+from swh.provenance.algos.directory import CSVDirectoryIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -7,11 +7,10 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.origin import HistoryGraph, origin_add_revision
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import HistoryGraph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add_revision
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_init.py b/swh/provenance/tests/test_init.py
--- a/swh/provenance/tests/test_init.py
+++ b/swh/provenance/tests/test_init.py
@@ -3,10 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.provenance import get_archive
-from swh.provenance.multiplexer.archive import ArchiveMultiplexed
-from swh.provenance.storage.archive import ArchiveStorage
-from swh.provenance.swhgraph.archive import ArchiveGraph
+from swh.provenance.archive import get_archive
+from swh.provenance.archive.multiplexer import ArchiveMultiplexed
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.archive.swhgraph import ArchiveGraph
def test_multiplexer_configuration():
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,11 +11,15 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.isochrone_graph import (
+ DirectoryTooLarge,
+ IsochroneNode,
+ build_isochrone_graph,
+)
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_origin_iterator.py b/swh/provenance/tests/test_origin_iterator.py
--- a/swh/provenance/tests/test_origin_iterator.py
+++ b/swh/provenance/tests/test_origin_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.origin import CSVOriginIterator
+from swh.provenance.algos.origin import CSVOriginIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.algos.origin import (
iter_origin_visit_statuses,
diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py
--- a/swh/provenance/tests/test_origin_revision_layer.py
+++ b/swh/provenance/tests/test_origin_revision_layer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,11 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.origin import origin_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry
-from swh.provenance.origin import origin_add
+from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py
--- a/swh/provenance/tests/test_provenance_db.py
+++ b/swh/provenance/tests/test_provenance_db.py
@@ -1,10 +1,10 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.provenance.interface import ProvenanceInterface
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
def test_provenance_flavor(provenance: ProvenanceInterface) -> None:
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -12,21 +12,21 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Origin, Sha1Git
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import (
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
+from swh.provenance.provenance import Provenance
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
- ProvenanceInterface,
ProvenanceResult,
ProvenanceStorageInterface,
RelationData,
RelationType,
RevisionData,
)
-from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.provenance import Provenance
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_provenance_storage_rabbitmq.py b/swh/provenance/tests/test_provenance_storage_rabbitmq.py
--- a/swh/provenance/tests/test_provenance_storage_rabbitmq.py
+++ b/swh/provenance/tests/test_provenance_storage_rabbitmq.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from typing import Any, Dict, Generator
import pytest
from swh.provenance import get_provenance_storage
-from swh.provenance.interface import ProvenanceStorageInterface
+from swh.provenance.storage.interface import ProvenanceStorageInterface
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
@@ -15,7 +20,7 @@
) -> Generator[ProvenanceStorageInterface, None, None]:
"""Return a working and initialized ProvenanceStorageInterface object"""
- from swh.provenance.api.server import ProvenanceStorageRabbitMQServer
+ from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer
host = rabbitmq.args["host"]
port = rabbitmq.args["port"]
diff --git a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
--- a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
+++ b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_provenance_storage_without_path.py b/swh/provenance/tests/test_provenance_storage_without_path.py
--- a/swh/provenance/tests/test_provenance_storage_without_path.py
+++ b/swh/provenance/tests/test_provenance_storage_without_path.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
--- a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
+++ b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py
--- a/swh/provenance/tests/test_revision_content_layer.py
+++ b/swh/provenance/tests/test_revision_content_layer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,11 +11,12 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.directory import directory_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
-from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
+from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_revision_iterator.py b/swh/provenance/tests/test_revision_iterator.py
--- a/swh/provenance/tests/test_revision_iterator.py
+++ b/swh/provenance/tests/test_revision_iterator.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
-from swh.provenance.revision import CSVRevisionIterator
+from swh.provenance.algos.revision import CSVRevisionIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_routing_keys.py b/swh/provenance/tests/test_routing_keys.py
--- a/swh/provenance/tests/test_routing_keys.py
+++ b/swh/provenance/tests/test_routing_keys.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,8 +6,8 @@
import pytest
from swh.model.hashutil import hash_to_bytes
-from swh.provenance.api.server import ProvenanceStorageRabbitMQServer
-from swh.provenance.interface import RelationType
+from swh.provenance.storage.interface import RelationType
+from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer
def test_routing_keys_for_entity() -> None:
diff --git a/swh/provenance/tests/test_split_ranges.py b/swh/provenance/tests/test_split_ranges.py
--- a/swh/provenance/tests/test_split_ranges.py
+++ b/swh/provenance/tests/test_split_ranges.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,8 +8,8 @@
import pytest
from swh.model.hashutil import hash_to_bytes
-from swh.provenance.api.client import split_ranges
-from swh.provenance.interface import RelationData, RelationType
+from swh.provenance.storage.interface import RelationData, RelationType
+from swh.provenance.storage.rabbitmq.client import split_ranges
def test_split_ranges_for_relation() -> None:
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 19 2024, 6:42 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227590
Attached To
D8593: Reorganize the code
Event Timeline
Log In to Comment