Page MenuHomeSoftware Heritage

D8593.id31038.diff
No OneTemporary

D8593.id31038.diff

diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -5,66 +5,8 @@
from __future__ import annotations
-from typing import TYPE_CHECKING
-import warnings
-
-if TYPE_CHECKING:
- from .archive import ArchiveInterface
- from .interface import ProvenanceInterface, ProvenanceStorageInterface
-
-
-def get_archive(cls: str, **kwargs) -> ArchiveInterface:
- """Get an archive object of class ``cls`` with arguments ``args``.
-
- Args:
- cls: archive's class, either 'api', 'direct' or 'graph'
- args: dictionary of arguments passed to the archive class constructor
-
- Returns:
- an instance of archive object (either using swh.storage API or direct
- queries to the archive's database)
-
- Raises:
- :cls:`ValueError` if passed an unknown archive class.
- """
- if cls == "api":
- from swh.storage import get_storage
-
- from .storage.archive import ArchiveStorage
-
- return ArchiveStorage(get_storage(**kwargs["storage"]))
-
- elif cls == "direct":
- from swh.core.db import BaseDb
-
- from .postgresql.archive import ArchivePostgreSQL
-
- return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
-
- elif cls == "graph":
- try:
- from swh.storage import get_storage
-
- from .swhgraph.archive import ArchiveGraph
-
- return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"]))
-
- except ModuleNotFoundError:
- raise EnvironmentError(
- "Graph configuration required but module is not installed."
- )
- elif cls == "multiplexer":
-
- from .multiplexer.archive import ArchiveMultiplexed
-
- archives = []
- for ctr, archive in enumerate(kwargs["archives"]):
- name = archive.pop("name", f"backend_{ctr}")
- archives.append((name, get_archive(**archive)))
-
- return ArchiveMultiplexed(archives)
- else:
- raise ValueError
+from .interface import ProvenanceInterface
+from .storage import get_provenance_storage
def get_provenance(**kwargs) -> ProvenanceInterface:
@@ -82,43 +24,4 @@
return Provenance(get_provenance_storage(**kwargs))
-def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface:
- """Get an archive object of class ``cls`` with arguments ``args``.
-
- Args:
- cls: storage's class, only 'local' is currently supported
- args: dictionary of arguments passed to the storage class constructor
-
- Returns:
- an instance of storage object
-
- Raises:
- :cls:`ValueError` if passed an unknown archive class.
- """
- if cls in ["local", "postgresql"]:
- from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
-
- if cls == "local":
- warnings.warn(
- '"local" class is deprecated for provenance storage, please '
- 'use "postgresql" class instead.',
- DeprecationWarning,
- )
-
- raise_on_commit = kwargs.get("raise_on_commit", False)
- return ProvenanceStoragePostgreSql(
- raise_on_commit=raise_on_commit, **kwargs["db"]
- )
-
- elif cls == "rabbitmq":
- from .api.client import ProvenanceStorageRabbitMQClient
-
- rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs)
- if TYPE_CHECKING:
- assert isinstance(rmq_storage, ProvenanceStorageInterface)
- return rmq_storage
-
- raise ValueError
-
-
get_datastore = get_provenance_storage
diff --git a/swh/provenance/api/__init__.py b/swh/provenance/algos/__init__.py
rename from swh/provenance/api/__init__.py
rename to swh/provenance/algos/__init__.py
diff --git a/swh/provenance/directory.py b/swh/provenance/algos/directory.py
rename from swh/provenance/directory.py
rename to swh/provenance/algos/directory.py
--- a/swh/provenance/directory.py
+++ b/swh/provenance/algos/directory.py
@@ -8,10 +8,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry
REVISION_DURATION_METRIC = "swh_provenance_directory_duration_seconds"
diff --git a/swh/provenance/graph.py b/swh/provenance/algos/isochrone_graph.py
rename from swh/provenance/graph.py
rename to swh/provenance/algos/isochrone_graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/algos/isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,9 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from .archive import ArchiveInterface
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
GRAPH_DURATION_METRIC = "swh_provenance_graph_duration_seconds"
GRAPH_OPERATIONS_METRIC = "swh_provenance_graph_operations_total"
@@ -26,50 +25,6 @@
pass
-class HistoryGraph:
- @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
- def __init__(
- self,
- archive: ArchiveInterface,
- revision: RevisionEntry,
- ) -> None:
- self.head_id = revision.id
- self._nodes: Set[Sha1Git] = set()
- # rev -> set(parents)
- self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
-
- stack = {self.head_id}
- while stack:
- current = stack.pop()
-
- if current not in self._nodes:
- self._nodes.add(current)
- self._edges.setdefault(current, set())
- for rev, parent in archive.revision_get_some_outbound_edges(current):
- self._nodes.add(rev)
- self._edges.setdefault(rev, set()).add(parent)
- stack.add(parent)
-
- # don't process nodes for which we've already retrieved outbound edges
- stack -= self._nodes
-
- def parent_ids(self) -> Set[Sha1Git]:
- """Get all the known parent ids in the current graph"""
- return self._nodes - {self.head_id}
-
- def __str__(self) -> str:
- return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "head": self.head_id.hex(),
- "graph": {
- node.hex(): sorted(parent.hex() for parent in parents)
- for node, parents in self._edges.items()
- },
- }
-
-
class IsochroneNode:
def __init__(
self,
diff --git a/swh/provenance/origin.py b/swh/provenance/algos/origin.py
rename from swh/provenance/origin.py
rename to swh/provenance/algos/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/algos/origin.py
@@ -6,17 +6,15 @@
from datetime import datetime
from itertools import islice
import logging
-from typing import Generator, Iterable, Iterator, List, Optional, Tuple
+from typing import Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, Tuple
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
-from .archive import ArchiveInterface
-from .graph import HistoryGraph
-from .interface import ProvenanceInterface
-from .model import OriginEntry
-
-ORIGIN_DURATION_METRIC = "swh_provenance_origin_revision_layer_duration_seconds"
+ORIGIN_DURATION_METRIC = "swh_provenance_origin_duration_seconds"
LOG_FORMAT = (
"%(levelname) -10s %(asctime)s %(name) -30s %(funcName) "
@@ -26,6 +24,50 @@
LOGGER = logging.getLogger(__name__)
+class HistoryGraph:
+ @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "HistoryGraph"})
+ def __init__(
+ self,
+ archive: ArchiveInterface,
+ revision: RevisionEntry,
+ ) -> None:
+ self.head_id = revision.id
+ self._nodes: Set[Sha1Git] = set()
+ # rev -> set(parents)
+ self._edges: Dict[Sha1Git, Set[Sha1Git]] = {}
+
+ stack = {self.head_id}
+ while stack:
+ current = stack.pop()
+
+ if current not in self._nodes:
+ self._nodes.add(current)
+ self._edges.setdefault(current, set())
+ for rev, parent in archive.revision_get_some_outbound_edges(current):
+ self._nodes.add(rev)
+ self._edges.setdefault(rev, set()).add(parent)
+ stack.add(parent)
+
+ # don't process nodes for which we've already retrieved outbound edges
+ stack -= self._nodes
+
+ def parent_ids(self) -> Set[Sha1Git]:
+ """Get all the known parent ids in the current graph"""
+ return self._nodes - {self.head_id}
+
+ def __str__(self) -> str:
+ return f"<HistoryGraph: head={self.head_id.hex()}, edges={self._edges}"
+
+ def as_dict(self) -> Dict[str, Any]:
+ return {
+ "head": self.head_id.hex(),
+ "graph": {
+ node.hex(): sorted(parent.hex() for parent in parents)
+ for node, parents in self._edges.items()
+ },
+ }
+
+
class CSVOriginIterator:
"""Iterator over origin visit statuses typically present in the given CSV
file.
diff --git a/swh/provenance/revision.py b/swh/provenance/algos/revision.py
rename from swh/provenance/revision.py
rename to swh/provenance/algos/revision.py
--- a/swh/provenance/revision.py
+++ b/swh/provenance/algos/revision.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,14 +9,14 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
+from swh.provenance.archive import ArchiveInterface
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import DirectoryEntry, RevisionEntry
-from .archive import ArchiveInterface
from .directory import directory_flatten
-from .graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-from .interface import ProvenanceInterface
-from .model import DirectoryEntry, RevisionEntry
+from .isochrone_graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
-REVISION_DURATION_METRIC = "swh_provenance_revision_content_layer_duration_seconds"
+REVISION_DURATION_METRIC = "swh_provenance_revision_duration_seconds"
logger = logging.getLogger(__name__)
diff --git a/swh/provenance/archive/__init__.py b/swh/provenance/archive/__init__.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/archive/__init__.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from __future__ import annotations
+
+from .interface import ArchiveInterface
+
+
+def get_archive(cls: str, **kwargs) -> ArchiveInterface:
+ """Get an ArchiveInterface-like object of class ``cls`` with arguments ``args``.
+
+ Args:
+ cls: archive's class, either 'api', 'direct', 'graph' or 'multiplexer'
+ args: dictionary of arguments passed to the archive class constructor
+
+ Returns:
+ an instance of archive object (either using swh.storage API or direct
+ queries to the archive's database)
+
+ Raises:
+ :class:`ValueError` if passed an unknown archive class.
+ """
+ if cls == "api":
+ from swh.storage import get_storage
+
+ from .storage import ArchiveStorage
+
+ return ArchiveStorage(get_storage(**kwargs["storage"]))
+
+ elif cls == "direct":
+ from swh.core.db import BaseDb
+
+ from .postgresql import ArchivePostgreSQL
+
+ return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
+
+ elif cls == "graph":
+ try:
+ from swh.storage import get_storage
+
+ from .swhgraph import ArchiveGraph
+
+ return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"]))
+
+ except ModuleNotFoundError:
+ raise EnvironmentError(
+ "Graph configuration required but module is not installed."
+ )
+ elif cls == "multiplexer":
+
+ from .multiplexer import ArchiveMultiplexed
+
+ archives = []
+ for ctr, archive in enumerate(kwargs["archives"]):
+ name = archive.pop("name", f"backend_{ctr}")
+ archives.append((name, get_archive(**archive)))
+
+ return ArchiveMultiplexed(archives)
+ else:
+ raise ValueError
diff --git a/swh/provenance/archive.py b/swh/provenance/archive/interface.py
rename from swh/provenance/archive.py
rename to swh/provenance/archive/interface.py
diff --git a/swh/provenance/multiplexer/archive.py b/swh/provenance/archive/multiplexer.py
rename from swh/provenance/multiplexer/archive.py
rename to swh/provenance/archive/multiplexer.py
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/archive/postgresql.py
rename from swh/provenance/postgresql/archive.py
rename to swh/provenance/archive/postgresql.py
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/archive/storage.py
rename from swh/provenance/storage/archive.py
rename to swh/provenance/archive/storage.py
diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/archive/swhgraph.py
rename from swh/provenance/swhgraph/archive.py
rename to swh/provenance/archive/swhgraph.py
diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py
--- a/swh/provenance/cli.py
+++ b/swh/provenance/cli.py
@@ -144,7 +144,8 @@
@cli.group(name="origin")
@click.pass_context
def origin(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -163,7 +164,7 @@
)
@click.pass_context
def origin_from_csv(ctx: click.core.Context, filename: str, limit: Optional[int]):
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -222,7 +223,8 @@
@cli.group(name="revision")
@click.pass_context
def revision(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -296,7 +298,7 @@
min_size: int,
max_directory_size: int,
) -> None:
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -427,7 +429,8 @@
@cli.group(name="directory")
@click.pass_context
def directory(ctx: click.core.Context):
- from . import get_archive, get_provenance
+ from . import get_provenance
+ from .archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"])
@@ -453,7 +456,7 @@
)
@click.pass_context
def directory_flatten(ctx: click.core.Context, range_from, range_to, min_size):
- from swh.provenance.directory import directory_flatten_range
+ from swh.provenance.algos.directory import directory_flatten_range
provenance = ctx.obj["provenance"]
archive = ctx.obj["archive"]
@@ -492,8 +495,9 @@
min_size: int,
) -> None:
"""Process a provided list of directories in the isochrone frontier."""
- from . import get_archive, get_provenance
- from .directory import CSVDirectoryIterator, directory_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.directory import CSVDirectoryIterator, directory_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
directories_provider = generate_directory_ids(filename)
@@ -575,8 +579,9 @@
min_size: int,
) -> None:
"""Process a provided list of revisions."""
- from . import get_archive, get_provenance
- from .revision import CSVRevisionIterator, revision_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.revision import CSVRevisionIterator, revision_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
revisions_provider = generate_revision_tuples(filename)
@@ -621,8 +626,9 @@
@deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead")
def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None:
"""Process a provided list of origins."""
- from . import get_archive, get_provenance
- from .origin import CSVOriginIterator, origin_add
+ from swh.provenance import get_provenance
+ from swh.provenance.algos.origin import CSVOriginIterator, origin_add
+ from swh.provenance.archive import get_archive
archive = get_archive(**ctx.obj["config"]["provenance"]["archive"])
origins_provider = generate_origin_tuples(filename)
diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -5,232 +5,16 @@
from __future__ import annotations
-from dataclasses import dataclass
from datetime import datetime
-import enum
from types import TracebackType
-from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union
+from typing import Dict, Generator, Iterable, Optional, Type
from typing_extensions import Protocol, runtime_checkable
-from swh.core.api import remote_api_endpoint
from swh.model.model import Sha1Git
from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
-
-
-class EntityType(enum.Enum):
- CONTENT = "content"
- DIRECTORY = "directory"
- REVISION = "revision"
- ORIGIN = "origin"
-
-
-class RelationType(enum.Enum):
- CNT_EARLY_IN_REV = "content_in_revision"
- CNT_IN_DIR = "content_in_directory"
- DIR_IN_REV = "directory_in_revision"
- REV_IN_ORG = "revision_in_origin"
- REV_BEFORE_REV = "revision_before_revision"
-
-
-@dataclass(eq=True, frozen=True)
-class ProvenanceResult:
- content: Sha1Git
- revision: Sha1Git
- date: datetime
- origin: Optional[str]
- path: bytes
-
-
-@dataclass(eq=True, frozen=True)
-class DirectoryData:
- """Object representing the data associated to a directory in the provenance model,
- where `date` is the date of the directory in the isochrone frontier, and `flat` is a
- flag acknowledging that a flat model for the elements outside the frontier has
- already been created.
- """
-
- date: datetime
- flat: bool
-
-
-@dataclass(eq=True, frozen=True)
-class RevisionData:
- """Object representing the data associated to a revision in the provenance model,
- where `date` is the optional date of the revision (specifying it acknowledges that
- the revision was already processed by the revision-content algorithm); and `origin`
- identifies the preferred origin for the revision, if any.
- """
-
- date: Optional[datetime]
- origin: Optional[Sha1Git]
-
-
-@dataclass(eq=True, frozen=True)
-class RelationData:
- """Object representing a relation entry in the provenance model, where `src` and
- `dst` are the sha1 ids of the entities being related, and `path` is optional
- depending on the relation being represented.
- """
-
- dst: Sha1Git
- path: Optional[bytes]
-
-
-@runtime_checkable
-class ProvenanceStorageInterface(Protocol):
- def __enter__(self) -> ProvenanceStorageInterface:
- ...
-
- def __exit__(
- self,
- exc_type: Optional[Type[BaseException]],
- exc_val: Optional[BaseException],
- exc_tb: Optional[TracebackType],
- ) -> None:
- ...
-
- @remote_api_endpoint("close")
- def close(self) -> None:
- """Close connection to the storage and release resources."""
- ...
-
- @remote_api_endpoint("content_add")
- def content_add(self, cnts: Dict[Sha1Git, datetime]) -> bool:
- """Add blobs identified by sha1 ids, with an associated date (as paired in
- `cnts`) to the provenance storage. Return a boolean stating whether the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("content_find_first")
- def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]:
- """Retrieve the first occurrence of the blob identified by `id`."""
- ...
-
- @remote_api_endpoint("content_find_all")
- def content_find_all(
- self, id: Sha1Git, limit: Optional[int] = None
- ) -> Generator[ProvenanceResult, None, None]:
- """Retrieve all the occurrences of the blob identified by `id`."""
- ...
-
- @remote_api_endpoint("content_get")
- def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]:
- """Retrieve the associated date for each blob sha1 in `ids`."""
- ...
-
- @remote_api_endpoint("directory_add")
- def directory_add(self, dirs: Dict[Sha1Git, DirectoryData]) -> bool:
- """Add directories identified by sha1 ids, with associated date and (optional)
- flatten flag (as paired in `dirs`) to the provenance storage. If the flatten
- flag is set to None, the previous value present in the storage is preserved.
- Return a boolean stating if the information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("directory_get")
- def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, DirectoryData]:
- """Retrieve the associated date and (optional) flatten flag for each directory
- sha1 in `ids`. If some directories has no associated date, it is not present in
- the resulting dictionary.
- """
- ...
-
- @remote_api_endpoint("directory_iter_not_flattenned")
- def directory_iter_not_flattenned(
- self, limit: int, start_id: Sha1Git
- ) -> List[Sha1Git]:
- """Retrieve the unflattenned directories after ``start_id`` up to ``limit`` entries."""
- ...
-
- @remote_api_endpoint("entity_get_all")
- def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]:
- """Retrieve all sha1 ids for entities of type `entity` present in the provenance
- model. This method is used only in tests.
- """
- ...
-
- @remote_api_endpoint("location_add")
- def location_add(self, paths: Iterable[bytes]) -> bool:
- """Register the given `paths` in the storage."""
- ...
-
- @remote_api_endpoint("location_get_all")
- def location_get_all(self) -> Set[bytes]:
- """Retrieve all paths present in the provenance model.
- This method is used only in tests."""
- ...
-
- @remote_api_endpoint("open")
- def open(self) -> None:
- """Open connection to the storage and allocate necessary resources."""
- ...
-
- @remote_api_endpoint("origin_add")
- def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool:
- """Add origins identified by sha1 ids, with their corresponding url (as paired
- in `orgs`) to the provenance storage. Return a boolean stating if the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("origin_get")
- def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]:
- """Retrieve the associated url for each origin sha1 in `ids`."""
- ...
-
- @remote_api_endpoint("revision_add")
- def revision_add(
- self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]]
- ) -> bool:
- """Add revisions identified by sha1 ids, with optional associated date or origin
- (as paired in `revs`) to the provenance storage. Return a boolean stating if the
- information was successfully stored.
- """
- ...
-
- @remote_api_endpoint("revision_get")
- def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]:
- """Retrieve the associated date and origin for each revision sha1 in `ids`. If
- some revision has no associated date nor origin, it is not present in the
- resulting dictionary.
- """
- ...
-
- @remote_api_endpoint("relation_add")
- def relation_add(
- self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]]
- ) -> bool:
- """Add entries in the selected `relation`. This method assumes all entities
- being related are already registered in the storage. See `content_add`,
- `directory_add`, `origin_add`, and `revision_add`.
- """
- ...
-
- @remote_api_endpoint("relation_get")
- def relation_get(
- self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False
- ) -> Dict[Sha1Git, Set[RelationData]]:
- """Retrieve all entries in the selected `relation` whose source entities are
- identified by some sha1 id in `ids`. If `reverse` is set, destination entities
- are matched instead.
- """
- ...
-
- @remote_api_endpoint("relation_get_all")
- def relation_get_all(
- self, relation: RelationType
- ) -> Dict[Sha1Git, Set[RelationData]]:
- """Retrieve all entries in the selected `relation` that are present in the
- provenance model. This method is used only in tests.
- """
- ...
-
- @remote_api_endpoint("with_path")
- def with_path(self) -> bool:
- ...
+from .storage.interface import ProvenanceResult, ProvenanceStorageInterface
@runtime_checkable
diff --git a/swh/provenance/journal_client.py b/swh/provenance/journal_client.py
--- a/swh/provenance/journal_client.py
+++ b/swh/provenance/journal_client.py
@@ -13,11 +13,11 @@
import sentry_sdk
from swh.model.model import TimestampWithTimezone
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.revision import revision_add
EPOCH = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
diff --git a/swh/provenance/postgresql/__init__.py b/swh/provenance/postgresql/__init__.py
deleted file mode 100644
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -14,16 +14,16 @@
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-from .interface import (
+from .interface import ProvenanceInterface
+from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
+from .storage.interface import (
DirectoryData,
- ProvenanceInterface,
ProvenanceResult,
ProvenanceStorageInterface,
RelationData,
RelationType,
RevisionData,
)
-from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
from .util import path_normalize
LOGGER = logging.getLogger(__name__)
diff --git a/swh/provenance/storage/__init__.py b/swh/provenance/storage/__init__.py
--- a/swh/provenance/storage/__init__.py
+++ b/swh/provenance/storage/__init__.py
@@ -0,0 +1,52 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+import warnings
+
+from .interface import ProvenanceStorageInterface
+
+
+def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface:
+ """Get a provenance storage object of class ``cls`` with arguments ``args``.
+
+ Args:
+ cls: storage's class, either 'postgresql' (formerly 'local') or 'rabbitmq'
+ args: dictionary of arguments passed to the storage class constructor
+
+ Returns:
+ an instance of storage object
+
+ Raises:
+ :class:`ValueError` if passed an unknown storage class.
+ """
+ if cls in ["local", "postgresql"]:
+ from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
+
+ if cls == "local":
+ warnings.warn(
+ '"local" class is deprecated for provenance storage, please '
+ 'use "postgresql" class instead.',
+ DeprecationWarning,
+ )
+
+ raise_on_commit = kwargs.get("raise_on_commit", False)
+ return ProvenanceStoragePostgreSql(
+ raise_on_commit=raise_on_commit, **kwargs["db"]
+ )
+
+ elif cls == "rabbitmq":
+ from swh.provenance.storage.rabbitmq.client import (
+ ProvenanceStorageRabbitMQClient,
+ )
+
+ rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs)
+ if TYPE_CHECKING:
+ assert isinstance(rmq_storage, ProvenanceStorageInterface)
+ return rmq_storage
+
+ raise ValueError
diff --git a/swh/provenance/interface.py b/swh/provenance/storage/interface.py
copy from swh/provenance/interface.py
copy to swh/provenance/storage/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/storage/interface.py
@@ -16,8 +16,6 @@
from swh.core.api import remote_api_endpoint
from swh.model.model import Sha1Git
-from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry
-
class EntityType(enum.Enum):
CONTENT = "content"
@@ -231,170 +229,3 @@
@remote_api_endpoint("with_path")
def with_path(self) -> bool:
...
-
-
-@runtime_checkable
-class ProvenanceInterface(Protocol):
- storage: ProvenanceStorageInterface
-
- def __enter__(self) -> ProvenanceInterface:
- ...
-
- def __exit__(
- self,
- exc_type: Optional[Type[BaseException]],
- exc_val: Optional[BaseException],
- exc_tb: Optional[TracebackType],
- ) -> None:
- ...
-
- def close(self) -> None:
- """Close connection to the underlying `storage` and release resources."""
- ...
-
- def flush(self) -> None:
- """Flush internal cache to the underlying `storage`."""
- ...
-
- def flush_if_necessary(self) -> bool:
- """Flush internal cache to the underlying `storage`, if the cache reached
- a threshold (MAX_CACHE_ELEMENTS).
- Return True if the cache is flushed, false otherwise.
- """
- ...
-
- def content_add_to_directory(
- self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes
- ) -> None:
- """Associate `blob` with `directory` in the provenance model. `prefix` is the
- relative path from `directory` to `blob` (excluding `blob`'s name).
- """
- ...
-
- def content_add_to_revision(
- self, revision: RevisionEntry, blob: FileEntry, prefix: bytes
- ) -> None:
- """Associate `blob` with `revision` in the provenance model. `prefix` is the
- absolute path from `revision`'s root directory to `blob` (excluding `blob`'s
- name).
- """
- ...
-
- def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]:
- """Retrieve the first occurrence of the blob identified by `id`."""
- ...
-
- def content_find_all(
- self, id: Sha1Git, limit: Optional[int] = None
- ) -> Generator[ProvenanceResult, None, None]:
- """Retrieve all the occurrences of the blob identified by `id`."""
- ...
-
- def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]:
- """Retrieve the earliest known date of `blob`."""
- ...
-
- def content_get_early_dates(
- self, blobs: Iterable[FileEntry]
- ) -> Dict[Sha1Git, datetime]:
- """Retrieve the earliest known date for each blob in `blobs`. If some blob has
- no associated date, it is not present in the resulting dictionary.
- """
- ...
-
- def content_set_early_date(self, blob: FileEntry, date: datetime) -> None:
- """Associate `date` to `blob` as it's earliest known date."""
- ...
-
- def directory_add_to_revision(
- self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes
- ) -> None:
- """Associate `directory` with `revision` in the provenance model. `path` is the
- absolute path from `revision`'s root directory to `directory` (including
- `directory`'s name).
- """
- ...
-
- def directory_already_flattenned(self, directory: DirectoryEntry) -> Optional[bool]:
- """Check if the directory is already flattenned in the provenance model. If the
- directory is unknown for the model, the methods returns None.
- """
- ...
-
- def directory_flag_as_flattenned(self, directory: DirectoryEntry) -> None:
- """Mark the directory as flattenned in the provenance model. If the
- directory is unknown for the model, this method has no effect.
- """
- ...
-
- def directory_get_date_in_isochrone_frontier(
- self, directory: DirectoryEntry
- ) -> Optional[datetime]:
- """Retrieve the earliest known date of `directory` as an isochrone frontier in
- the provenance model.
- """
- ...
-
- def directory_get_dates_in_isochrone_frontier(
- self, dirs: Iterable[DirectoryEntry]
- ) -> Dict[Sha1Git, datetime]:
- """Retrieve the earliest known date for each directory in `dirs` as isochrone
- frontiers provenance model. If some directory has no associated date, it is not
- present in the resulting dictionary.
- """
- ...
-
- def directory_set_date_in_isochrone_frontier(
- self, directory: DirectoryEntry, date: datetime
- ) -> None:
- """Associate `date` to `directory` as it's earliest known date as an isochrone
- frontier in the provenance model.
- """
- ...
-
- def open(self) -> None:
- """Open connection to the underlying `storage` and allocate necessary
- resources.
- """
- ...
-
- def origin_add(self, origin: OriginEntry) -> None:
- """Add `origin` to the provenance model."""
- ...
-
- def revision_add(self, revision: RevisionEntry) -> None:
- """Add `revision` to the provenance model. This implies storing `revision`'s
- date in the model, thus `revision.date` must be a valid date.
- """
- ...
-
- def revision_add_before_revision(
- self, head_id: Sha1Git, revision_id: Sha1Git
- ) -> None:
- """Associate `revision_id` to `head_id` as an ancestor of the latter."""
- ...
-
- def revision_add_to_origin(
- self, origin: OriginEntry, revision: RevisionEntry
- ) -> None:
- """Associate `revision` to `origin` as a head revision of the latter (ie. the
- target of an snapshot for `origin` in the archive)."""
- ...
-
- def revision_is_head(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is associated as a head revision for some origin."""
- ...
-
- def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]:
- """Retrieve the date associated to `revision`."""
- ...
-
- def revision_get_preferred_origin(self, revision_id: Sha1Git) -> Optional[Sha1Git]:
- """Retrieve the preferred origin associated to `revision`."""
- ...
-
- def revision_set_preferred_origin(
- self, origin: OriginEntry, revision_id: Sha1Git
- ) -> None:
- """Associate `origin` as the preferred origin for `revision`."""
- ...
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/storage/postgresql.py
rename from swh/provenance/postgresql/provenance.py
rename to swh/provenance/storage/postgresql.py
--- a/swh/provenance/postgresql/provenance.py
+++ b/swh/provenance/storage/postgresql.py
@@ -19,8 +19,7 @@
from swh.core.db import BaseDb
from swh.core.statsd import statsd
from swh.model.model import Sha1Git
-
-from ..interface import (
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
ProvenanceResult,
diff --git a/swh/provenance/multiplexer/__init__.py b/swh/provenance/storage/rabbitmq/__init__.py
rename from swh/provenance/multiplexer/__init__.py
rename to swh/provenance/storage/rabbitmq/__init__.py
diff --git a/swh/provenance/api/client.py b/swh/provenance/storage/rabbitmq/client.py
rename from swh/provenance/api/client.py
rename to swh/provenance/storage/rabbitmq/client.py
--- a/swh/provenance/api/client.py
+++ b/swh/provenance/storage/rabbitmq/client.py
@@ -24,9 +24,13 @@
from swh.core.api.serializers import encode_data_client as encode_data
from swh.core.api.serializers import msgpack_loads as decode_data
from swh.core.statsd import statsd
+from swh.provenance.storage import get_provenance_storage
+from swh.provenance.storage.interface import (
+ ProvenanceStorageInterface,
+ RelationData,
+ RelationType,
+)
-from .. import get_provenance_storage
-from ..interface import ProvenanceStorageInterface, RelationData, RelationType
from .serializers import DECODERS, ENCODERS
from .server import ProvenanceStorageRabbitMQServer
diff --git a/swh/provenance/api/serializers.py b/swh/provenance/storage/rabbitmq/serializers.py
rename from swh/provenance/api/serializers.py
rename to swh/provenance/storage/rabbitmq/serializers.py
diff --git a/swh/provenance/api/server.py b/swh/provenance/storage/rabbitmq/server.py
rename from swh/provenance/api/server.py
rename to swh/provenance/storage/rabbitmq/server.py
--- a/swh/provenance/api/server.py
+++ b/swh/provenance/storage/rabbitmq/server.py
@@ -14,7 +14,7 @@
import threading
from typing import Any, Callable
from typing import Counter as TCounter
-from typing import Dict, Generator, Iterable, List, Optional, Set, Tuple, Union, cast
+from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union, cast
import pika
import pika.channel
@@ -29,15 +29,15 @@
from swh.core.api.serializers import msgpack_loads as decode_data
from swh.model.hashutil import hash_to_hex
from swh.model.model import Sha1Git
-
-from ..interface import (
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
RelationData,
RelationType,
RevisionData,
)
-from ..util import path_id
+from swh.provenance.util import path_id
+
from .serializers import DECODERS, ENCODERS
LOG_FORMAT = (
@@ -540,6 +540,7 @@
:param int prefetch_count: Prefetch value for the RabbitMQ connection when
receiving messaged
+
"""
self._workers: List[ProvenanceStorageRabbitMQWorker] = []
for exchange in ProvenanceStorageRabbitMQServer.get_exchanges():
@@ -582,7 +583,7 @@
self._running = False
@staticmethod
- def get_binding_keys(exchange: str, range: int) -> Generator[str, None, None]:
+ def get_binding_keys(exchange: str, range: int) -> Iterator[str]:
for meth_name, relation in ProvenanceStorageRabbitMQServer.get_meth_names(
exchange
):
@@ -611,7 +612,7 @@
return exchange
@staticmethod
- def get_exchanges() -> Generator[str, None, None]:
+ def get_exchanges() -> Iterator[str]:
yield from [entity.value for entity in EntityType] + ["location"]
@staticmethod
@@ -624,7 +625,7 @@
@staticmethod
def get_meth_names(
exchange: str,
- ) -> Generator[Tuple[str, Optional[RelationType]], None, None]:
+ ) -> Iterator[Tuple[str, Optional[RelationType]]]:
if exchange == EntityType.CONTENT.value:
yield from [
("content_add", None),
@@ -648,7 +649,7 @@
yield "location_add", None
@staticmethod
- def get_ranges(unused_exchange: str) -> Generator[int, None, None]:
+ def get_ranges(unused_exchange: str) -> Iterator[int]:
# XXX: we might want to have a different range per exchange
yield from range(ProvenanceStorageRabbitMQServer.queue_count)
diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py
deleted file mode 100644
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -22,11 +22,13 @@
from swh.graph.http_rpc_server import make_app
from swh.journal.serializers import msgpack_ext_hook
from swh.model.model import BaseModel, TimestampWithTimezone
-from swh.provenance import get_provenance, get_provenance_storage
-from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import ProvenanceInterface, ProvenanceStorageInterface
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
-from swh.provenance.storage.archive import ArchiveStorage
+from swh.provenance import get_provenance
+from swh.provenance.archive.interface import ArchiveInterface
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.storage import get_provenance_storage
+from swh.provenance.storage.interface import ProvenanceStorageInterface
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from swh.storage.interface import StorageInterface
from swh.storage.replay import OBJECT_CONVERTERS, OBJECT_FIXERS, process_replay_objects
diff --git a/swh/provenance/tests/test_archive_interface.py b/swh/provenance/tests/test_archive_interface.py
--- a/swh/provenance/tests/test_archive_interface.py
+++ b/swh/provenance/tests/test_archive_interface.py
@@ -30,10 +30,10 @@
)
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.multiplexer.archive import ArchiveMultiplexed
-from swh.provenance.postgresql.archive import ArchivePostgreSQL
-from swh.provenance.storage.archive import ArchiveStorage
-from swh.provenance.swhgraph.archive import ArchiveGraph
+from swh.provenance.archive.multiplexer import ArchiveMultiplexed
+from swh.provenance.archive.postgresql import ArchivePostgreSQL
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.archive.swhgraph import ArchiveGraph
from swh.provenance.tests.conftest import fill_storage, grpc_server, load_repo_data
from swh.storage.interface import StorageInterface
from swh.storage.postgresql.storage import Storage
diff --git a/swh/provenance/tests/test_conflict_resolution.py b/swh/provenance/tests/test_conflict_resolution.py
--- a/swh/provenance/tests/test_conflict_resolution.py
+++ b/swh/provenance/tests/test_conflict_resolution.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,13 +8,13 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
-from swh.provenance.api.server import (
+from swh.provenance.storage.interface import DirectoryData, RelationData, RevisionData
+from swh.provenance.storage.rabbitmq.server import (
resolve_dates,
resolve_directory,
resolve_relation,
resolve_revision,
)
-from swh.provenance.interface import DirectoryData, RelationData, RevisionData
def test_resolve_dates() -> None:
diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py
--- a/swh/provenance/tests/test_conftest.py
+++ b/swh/provenance/tests/test_conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
diff --git a/swh/provenance/tests/test_consistency.py b/swh/provenance/tests/test_consistency.py
--- a/swh/provenance/tests/test_consistency.py
+++ b/swh/provenance/tests/test_consistency.py
@@ -1,17 +1,14 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import (
- DirectoryData,
- ProvenanceInterface,
- ProvenanceResult,
-)
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import RevisionEntry
-from swh.provenance.revision import revision_add
+from swh.provenance.storage.interface import DirectoryData, ProvenanceResult
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py
--- a/swh/provenance/tests/test_directory_flatten.py
+++ b/swh/provenance/tests/test_directory_flatten.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,15 +8,11 @@
from typing import Tuple
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.directory import directory_add, directory_flatten_range
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add, directory_flatten_range
-from swh.provenance.interface import (
- DirectoryData,
- ProvenanceInterface,
- RelationData,
- RelationType,
-)
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, FileEntry
+from swh.provenance.storage.interface import DirectoryData, RelationData, RelationType
from swh.provenance.tests.conftest import fill_storage, load_repo_data
diff --git a/swh/provenance/tests/test_directory_iterator.py b/swh/provenance/tests/test_directory_iterator.py
--- a/swh/provenance/tests/test_directory_iterator.py
+++ b/swh/provenance/tests/test_directory_iterator.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
-from swh.provenance.directory import CSVDirectoryIterator
+from swh.provenance.algos.directory import CSVDirectoryIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -7,11 +7,10 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.origin import HistoryGraph, origin_add_revision
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import HistoryGraph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add_revision
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_init.py b/swh/provenance/tests/test_init.py
--- a/swh/provenance/tests/test_init.py
+++ b/swh/provenance/tests/test_init.py
@@ -3,10 +3,10 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from swh.provenance import get_archive
-from swh.provenance.multiplexer.archive import ArchiveMultiplexed
-from swh.provenance.storage.archive import ArchiveStorage
-from swh.provenance.swhgraph.archive import ArchiveGraph
+from swh.provenance.archive import get_archive
+from swh.provenance.archive.multiplexer import ArchiveMultiplexed
+from swh.provenance.archive.storage import ArchiveStorage
+from swh.provenance.archive.swhgraph import ArchiveGraph
def test_multiplexer_configuration():
diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py
--- a/swh/provenance/tests/test_isochrone_graph.py
+++ b/swh/provenance/tests/test_isochrone_graph.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,11 +11,15 @@
import yaml
from swh.model.hashutil import hash_to_bytes
+from swh.provenance.algos.isochrone_graph import (
+ DirectoryTooLarge,
+ IsochroneNode,
+ build_isochrone_graph,
+)
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.graph import DirectoryTooLarge, IsochroneNode, build_isochrone_graph
from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_origin_iterator.py b/swh/provenance/tests/test_origin_iterator.py
--- a/swh/provenance/tests/test_origin_iterator.py
+++ b/swh/provenance/tests/test_origin_iterator.py
@@ -5,7 +5,7 @@
import pytest
-from swh.provenance.origin import CSVOriginIterator
+from swh.provenance.algos.origin import CSVOriginIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data
from swh.storage.algos.origin import (
iter_origin_visit_statuses,
diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py
--- a/swh/provenance/tests/test_origin_revision_layer.py
+++ b/swh/provenance/tests/test_origin_revision_layer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,10 +11,11 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.origin import origin_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import OriginEntry
-from swh.provenance.origin import origin_add
+from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data
diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py
--- a/swh/provenance/tests/test_provenance_db.py
+++ b/swh/provenance/tests/test_provenance_db.py
@@ -1,10 +1,10 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from swh.provenance.interface import ProvenanceInterface
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
def test_provenance_flavor(provenance: ProvenanceInterface) -> None:
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -12,21 +12,21 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Origin, Sha1Git
+from swh.provenance.algos.origin import origin_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.interface import (
+from swh.provenance.interface import ProvenanceInterface
+from swh.provenance.model import OriginEntry, RevisionEntry
+from swh.provenance.provenance import Provenance
+from swh.provenance.storage.interface import (
DirectoryData,
EntityType,
- ProvenanceInterface,
ProvenanceResult,
ProvenanceStorageInterface,
RelationData,
RelationType,
RevisionData,
)
-from swh.provenance.model import OriginEntry, RevisionEntry
-from swh.provenance.origin import origin_add
-from swh.provenance.provenance import Provenance
-from swh.provenance.revision import revision_add
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
diff --git a/swh/provenance/tests/test_provenance_storage_rabbitmq.py b/swh/provenance/tests/test_provenance_storage_rabbitmq.py
--- a/swh/provenance/tests/test_provenance_storage_rabbitmq.py
+++ b/swh/provenance/tests/test_provenance_storage_rabbitmq.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from typing import Any, Dict, Generator
import pytest
from swh.provenance import get_provenance_storage
-from swh.provenance.interface import ProvenanceStorageInterface
+from swh.provenance.storage.interface import ProvenanceStorageInterface
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
@@ -15,7 +20,7 @@
) -> Generator[ProvenanceStorageInterface, None, None]:
"""Return a working and initialized ProvenanceStorageInterface object"""
- from swh.provenance.api.server import ProvenanceStorageRabbitMQServer
+ from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer
host = rabbitmq.args["host"]
port = rabbitmq.args["port"]
diff --git a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
--- a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
+++ b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_provenance_storage_without_path.py b/swh/provenance/tests/test_provenance_storage_without_path.py
--- a/swh/provenance/tests/test_provenance_storage_without_path.py
+++ b/swh/provenance/tests/test_provenance_storage_without_path.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
--- a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
+++ b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py
@@ -1,9 +1,14 @@
+# Copyright (C) 2021-2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
from functools import partial
from pytest_postgresql import factories
from swh.core.db.db_utils import initialize_database_for_module
-from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql
+from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql
from .test_provenance_storage import TestProvenanceStorage # noqa: F401
diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py
--- a/swh/provenance/tests/test_revision_content_layer.py
+++ b/swh/provenance/tests/test_revision_content_layer.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,11 +11,12 @@
from swh.model.hashutil import hash_to_bytes
from swh.model.model import Sha1Git
+from swh.provenance.algos.directory import directory_add
+from swh.provenance.algos.revision import revision_add
from swh.provenance.archive import ArchiveInterface
-from swh.provenance.directory import directory_add
-from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType
+from swh.provenance.interface import ProvenanceInterface
from swh.provenance.model import DirectoryEntry, RevisionEntry
-from swh.provenance.revision import revision_add
+from swh.provenance.storage.interface import EntityType, RelationType
from swh.provenance.tests.conftest import (
fill_storage,
get_datafile,
diff --git a/swh/provenance/tests/test_revision_iterator.py b/swh/provenance/tests/test_revision_iterator.py
--- a/swh/provenance/tests/test_revision_iterator.py
+++ b/swh/provenance/tests/test_revision_iterator.py
@@ -1,11 +1,11 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import pytest
-from swh.provenance.revision import CSVRevisionIterator
+from swh.provenance.algos.revision import CSVRevisionIterator
from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt
from swh.storage.interface import StorageInterface
diff --git a/swh/provenance/tests/test_routing_keys.py b/swh/provenance/tests/test_routing_keys.py
--- a/swh/provenance/tests/test_routing_keys.py
+++ b/swh/provenance/tests/test_routing_keys.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -6,8 +6,8 @@
import pytest
from swh.model.hashutil import hash_to_bytes
-from swh.provenance.api.server import ProvenanceStorageRabbitMQServer
-from swh.provenance.interface import RelationType
+from swh.provenance.storage.interface import RelationType
+from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer
def test_routing_keys_for_entity() -> None:
diff --git a/swh/provenance/tests/test_split_ranges.py b/swh/provenance/tests/test_split_ranges.py
--- a/swh/provenance/tests/test_split_ranges.py
+++ b/swh/provenance/tests/test_split_ranges.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 The Software Heritage developers
+# Copyright (C) 2021-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -8,8 +8,8 @@
import pytest
from swh.model.hashutil import hash_to_bytes
-from swh.provenance.api.client import split_ranges
-from swh.provenance.interface import RelationData, RelationType
+from swh.provenance.storage.interface import RelationData, RelationType
+from swh.provenance.storage.rabbitmq.client import split_ranges
def test_split_ranges_for_relation() -> None:

File Metadata

Mime Type
text/plain
Expires
Dec 19 2024, 6:42 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227590

Event Timeline