diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py --- a/swh/provenance/__init__.py +++ b/swh/provenance/__init__.py @@ -5,66 +5,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING -import warnings - -if TYPE_CHECKING: - from .archive import ArchiveInterface - from .interface import ProvenanceInterface, ProvenanceStorageInterface - - -def get_archive(cls: str, **kwargs) -> ArchiveInterface: - """Get an archive object of class ``cls`` with arguments ``args``. - - Args: - cls: archive's class, either 'api', 'direct' or 'graph' - args: dictionary of arguments passed to the archive class constructor - - Returns: - an instance of archive object (either using swh.storage API or direct - queries to the archive's database) - - Raises: - :cls:`ValueError` if passed an unknown archive class. - """ - if cls == "api": - from swh.storage import get_storage - - from .storage.archive import ArchiveStorage - - return ArchiveStorage(get_storage(**kwargs["storage"])) - - elif cls == "direct": - from swh.core.db import BaseDb - - from .postgresql.archive import ArchivePostgreSQL - - return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn) - - elif cls == "graph": - try: - from swh.storage import get_storage - - from .swhgraph.archive import ArchiveGraph - - return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"])) - - except ModuleNotFoundError: - raise EnvironmentError( - "Graph configuration required but module is not installed." 
- ) - elif cls == "multiplexer": - - from .multiplexer.archive import ArchiveMultiplexed - - archives = [] - for ctr, archive in enumerate(kwargs["archives"]): - name = archive.pop("name", f"backend_{ctr}") - archives.append((name, get_archive(**archive))) - - return ArchiveMultiplexed(archives) - else: - raise ValueError +from .interface import ProvenanceInterface +from .storage import get_provenance_storage def get_provenance(**kwargs) -> ProvenanceInterface: @@ -82,43 +24,4 @@ return Provenance(get_provenance_storage(**kwargs)) -def get_provenance_storage(cls: str, **kwargs) -> ProvenanceStorageInterface: - """Get an archive object of class ``cls`` with arguments ``args``. - - Args: - cls: storage's class, only 'local' is currently supported - args: dictionary of arguments passed to the storage class constructor - - Returns: - an instance of storage object - - Raises: - :cls:`ValueError` if passed an unknown archive class. - """ - if cls in ["local", "postgresql"]: - from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql - - if cls == "local": - warnings.warn( - '"local" class is deprecated for provenance storage, please ' - 'use "postgresql" class instead.', - DeprecationWarning, - ) - - raise_on_commit = kwargs.get("raise_on_commit", False) - return ProvenanceStoragePostgreSql( - raise_on_commit=raise_on_commit, **kwargs["db"] - ) - - elif cls == "rabbitmq": - from .api.client import ProvenanceStorageRabbitMQClient - - rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs) - if TYPE_CHECKING: - assert isinstance(rmq_storage, ProvenanceStorageInterface) - return rmq_storage - - raise ValueError - - get_datastore = get_provenance_storage diff --git a/swh/provenance/archive/__init__.py b/swh/provenance/archive/__init__.py new file mode 100644 --- /dev/null +++ b/swh/provenance/archive/__init__.py @@ -0,0 +1,62 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# 
License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from __future__ import annotations + +from .interface import ArchiveInterface + + +def get_archive(cls: str, **kwargs) -> ArchiveInterface: + """Get an ArchiveInterface-like object of class ``cls`` with arguments ``args``. + + Args: + cls: archive's class, either 'api', 'direct', 'graph' or 'multiplexer' + args: dictionary of arguments passed to the archive class constructor + + Returns: + an instance of archive object (either using swh.storage API or direct + queries to the archive's database) + + Raises: + :exc:`ValueError` if passed an unknown archive class. + """ + if cls == "api": + from swh.storage import get_storage + + from .storage import ArchiveStorage + + return ArchiveStorage(get_storage(**kwargs["storage"])) + + elif cls == "direct": + from swh.core.db import BaseDb + + from .postgresql import ArchivePostgreSQL + + return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn) + + elif cls == "graph": + try: + from swh.storage import get_storage + + from .swhgraph import ArchiveGraph + + return ArchiveGraph(kwargs.get("url"), get_storage(**kwargs["storage"])) + + except ModuleNotFoundError: + raise EnvironmentError( + "Graph configuration required but module is not installed." 
+ ) + elif cls == "multiplexer": + + from .multiplexer import ArchiveMultiplexed + + archives = [] + for ctr, archive in enumerate(kwargs["archives"]): + name = archive.pop("name", f"backend_{ctr}") + archives.append((name, get_archive(**archive))) + + return ArchiveMultiplexed(archives) + else: + raise ValueError diff --git a/swh/provenance/archive.py b/swh/provenance/archive/interface.py rename from swh/provenance/archive.py rename to swh/provenance/archive/interface.py diff --git a/swh/provenance/multiplexer/archive.py b/swh/provenance/archive/multiplexer.py rename from swh/provenance/multiplexer/archive.py rename to swh/provenance/archive/multiplexer.py diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/archive/postgresql.py rename from swh/provenance/postgresql/archive.py rename to swh/provenance/archive/postgresql.py diff --git a/swh/provenance/storage/archive.py b/swh/provenance/archive/storage.py rename from swh/provenance/storage/archive.py rename to swh/provenance/archive/storage.py diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/archive/swhgraph.py rename from swh/provenance/swhgraph/archive.py rename to swh/provenance/archive/swhgraph.py diff --git a/swh/provenance/cli.py b/swh/provenance/cli.py --- a/swh/provenance/cli.py +++ b/swh/provenance/cli.py @@ -144,7 +144,8 @@ @cli.group(name="origin") @click.pass_context def origin(ctx: click.core.Context): - from . import get_archive, get_provenance + from . import get_provenance + from .archive import get_archive archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) @@ -222,7 +223,8 @@ @cli.group(name="revision") @click.pass_context def revision(ctx: click.core.Context): - from . import get_archive, get_provenance + from . 
import get_provenance + from .archive import get_archive archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) @@ -427,7 +429,8 @@ @cli.group(name="directory") @click.pass_context def directory(ctx: click.core.Context): - from . import get_archive, get_provenance + from . import get_provenance + from .archive import get_archive archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) provenance = get_provenance(**ctx.obj["config"]["provenance"]["storage"]) @@ -492,7 +495,8 @@ min_size: int, ) -> None: """Process a provided list of directories in the isochrone frontier.""" - from . import get_archive, get_provenance + from . import get_provenance + from .archive import get_archive from .directory import CSVDirectoryIterator, directory_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) @@ -575,7 +579,8 @@ min_size: int, ) -> None: """Process a provided list of revisions.""" - from . import get_archive, get_provenance + from . import get_provenance + from .archive import get_archive from .revision import CSVRevisionIterator, revision_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) @@ -621,7 +626,8 @@ @deprecated(version="0.0.1", reason="Use `swh provenance origin from-csv` instead") def iter_origins(ctx: click.core.Context, filename: str, limit: Optional[int]) -> None: """Process a provided list of origins.""" - from . import get_archive, get_provenance + from . 
import get_provenance + from .archive import get_archive from .origin import CSVOriginIterator, origin_add archive = get_archive(**ctx.obj["config"]["provenance"]["archive"]) diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py --- a/swh/provenance/interface.py +++ b/swh/provenance/interface.py @@ -5,232 +5,16 @@ from __future__ import annotations -from dataclasses import dataclass from datetime import datetime -import enum from types import TracebackType -from typing import Dict, Generator, Iterable, List, Optional, Set, Type, Union +from typing import Dict, Generator, Iterable, Optional, Type from typing_extensions import Protocol, runtime_checkable -from swh.core.api import remote_api_endpoint from swh.model.model import Sha1Git from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry - - -class EntityType(enum.Enum): - CONTENT = "content" - DIRECTORY = "directory" - REVISION = "revision" - ORIGIN = "origin" - - -class RelationType(enum.Enum): - CNT_EARLY_IN_REV = "content_in_revision" - CNT_IN_DIR = "content_in_directory" - DIR_IN_REV = "directory_in_revision" - REV_IN_ORG = "revision_in_origin" - REV_BEFORE_REV = "revision_before_revision" - - -@dataclass(eq=True, frozen=True) -class ProvenanceResult: - content: Sha1Git - revision: Sha1Git - date: datetime - origin: Optional[str] - path: bytes - - -@dataclass(eq=True, frozen=True) -class DirectoryData: - """Object representing the data associated to a directory in the provenance model, - where `date` is the date of the directory in the isochrone frontier, and `flat` is a - flag acknowledging that a flat model for the elements outside the frontier has - already been created. 
- """ - - date: datetime - flat: bool - - -@dataclass(eq=True, frozen=True) -class RevisionData: - """Object representing the data associated to a revision in the provenance model, - where `date` is the optional date of the revision (specifying it acknowledges that - the revision was already processed by the revision-content algorithm); and `origin` - identifies the preferred origin for the revision, if any. - """ - - date: Optional[datetime] - origin: Optional[Sha1Git] - - -@dataclass(eq=True, frozen=True) -class RelationData: - """Object representing a relation entry in the provenance model, where `src` and - `dst` are the sha1 ids of the entities being related, and `path` is optional - depending on the relation being represented. - """ - - dst: Sha1Git - path: Optional[bytes] - - -@runtime_checkable -class ProvenanceStorageInterface(Protocol): - def __enter__(self) -> ProvenanceStorageInterface: - ... - - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - ... - - @remote_api_endpoint("close") - def close(self) -> None: - """Close connection to the storage and release resources.""" - ... - - @remote_api_endpoint("content_add") - def content_add(self, cnts: Dict[Sha1Git, datetime]) -> bool: - """Add blobs identified by sha1 ids, with an associated date (as paired in - `cnts`) to the provenance storage. Return a boolean stating whether the - information was successfully stored. - """ - ... - - @remote_api_endpoint("content_find_first") - def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: - """Retrieve the first occurrence of the blob identified by `id`.""" - ... - - @remote_api_endpoint("content_find_all") - def content_find_all( - self, id: Sha1Git, limit: Optional[int] = None - ) -> Generator[ProvenanceResult, None, None]: - """Retrieve all the occurrences of the blob identified by `id`.""" - ... 
- - @remote_api_endpoint("content_get") - def content_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, datetime]: - """Retrieve the associated date for each blob sha1 in `ids`.""" - ... - - @remote_api_endpoint("directory_add") - def directory_add(self, dirs: Dict[Sha1Git, DirectoryData]) -> bool: - """Add directories identified by sha1 ids, with associated date and (optional) - flatten flag (as paired in `dirs`) to the provenance storage. If the flatten - flag is set to None, the previous value present in the storage is preserved. - Return a boolean stating if the information was successfully stored. - """ - ... - - @remote_api_endpoint("directory_get") - def directory_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, DirectoryData]: - """Retrieve the associated date and (optional) flatten flag for each directory - sha1 in `ids`. If some directories has no associated date, it is not present in - the resulting dictionary. - """ - ... - - @remote_api_endpoint("directory_iter_not_flattenned") - def directory_iter_not_flattenned( - self, limit: int, start_id: Sha1Git - ) -> List[Sha1Git]: - """Retrieve the unflattenned directories after ``start_id`` up to ``limit`` entries.""" - ... - - @remote_api_endpoint("entity_get_all") - def entity_get_all(self, entity: EntityType) -> Set[Sha1Git]: - """Retrieve all sha1 ids for entities of type `entity` present in the provenance - model. This method is used only in tests. - """ - ... - - @remote_api_endpoint("location_add") - def location_add(self, paths: Iterable[bytes]) -> bool: - """Register the given `paths` in the storage.""" - ... - - @remote_api_endpoint("location_get_all") - def location_get_all(self) -> Set[bytes]: - """Retrieve all paths present in the provenance model. - This method is used only in tests.""" - ... - - @remote_api_endpoint("open") - def open(self) -> None: - """Open connection to the storage and allocate necessary resources.""" - ... 
- - @remote_api_endpoint("origin_add") - def origin_add(self, orgs: Dict[Sha1Git, str]) -> bool: - """Add origins identified by sha1 ids, with their corresponding url (as paired - in `orgs`) to the provenance storage. Return a boolean stating if the - information was successfully stored. - """ - ... - - @remote_api_endpoint("origin_get") - def origin_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, str]: - """Retrieve the associated url for each origin sha1 in `ids`.""" - ... - - @remote_api_endpoint("revision_add") - def revision_add( - self, revs: Union[Iterable[Sha1Git], Dict[Sha1Git, RevisionData]] - ) -> bool: - """Add revisions identified by sha1 ids, with optional associated date or origin - (as paired in `revs`) to the provenance storage. Return a boolean stating if the - information was successfully stored. - """ - ... - - @remote_api_endpoint("revision_get") - def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]: - """Retrieve the associated date and origin for each revision sha1 in `ids`. If - some revision has no associated date nor origin, it is not present in the - resulting dictionary. - """ - ... - - @remote_api_endpoint("relation_add") - def relation_add( - self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]] - ) -> bool: - """Add entries in the selected `relation`. This method assumes all entities - being related are already registered in the storage. See `content_add`, - `directory_add`, `origin_add`, and `revision_add`. - """ - ... - - @remote_api_endpoint("relation_get") - def relation_get( - self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False - ) -> Dict[Sha1Git, Set[RelationData]]: - """Retrieve all entries in the selected `relation` whose source entities are - identified by some sha1 id in `ids`. If `reverse` is set, destination entities - are matched instead. - """ - ... 
- - @remote_api_endpoint("relation_get_all") - def relation_get_all( - self, relation: RelationType - ) -> Dict[Sha1Git, Set[RelationData]]: - """Retrieve all entries in the selected `relation` that are present in the - provenance model. This method is used only in tests. - """ - ... - - @remote_api_endpoint("with_path") - def with_path(self) -> bool: - ... +from .storage.interface import ProvenanceResult, ProvenanceStorageInterface @runtime_checkable diff --git a/swh/provenance/multiplexer/__init__.py b/swh/provenance/multiplexer/__init__.py deleted file mode 100644 diff --git a/swh/provenance/postgresql/__init__.py b/swh/provenance/postgresql/__init__.py deleted file mode 100644 diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -14,16 +14,16 @@ from swh.core.statsd import statsd from swh.model.model import Sha1Git -from .interface import ( +from .interface import ProvenanceInterface +from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry +from .storage.interface import ( DirectoryData, - ProvenanceInterface, ProvenanceResult, ProvenanceStorageInterface, RelationData, RelationType, RevisionData, ) -from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry from .util import path_normalize LOGGER = logging.getLogger(__name__) diff --git a/swh/provenance/storage/__init__.py b/swh/provenance/storage/__init__.py --- a/swh/provenance/storage/__init__.py +++ b/swh/provenance/storage/__init__.py @@ -0,0 +1,52 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from __future__ import annotations + +from typing import TYPE_CHECKING +import warnings + +from .interface import ProvenanceStorageInterface + + +def get_provenance_storage(cls: str, 
**kwargs) -> ProvenanceStorageInterface: + """Get a provenance storage object of class ``cls`` with arguments ``args``. + + Args: + cls: storage's class, either 'postgresql' or 'rabbitmq' ('local' is deprecated) + args: dictionary of arguments passed to the storage class constructor + + Returns: + an instance of storage object + + Raises: + :exc:`ValueError` if passed an unknown storage class. + """ + if cls in ["local", "postgresql"]: + from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql + + if cls == "local": + warnings.warn( + '"local" class is deprecated for provenance storage, please ' + 'use "postgresql" class instead.', + DeprecationWarning, + ) + + raise_on_commit = kwargs.get("raise_on_commit", False) + return ProvenanceStoragePostgreSql( + raise_on_commit=raise_on_commit, **kwargs["db"] + ) + + elif cls == "rabbitmq": + from swh.provenance.storage.rabbitmq.client import ( + ProvenanceStorageRabbitMQClient, + ) + + rmq_storage = ProvenanceStorageRabbitMQClient(**kwargs) + if TYPE_CHECKING: + assert isinstance(rmq_storage, ProvenanceStorageInterface) + return rmq_storage + + raise ValueError diff --git a/swh/provenance/interface.py b/swh/provenance/storage/interface.py copy from swh/provenance/interface.py copy to swh/provenance/storage/interface.py --- a/swh/provenance/interface.py +++ b/swh/provenance/storage/interface.py @@ -16,8 +16,6 @@ from swh.core.api import remote_api_endpoint from swh.model.model import Sha1Git -from .model import DirectoryEntry, FileEntry, OriginEntry, RevisionEntry - class EntityType(enum.Enum): CONTENT = "content" @@ -231,170 +229,3 @@ @remote_api_endpoint("with_path") def with_path(self) -> bool: ... - - -@runtime_checkable -class ProvenanceInterface(Protocol): - storage: ProvenanceStorageInterface - - def __enter__(self) -> ProvenanceInterface: - ... - - def __exit__( - self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - ... 
- - def close(self) -> None: - """Close connection to the underlying `storage` and release resources.""" - ... - - def flush(self) -> None: - """Flush internal cache to the underlying `storage`.""" - ... - - def flush_if_necessary(self) -> bool: - """Flush internal cache to the underlying `storage`, if the cache reached - a threshold (MAX_CACHE_ELEMENTS). - Return True if the cache is flushed, false otherwise. - """ - ... - - def content_add_to_directory( - self, directory: DirectoryEntry, blob: FileEntry, prefix: bytes - ) -> None: - """Associate `blob` with `directory` in the provenance model. `prefix` is the - relative path from `directory` to `blob` (excluding `blob`'s name). - """ - ... - - def content_add_to_revision( - self, revision: RevisionEntry, blob: FileEntry, prefix: bytes - ) -> None: - """Associate `blob` with `revision` in the provenance model. `prefix` is the - absolute path from `revision`'s root directory to `blob` (excluding `blob`'s - name). - """ - ... - - def content_find_first(self, id: Sha1Git) -> Optional[ProvenanceResult]: - """Retrieve the first occurrence of the blob identified by `id`.""" - ... - - def content_find_all( - self, id: Sha1Git, limit: Optional[int] = None - ) -> Generator[ProvenanceResult, None, None]: - """Retrieve all the occurrences of the blob identified by `id`.""" - ... - - def content_get_early_date(self, blob: FileEntry) -> Optional[datetime]: - """Retrieve the earliest known date of `blob`.""" - ... - - def content_get_early_dates( - self, blobs: Iterable[FileEntry] - ) -> Dict[Sha1Git, datetime]: - """Retrieve the earliest known date for each blob in `blobs`. If some blob has - no associated date, it is not present in the resulting dictionary. - """ - ... - - def content_set_early_date(self, blob: FileEntry, date: datetime) -> None: - """Associate `date` to `blob` as it's earliest known date.""" - ... 
- - def directory_add_to_revision( - self, revision: RevisionEntry, directory: DirectoryEntry, path: bytes - ) -> None: - """Associate `directory` with `revision` in the provenance model. `path` is the - absolute path from `revision`'s root directory to `directory` (including - `directory`'s name). - """ - ... - - def directory_already_flattenned(self, directory: DirectoryEntry) -> Optional[bool]: - """Check if the directory is already flattenned in the provenance model. If the - directory is unknown for the model, the methods returns None. - """ - ... - - def directory_flag_as_flattenned(self, directory: DirectoryEntry) -> None: - """Mark the directory as flattenned in the provenance model. If the - directory is unknown for the model, this method has no effect. - """ - ... - - def directory_get_date_in_isochrone_frontier( - self, directory: DirectoryEntry - ) -> Optional[datetime]: - """Retrieve the earliest known date of `directory` as an isochrone frontier in - the provenance model. - """ - ... - - def directory_get_dates_in_isochrone_frontier( - self, dirs: Iterable[DirectoryEntry] - ) -> Dict[Sha1Git, datetime]: - """Retrieve the earliest known date for each directory in `dirs` as isochrone - frontiers provenance model. If some directory has no associated date, it is not - present in the resulting dictionary. - """ - ... - - def directory_set_date_in_isochrone_frontier( - self, directory: DirectoryEntry, date: datetime - ) -> None: - """Associate `date` to `directory` as it's earliest known date as an isochrone - frontier in the provenance model. - """ - ... - - def open(self) -> None: - """Open connection to the underlying `storage` and allocate necessary - resources. - """ - ... - - def origin_add(self, origin: OriginEntry) -> None: - """Add `origin` to the provenance model.""" - ... - - def revision_add(self, revision: RevisionEntry) -> None: - """Add `revision` to the provenance model. 
This implies storing `revision`'s - date in the model, thus `revision.date` must be a valid date. - """ - ... - - def revision_add_before_revision( - self, head_id: Sha1Git, revision_id: Sha1Git - ) -> None: - """Associate `revision_id` to `head_id` as an ancestor of the latter.""" - ... - - def revision_add_to_origin( - self, origin: OriginEntry, revision: RevisionEntry - ) -> None: - """Associate `revision` to `origin` as a head revision of the latter (ie. the - target of an snapshot for `origin` in the archive).""" - ... - - def revision_is_head(self, revision: RevisionEntry) -> bool: - """Check if `revision` is associated as a head revision for some origin.""" - ... - - def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]: - """Retrieve the date associated to `revision`.""" - ... - - def revision_get_preferred_origin(self, revision_id: Sha1Git) -> Optional[Sha1Git]: - """Retrieve the preferred origin associated to `revision`.""" - ... - - def revision_set_preferred_origin( - self, origin: OriginEntry, revision_id: Sha1Git - ) -> None: - """Associate `origin` as the preferred origin for `revision`.""" - ... 
diff --git a/swh/provenance/postgresql/provenance.py b/swh/provenance/storage/postgresql.py rename from swh/provenance/postgresql/provenance.py rename to swh/provenance/storage/postgresql.py --- a/swh/provenance/postgresql/provenance.py +++ b/swh/provenance/storage/postgresql.py @@ -19,8 +19,7 @@ from swh.core.db import BaseDb from swh.core.statsd import statsd from swh.model.model import Sha1Git - -from ..interface import ( +from swh.provenance.storage.interface import ( DirectoryData, EntityType, ProvenanceResult, diff --git a/swh/provenance/api/__init__.py b/swh/provenance/storage/rabbitmq/__init__.py rename from swh/provenance/api/__init__.py rename to swh/provenance/storage/rabbitmq/__init__.py diff --git a/swh/provenance/api/client.py b/swh/provenance/storage/rabbitmq/client.py rename from swh/provenance/api/client.py rename to swh/provenance/storage/rabbitmq/client.py --- a/swh/provenance/api/client.py +++ b/swh/provenance/storage/rabbitmq/client.py @@ -24,9 +24,13 @@ from swh.core.api.serializers import encode_data_client as encode_data from swh.core.api.serializers import msgpack_loads as decode_data from swh.core.statsd import statsd +from swh.provenance.storage import get_provenance_storage +from swh.provenance.storage.interface import ( + ProvenanceStorageInterface, + RelationData, + RelationType, +) -from .. 
import get_provenance_storage -from ..interface import ProvenanceStorageInterface, RelationData, RelationType from .serializers import DECODERS, ENCODERS from .server import ProvenanceStorageRabbitMQServer diff --git a/swh/provenance/api/serializers.py b/swh/provenance/storage/rabbitmq/serializers.py rename from swh/provenance/api/serializers.py rename to swh/provenance/storage/rabbitmq/serializers.py diff --git a/swh/provenance/api/server.py b/swh/provenance/storage/rabbitmq/server.py rename from swh/provenance/api/server.py rename to swh/provenance/storage/rabbitmq/server.py --- a/swh/provenance/api/server.py +++ b/swh/provenance/storage/rabbitmq/server.py @@ -14,7 +14,7 @@ import threading from typing import Any, Callable from typing import Counter as TCounter -from typing import Dict, Generator, Iterable, List, Optional, Set, Tuple, Union, cast +from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union, cast import pika import pika.channel @@ -29,15 +29,15 @@ from swh.core.api.serializers import msgpack_loads as decode_data from swh.model.hashutil import hash_to_hex from swh.model.model import Sha1Git - -from ..interface import ( +from swh.provenance.storage.interface import ( DirectoryData, EntityType, RelationData, RelationType, RevisionData, ) -from ..util import path_id +from swh.provenance.util import path_id + from .serializers import DECODERS, ENCODERS LOG_FORMAT = ( @@ -540,6 +540,7 @@ :param int prefetch_count: Prefetch value for the RabbitMQ connection when receiving messaged + """ self._workers: List[ProvenanceStorageRabbitMQWorker] = [] for exchange in ProvenanceStorageRabbitMQServer.get_exchanges(): @@ -582,7 +583,7 @@ self._running = False @staticmethod - def get_binding_keys(exchange: str, range: int) -> Generator[str, None, None]: + def get_binding_keys(exchange: str, range: int) -> Iterator[str]: for meth_name, relation in ProvenanceStorageRabbitMQServer.get_meth_names( exchange ): @@ -611,7 +612,7 @@ return exchange 
@staticmethod - def get_exchanges() -> Generator[str, None, None]: + def get_exchanges() -> Iterator[str]: yield from [entity.value for entity in EntityType] + ["location"] @staticmethod @@ -624,7 +625,7 @@ @staticmethod def get_meth_names( exchange: str, - ) -> Generator[Tuple[str, Optional[RelationType]], None, None]: + ) -> Iterator[Tuple[str, Optional[RelationType]]]: if exchange == EntityType.CONTENT.value: yield from [ ("content_add", None), @@ -648,7 +649,7 @@ yield "location_add", None @staticmethod - def get_ranges(unused_exchange: str) -> Generator[int, None, None]: + def get_ranges(unused_exchange: str) -> Iterator[int]: # XXX: we might want to have a different range per exchange yield from range(ProvenanceStorageRabbitMQServer.queue_count) diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py deleted file mode 100644 diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py --- a/swh/provenance/tests/conftest.py +++ b/swh/provenance/tests/conftest.py @@ -22,11 +22,13 @@ from swh.graph.http_rpc_server import make_app from swh.journal.serializers import msgpack_ext_hook from swh.model.model import BaseModel, TimestampWithTimezone -from swh.provenance import get_provenance, get_provenance_storage -from swh.provenance.archive import ArchiveInterface -from swh.provenance.interface import ProvenanceInterface, ProvenanceStorageInterface -from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql -from swh.provenance.storage.archive import ArchiveStorage +from swh.provenance import get_provenance +from swh.provenance.archive.interface import ArchiveInterface +from swh.provenance.archive.storage import ArchiveStorage +from swh.provenance.interface import ProvenanceInterface +from swh.provenance.storage import get_provenance_storage +from swh.provenance.storage.interface import ProvenanceStorageInterface +from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql from 
swh.storage.interface import StorageInterface from swh.storage.replay import OBJECT_CONVERTERS, OBJECT_FIXERS, process_replay_objects diff --git a/swh/provenance/tests/test_archive_interface.py b/swh/provenance/tests/test_archive_interface.py --- a/swh/provenance/tests/test_archive_interface.py +++ b/swh/provenance/tests/test_archive_interface.py @@ -30,10 +30,10 @@ ) from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID from swh.provenance.archive import ArchiveInterface -from swh.provenance.multiplexer.archive import ArchiveMultiplexed -from swh.provenance.postgresql.archive import ArchivePostgreSQL -from swh.provenance.storage.archive import ArchiveStorage -from swh.provenance.swhgraph.archive import ArchiveGraph +from swh.provenance.archive.multiplexer import ArchiveMultiplexed +from swh.provenance.archive.postgresql import ArchivePostgreSQL +from swh.provenance.archive.storage import ArchiveStorage +from swh.provenance.archive.swhgraph import ArchiveGraph from swh.provenance.tests.conftest import fill_storage, grpc_server, load_repo_data from swh.storage.interface import StorageInterface from swh.storage.postgresql.storage import Storage diff --git a/swh/provenance/tests/test_conflict_resolution.py b/swh/provenance/tests/test_conflict_resolution.py --- a/swh/provenance/tests/test_conflict_resolution.py +++ b/swh/provenance/tests/test_conflict_resolution.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,13 +8,13 @@ from swh.model.hashutil import hash_to_bytes from swh.model.model import Sha1Git -from swh.provenance.api.server import ( +from swh.provenance.storage.interface import DirectoryData, RelationData, RevisionData +from swh.provenance.storage.rabbitmq.server 
import ( resolve_dates, resolve_directory, resolve_relation, resolve_revision, ) -from swh.provenance.interface import DirectoryData, RelationData, RevisionData def test_resolve_dates() -> None: diff --git a/swh/provenance/tests/test_conftest.py b/swh/provenance/tests/test_conftest.py --- a/swh/provenance/tests/test_conftest.py +++ b/swh/provenance/tests/test_conftest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/provenance/tests/test_consistency.py b/swh/provenance/tests/test_consistency.py --- a/swh/provenance/tests/test_consistency.py +++ b/swh/provenance/tests/test_consistency.py @@ -1,17 +1,14 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model.hashutil import hash_to_bytes from swh.provenance.archive import ArchiveInterface -from swh.provenance.interface import ( - DirectoryData, - ProvenanceInterface, - ProvenanceResult, -) +from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import RevisionEntry from swh.provenance.revision import revision_add +from swh.provenance.storage.interface import DirectoryData, ProvenanceResult from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt diff --git a/swh/provenance/tests/test_directory_flatten.py b/swh/provenance/tests/test_directory_flatten.py --- a/swh/provenance/tests/test_directory_flatten.py +++ b/swh/provenance/tests/test_directory_flatten.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage 
developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -10,13 +10,9 @@ from swh.model.hashutil import hash_to_bytes from swh.provenance.archive import ArchiveInterface from swh.provenance.directory import directory_add, directory_flatten_range -from swh.provenance.interface import ( - DirectoryData, - ProvenanceInterface, - RelationData, - RelationType, -) +from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import DirectoryEntry, FileEntry +from swh.provenance.storage.interface import DirectoryData, RelationData, RelationType from swh.provenance.tests.conftest import fill_storage, load_repo_data diff --git a/swh/provenance/tests/test_directory_iterator.py b/swh/provenance/tests/test_directory_iterator.py --- a/swh/provenance/tests/test_directory_iterator.py +++ b/swh/provenance/tests/test_directory_iterator.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py --- a/swh/provenance/tests/test_history_graph.py +++ b/swh/provenance/tests/test_history_graph.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/provenance/tests/test_init.py b/swh/provenance/tests/test_init.py --- 
a/swh/provenance/tests/test_init.py +++ b/swh/provenance/tests/test_init.py @@ -3,10 +3,10 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.provenance import get_archive -from swh.provenance.multiplexer.archive import ArchiveMultiplexed -from swh.provenance.storage.archive import ArchiveStorage -from swh.provenance.swhgraph.archive import ArchiveGraph +from swh.provenance.archive import get_archive +from swh.provenance.archive.multiplexer import ArchiveMultiplexed +from swh.provenance.archive.storage import ArchiveStorage +from swh.provenance.archive.swhgraph import ArchiveGraph def test_multiplexer_configuration(): diff --git a/swh/provenance/tests/test_isochrone_graph.py b/swh/provenance/tests/test_isochrone_graph.py --- a/swh/provenance/tests/test_isochrone_graph.py +++ b/swh/provenance/tests/test_isochrone_graph.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/provenance/tests/test_origin_revision_layer.py b/swh/provenance/tests/test_origin_revision_layer.py --- a/swh/provenance/tests/test_origin_revision_layer.py +++ b/swh/provenance/tests/test_origin_revision_layer.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -12,9 +12,10 @@ from swh.model.hashutil import hash_to_bytes from swh.model.model import Sha1Git from swh.provenance.archive import ArchiveInterface -from swh.provenance.interface import EntityType, 
ProvenanceInterface, RelationType +from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import OriginEntry from swh.provenance.origin import origin_add +from swh.provenance.storage.interface import EntityType, RelationType from swh.provenance.tests.conftest import fill_storage, get_datafile, load_repo_data diff --git a/swh/provenance/tests/test_provenance_db.py b/swh/provenance/tests/test_provenance_db.py --- a/swh/provenance/tests/test_provenance_db.py +++ b/swh/provenance/tests/test_provenance_db.py @@ -1,10 +1,10 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.provenance.interface import ProvenanceInterface -from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql +from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql def test_provenance_flavor(provenance: ProvenanceInterface) -> None: diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py --- a/swh/provenance/tests/test_provenance_storage.py +++ b/swh/provenance/tests/test_provenance_storage.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,20 +13,20 @@ from swh.model.hashutil import hash_to_bytes from swh.model.model import Origin, Sha1Git from swh.provenance.archive import ArchiveInterface -from swh.provenance.interface import ( +from swh.provenance.interface import ProvenanceInterface +from swh.provenance.model import OriginEntry, 
RevisionEntry +from swh.provenance.origin import origin_add +from swh.provenance.provenance import Provenance +from swh.provenance.revision import revision_add +from swh.provenance.storage.interface import ( DirectoryData, EntityType, - ProvenanceInterface, ProvenanceResult, ProvenanceStorageInterface, RelationData, RelationType, RevisionData, ) -from swh.provenance.model import OriginEntry, RevisionEntry -from swh.provenance.origin import origin_add -from swh.provenance.provenance import Provenance -from swh.provenance.revision import revision_add from swh.provenance.tests.conftest import fill_storage, load_repo_data, ts2dt diff --git a/swh/provenance/tests/test_provenance_storage_rabbitmq.py b/swh/provenance/tests/test_provenance_storage_rabbitmq.py --- a/swh/provenance/tests/test_provenance_storage_rabbitmq.py +++ b/swh/provenance/tests/test_provenance_storage_rabbitmq.py @@ -1,9 +1,14 @@ +# Copyright (C) 2021-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + from typing import Any, Dict, Generator import pytest from swh.provenance import get_provenance_storage -from swh.provenance.interface import ProvenanceStorageInterface +from swh.provenance.storage.interface import ProvenanceStorageInterface from .test_provenance_storage import TestProvenanceStorage # noqa: F401 @@ -15,7 +20,7 @@ ) -> Generator[ProvenanceStorageInterface, None, None]: """Return a working and initialized ProvenanceStorageInterface object""" - from swh.provenance.api.server import ProvenanceStorageRabbitMQServer + from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer host = rabbitmq.args["host"] port = rabbitmq.args["port"] diff --git a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py --- 
a/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py +++ b/swh/provenance/tests/test_provenance_storage_with_path_denormalized.py @@ -1,9 +1,14 @@ +# Copyright (C) 2021-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + from functools import partial from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module -from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql +from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql from .test_provenance_storage import TestProvenanceStorage # noqa: F401 diff --git a/swh/provenance/tests/test_provenance_storage_without_path.py b/swh/provenance/tests/test_provenance_storage_without_path.py --- a/swh/provenance/tests/test_provenance_storage_without_path.py +++ b/swh/provenance/tests/test_provenance_storage_without_path.py @@ -1,9 +1,14 @@ +# Copyright (C) 2021-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + from functools import partial from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module -from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql +from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql from .test_provenance_storage import TestProvenanceStorage # noqa: F401 diff --git a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py --- a/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py +++ 
b/swh/provenance/tests/test_provenance_storage_without_path_denormalized.py @@ -1,9 +1,14 @@ +# Copyright (C) 2021-2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + from functools import partial from pytest_postgresql import factories from swh.core.db.db_utils import initialize_database_for_module -from swh.provenance.postgresql.provenance import ProvenanceStoragePostgreSql +from swh.provenance.storage.postgresql import ProvenanceStoragePostgreSql from .test_provenance_storage import TestProvenanceStorage # noqa: F401 diff --git a/swh/provenance/tests/test_revision_content_layer.py b/swh/provenance/tests/test_revision_content_layer.py --- a/swh/provenance/tests/test_revision_content_layer.py +++ b/swh/provenance/tests/test_revision_content_layer.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,9 +13,10 @@ from swh.model.model import Sha1Git from swh.provenance.archive import ArchiveInterface from swh.provenance.directory import directory_add -from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType +from swh.provenance.interface import ProvenanceInterface from swh.provenance.model import DirectoryEntry, RevisionEntry from swh.provenance.revision import revision_add +from swh.provenance.storage.interface import EntityType, RelationType from swh.provenance.tests.conftest import ( fill_storage, get_datafile, diff --git a/swh/provenance/tests/test_revision_iterator.py b/swh/provenance/tests/test_revision_iterator.py --- a/swh/provenance/tests/test_revision_iterator.py +++ 
b/swh/provenance/tests/test_revision_iterator.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/provenance/tests/test_routing_keys.py b/swh/provenance/tests/test_routing_keys.py --- a/swh/provenance/tests/test_routing_keys.py +++ b/swh/provenance/tests/test_routing_keys.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -6,8 +6,8 @@ import pytest from swh.model.hashutil import hash_to_bytes -from swh.provenance.api.server import ProvenanceStorageRabbitMQServer -from swh.provenance.interface import RelationType +from swh.provenance.storage.interface import RelationType +from swh.provenance.storage.rabbitmq.server import ProvenanceStorageRabbitMQServer def test_routing_keys_for_entity() -> None: diff --git a/swh/provenance/tests/test_split_ranges.py b/swh/provenance/tests/test_split_ranges.py --- a/swh/provenance/tests/test_split_ranges.py +++ b/swh/provenance/tests/test_split_ranges.py @@ -1,4 +1,4 @@ -# Copyright (C) 2021 The Software Heritage developers +# Copyright (C) 2021-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -8,8 +8,8 @@ import pytest from swh.model.hashutil import hash_to_bytes -from swh.provenance.api.client import split_ranges -from swh.provenance.interface import RelationData, RelationType +from 
swh.provenance.storage.interface import RelationData, RelationType +from swh.provenance.storage.rabbitmq.client import split_ranges def test_split_ranges_for_relation() -> None: