D6054.id21945.diff
D6054: Add test for the different `ProvenanceStorageInterface` implementations
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -20,5 +20,8 @@
[mypy-pytest.*]
ignore_missing_imports = True
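+# pytest_postgresql ships without type stubs, so silence the missing-import error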
+[mypy-pytest_postgresql.*]
+ignore_missing_imports = True
+
[mypy-psycopg2.*]
ignore_missing_imports = True
diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py
--- a/swh/provenance/tests/conftest.py
+++ b/swh/provenance/tests/conftest.py
@@ -10,6 +10,7 @@
import msgpack
import psycopg2.extensions
import pytest
+from pytest_postgresql.factories import postgresql
from swh.journal.serializers import msgpack_ext_hook
from swh.provenance import get_provenance, get_provenance_storage
@@ -49,23 +50,43 @@
@pytest.fixture(params=["local", "remote"])
-def provenance(
+def provenance_storage(
request: SubRequest,
populated_db: Dict[str, str],
swh_rpc_client: RemoteProvenanceStorage,
-) -> ProvenanceInterface:
- """Return a working and initialized ProvenanceInterface object"""
+) -> ProvenanceStorageInterface:
+ """Return a working and initialized ProvenanceStorageInterface object"""
if request.param == "remote":
- from swh.provenance.provenance import Provenance
-
assert isinstance(swh_rpc_client, ProvenanceStorageInterface)
- return Provenance(swh_rpc_client)
+ return swh_rpc_client
else:
# in test sessions, we DO want to raise any exception occurring at commit time
- prov = get_provenance(cls=request.param, db=populated_db, raise_on_commit=True)
- return prov
+ storage = get_provenance_storage(
+ cls=request.param, db=populated_db, raise_on_commit=True
+ )
+ return storage
+
+
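+# pytest-postgresql fixture factory: builds a fixture that yields a psycopg2
+# connection to a per-test "provenance_tests" database spawned by the
+# process-scoped postgresql_proc fixture.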
+provenance_postgresql = postgresql("postgresql_proc", dbname="provenance_tests")
+
+
+@pytest.fixture
+def provenance(
+ provenance_postgresql: psycopg2.extensions.connection,
+) -> ProvenanceInterface:
+ """Return a working and initialized ProvenanceInterface object"""
+
+ from swh.core.cli.db import populate_database_for_package
+
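+    # Load swh.provenance's schema into the fresh database, selecting the
+    # "with-path" flavor (the variant that records filesystem paths).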
+ populate_database_for_package(
+ "swh.provenance", provenance_postgresql.dsn, flavor="with-path"
+ )
+ # in test sessions, we DO want to raise any exception occurring at commit time
+ return get_provenance(
+ cls="local", db=provenance_postgresql.get_dsn_parameters(), raise_on_commit=True
+ )
@pytest.fixture
diff --git a/swh/provenance/tests/test_provenance_storage.py b/swh/provenance/tests/test_provenance_storage.py
--- a/swh/provenance/tests/test_provenance_storage.py
+++ b/swh/provenance/tests/test_provenance_storage.py
@@ -3,9 +3,315 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from datetime import datetime
import inspect
+import os
+from typing import Any, Dict, Iterable, Optional, Set
-from ..interface import ProvenanceInterface, ProvenanceStorageInterface
+import pytest
+
+from swh.model.hashutil import hash_to_bytes
+from swh.model.identifiers import origin_identifier
+from swh.model.model import Sha1Git
+from swh.provenance.interface import (
+ EntityType,
+ ProvenanceInterface,
+ ProvenanceResult,
+ ProvenanceStorageInterface,
+ RelationData,
+ RelationType,
+)
+from swh.provenance.tests.conftest import load_repo_data
+from swh.provenance.tests.test_provenance_db import ts2dt
+
+
+def relation_add_and_compare_result(
+ relation: RelationType,
+ data: Set[RelationData],
+ refstorage: ProvenanceStorageInterface,
+ storage: ProvenanceStorageInterface,
+ with_path: bool = True,
+) -> None:
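+    # Feed the same relation data to the reference storage and the storage
+    # under test, then check that src-indexed, dst-indexed (reverse) and
+    # exhaustive queries all agree, normalizing paths for path-less backends.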
+ assert data
+ assert refstorage.relation_add(relation, data) == storage.relation_add(
+ relation, data
+ )
+
+ assert relation_compare_result(
+ refstorage.relation_get(relation, (reldata.src for reldata in data)),
+ storage.relation_get(relation, (reldata.src for reldata in data)),
+ with_path,
+ )
+ assert relation_compare_result(
+ refstorage.relation_get(
+ relation,
+ (reldata.dst for reldata in data),
+ reverse=True,
+ ),
+ storage.relation_get(
+ relation,
+ (reldata.dst for reldata in data),
+ reverse=True,
+ ),
+ with_path,
+ )
+ assert relation_compare_result(
+ refstorage.relation_get_all(relation),
+ storage.relation_get_all(relation),
+ with_path,
+ )
+
+
+def relation_compare_result(
+ expected: Set[RelationData], computed: Set[RelationData], with_path: bool
+) -> bool:
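+    # Backends without path support return RelationData with path=None, so
+    # normalize the expected set accordingly before comparing.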
+ return {
+ RelationData(reldata.src, reldata.dst, reldata.path if with_path else None)
+ for reldata in expected
+ } == computed
+
+
+def dircontent(
+ data: Dict[str, Any],
+ ref: Sha1Git,
+ dir: Dict[str, Any],
+ prefix: bytes = b"",
+) -> Iterable[RelationData]:
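+    # Recursively flatten `dir`, emitting one RelationData per file entry,
+    # all pointing to `ref`, with paths accumulated via `prefix`.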
+ content = {
+ RelationData(entry["target"], ref, os.path.join(prefix, entry["name"]))
+ for entry in dir["entries"]
+ if entry["type"] == "file"
+ }
+ for entry in dir["entries"]:
+ if entry["type"] == "dir":
+ child = next(
+ subdir
+ for subdir in data["directory"]
+ if subdir["id"] == entry["target"]
+ )
+ content.update(
+ dircontent(data, ref, child, os.path.join(prefix, entry["name"]))
+ )
+ return content
+
+
+@pytest.mark.parametrize(
+ "repo",
+ ("cmdbts2", "out-of-order", "with-merges"),
+)
+def test_provenance_storage(
+ provenance: ProvenanceInterface,
+ provenance_storage: ProvenanceStorageInterface,
+ repo: str,
+) -> None:
+ """Tests every ProvenanceStorageInterface implementation against the one provided
+ for provenance.storage."""
+ # Read data/README.md for more details on how these datasets are generated.
+ data = load_repo_data(repo)
+
+ # Assuming provenance.storage has the 'with-path' flavor.
+ assert provenance.storage.with_path()
+
+ # Test content methods.
+ # Add all content present in the current repo to both storages, just assigning their
+ # creation dates. Then check that the inserted content is the same in both cases.
+ cnt_dates = {cnt["sha1_git"]: cnt["ctime"] for cnt in data["content"]}
+ assert cnt_dates
+ assert provenance.storage.content_set_date(
+ cnt_dates
+ ) == provenance_storage.content_set_date(cnt_dates)
+
+ assert provenance.storage.content_get(cnt_dates) == provenance_storage.content_get(
+ cnt_dates
+ )
+ assert provenance.storage.entity_get_all(
+ EntityType.CONTENT
+ ) == provenance_storage.entity_get_all(EntityType.CONTENT)
+
+ # Test directory methods.
+ # Of all directories present in the current repo, only assign a date to those
+ # containing blobs (picking the max date among the available ones). Then check that
+ # the inserted data is the same in both storages.
+ def getmaxdate(
+ dir: Dict[str, Any], cnt_dates: Dict[Sha1Git, datetime]
+ ) -> Optional[datetime]:
+ dates = [
+ cnt_dates[entry["target"]]
+ for entry in dir["entries"]
+ if entry["type"] == "file"
+ ]
+ return max(dates) if dates else None
+
+ dir_dates = {dir["id"]: getmaxdate(dir, cnt_dates) for dir in data["directory"]}
+ assert dir_dates
+ assert provenance.storage.directory_set_date(
+ {sha1: date for sha1, date in dir_dates.items() if date is not None}
+ ) == provenance_storage.directory_set_date(
+ {sha1: date for sha1, date in dir_dates.items() if date is not None}
+ )
+
+ assert provenance.storage.directory_get(
+ dir_dates
+ ) == provenance_storage.directory_get(dir_dates)
+ assert provenance.storage.entity_get_all(
+ EntityType.DIRECTORY
+ ) == provenance_storage.entity_get_all(EntityType.DIRECTORY)
+
+ # Test origin methods.
+ # Add all origins present in the current repo to both storages. Then check that the
+ # inserted data is the same in both cases.
+ org_urls = {
+ hash_to_bytes(origin_identifier(org)): org["url"] for org in data["origin"]
+ }
+ assert org_urls
+ assert provenance.storage.origin_set_url(
+ org_urls
+ ) == provenance_storage.origin_set_url(org_urls)
+
+ assert provenance.storage.origin_get(org_urls) == provenance_storage.origin_get(
+ org_urls
+ )
+ assert provenance.storage.entity_get_all(
+ EntityType.ORIGIN
+ ) == provenance_storage.entity_get_all(EntityType.ORIGIN)
+
+ # Test revision methods.
+ # Add all revisions present in the current repo to both storages, assigning their
+    # dates and an arbitrary origin to each one. Then check that the inserted data is
+ # the same in both cases.
+ rev_dates = {rev["id"]: ts2dt(rev["date"]) for rev in data["revision"]}
+ assert rev_dates
+ assert provenance.storage.revision_set_date(
+ rev_dates
+ ) == provenance_storage.revision_set_date(rev_dates)
+
+ rev_origins = {
+ rev["id"]: next(iter(org_urls)) # any arbitrary origin will do
+ for rev in data["revision"]
+ }
+ assert rev_origins
+ assert provenance.storage.revision_set_origin(
+ rev_origins
+ ) == provenance_storage.revision_set_origin(rev_origins)
+
+ assert provenance.storage.revision_get(
+ rev_dates
+ ) == provenance_storage.revision_get(rev_dates)
+ assert provenance.storage.entity_get_all(
+ EntityType.REVISION
+ ) == provenance_storage.entity_get_all(EntityType.REVISION)
+
+ # Test content-in-revision relation.
+ # Create flat models of every root directory for the revisions in the dataset.
+ cnt_in_rev: Set[RelationData] = set()
+ for rev in data["revision"]:
+ root = next(
+ subdir for subdir in data["directory"] if subdir["id"] == rev["directory"]
+ )
+ cnt_in_rev.update(dircontent(data, rev["id"], root))
+
+ relation_add_and_compare_result(
+ RelationType.CNT_EARLY_IN_REV,
+ cnt_in_rev,
+ provenance.storage,
+ provenance_storage,
+ provenance_storage.with_path(),
+ )
+
+ # Test content-in-directory relation.
+ # Create flat models for every directory in the dataset.
+ cnt_in_dir: Set[RelationData] = set()
+ for dir in data["directory"]:
+ cnt_in_dir.update(dircontent(data, dir["id"], dir))
+
+ relation_add_and_compare_result(
+ RelationType.CNT_IN_DIR,
+ cnt_in_dir,
+ provenance.storage,
+ provenance_storage,
+ provenance_storage.with_path(),
+ )
+
+    # Test directory-in-revision relation.
+    # Add root directories to their corresponding revision in the dataset.
+ dir_in_rev = {
+ RelationData(rev["directory"], rev["id"], b".") for rev in data["revision"]
+ }
+
+ relation_add_and_compare_result(
+ RelationType.DIR_IN_REV,
+ dir_in_rev,
+ provenance.storage,
+ provenance_storage,
+ provenance_storage.with_path(),
+ )
+
+ # Test revision-in-origin relation.
+ # Add all revisions that are head of some snapshot branch to the corresponding
+ # origin.
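+    # (The nested comprehension joins each visit status to its snapshot and
+    # keeps only branches that point directly to revisions.)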
+ rev_in_org = {
+ RelationData(
+ branch["target"],
+ hash_to_bytes(origin_identifier({"url": status["origin"]})),
+ None,
+ )
+ for status in data["origin_visit_status"]
+ if status["snapshot"] is not None
+ for snapshot in data["snapshot"]
+ if snapshot["id"] == status["snapshot"]
+ for _, branch in snapshot["branches"].items()
+ if branch["target_type"] == "revision"
+ }
+
+ relation_add_and_compare_result(
+ RelationType.REV_IN_ORG,
+ rev_in_org,
+ provenance.storage,
+ provenance_storage,
+ )
+
+ # Test revision-before-revision relation.
+ # For each revision in the data set add an entry for each parent to the relation.
+ rev_before_rev = {
+ RelationData(parent, rev["id"], None)
+ for rev in data["revision"]
+ for parent in rev["parents"]
+ }
+
+ relation_add_and_compare_result(
+ RelationType.REV_BEFORE_REV,
+ rev_before_rev,
+ provenance.storage,
+ provenance_storage,
+ )
+
+ # Test location_get.
+ if provenance_storage.with_path():
+ assert provenance.storage.location_get() == provenance_storage.location_get()
+
+ # Test content_find_first and content_find_all.
+ def adapt_result(
+ result: Optional[ProvenanceResult], with_path: bool
+ ) -> Optional[ProvenanceResult]:
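+        # Blank out the expected path when the storage under test has no
+        # path support, so both results remain comparable.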
+ if result is not None:
+ return ProvenanceResult(
+ result.content,
+ result.revision,
+ result.date,
+ result.origin,
+ result.path if with_path else b"",
+ )
+ return result
+
+ for cnt in cnt_dates:
+ assert adapt_result(
+ provenance.storage.content_find_first(cnt), provenance_storage.with_path()
+ ) == provenance_storage.content_find_first(cnt)
+
+ assert {
+ adapt_result(occur, provenance_storage.with_path())
+ for occur in provenance.storage.content_find_all(cnt)
+ } == set(provenance_storage.content_find_all(cnt))
def test_types(provenance: ProvenanceInterface) -> None:
@@ -14,7 +320,6 @@
# Create an instance of the protocol (which cannot be instantiated
# directly, so this creates a subclass, then instantiates it)
interface = type("_", (ProvenanceStorageInterface,), {})()
- storage = provenance.storage
assert "content_find_first" in dir(interface)
@@ -25,7 +330,7 @@
continue
interface_meth = getattr(interface, meth_name)
try:
- concrete_meth = getattr(storage, meth_name)
+ concrete_meth = getattr(provenance.storage, meth_name)
except AttributeError:
if not getattr(interface_meth, "deprecated_endpoint", False):
# The backend is missing a (non-deprecated) endpoint
@@ -43,4 +348,4 @@
# But there's no harm in double-checking.
# And we could replace the assertions above by this one, but unlike
# the assertions above, it doesn't explain what is missing.
- assert isinstance(storage, ProvenanceStorageInterface)
+ assert isinstance(provenance.storage, ProvenanceStorageInterface)
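Usage note (reviewer sketch, not part of the diff): a test that requests both fixtures runs once per provenance_storage backend ("local" and "remote"), each run against a fresh "provenance_tests" database behind the provenance fixture. A minimal hypothetical consumer, assuming only the names introduced above:

    def test_origin_roundtrip(
        provenance: ProvenanceInterface,
        provenance_storage: ProvenanceStorageInterface,
    ) -> None:
        # hypothetical one-entry origin map: Sha1Git id -> URL
        org = {bytes(20): "https://example.com/repo"}
        assert provenance.storage.origin_set_url(org) == provenance_storage.origin_set_url(org)
        assert provenance.storage.origin_get(org) == provenance_storage.origin_get(org)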