Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_provenance_storage.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import inspect | import inspect | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional, Set, Tuple | from typing import Any, Dict, Iterable, Optional, Set, Tuple | ||||
import pytest | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Origin, Sha1Git | from swh.model.model import Origin, Sha1Git | ||||
from swh.provenance.archive import ArchiveInterface | from swh.provenance.archive import ArchiveInterface | ||||
from swh.provenance.interface import ( | from swh.provenance.interface import ( | ||||
DirectoryData, | DirectoryData, | ||||
EntityType, | EntityType, | ||||
ProvenanceInterface, | ProvenanceInterface, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
▲ Show 20 Lines • Show All 81 Lines • ▼ Show 20 Lines | ) -> None: | ||||
paths = {entry["name"] for dir in data["directory"] for entry in dir["entries"]} | paths = {entry["name"] for dir in data["directory"] for entry in dir["entries"]} | ||||
assert provenance_storage.location_add(paths) | assert provenance_storage.location_add(paths) | ||||
if provenance_storage.with_path(): | if provenance_storage.with_path(): | ||||
assert provenance_storage.location_get_all() == paths | assert provenance_storage.location_get_all() == paths | ||||
else: | else: | ||||
assert provenance_storage.location_get_all() == set() | assert provenance_storage.location_get_all() == set() | ||||
@pytest.mark.origin_layer | |||||
def test_provenance_storage_origin( | def test_provenance_storage_origin( | ||||
self, | self, | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | """Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
Show All 36 Lines | ) -> None: | ||||
assert revs | assert revs | ||||
assert provenance_storage.revision_add(revs) | assert provenance_storage.revision_add(revs) | ||||
assert provenance_storage.revision_add(rev_data) | assert provenance_storage.revision_add(rev_data) | ||||
assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data | assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data | ||||
assert provenance_storage.entity_get_all(EntityType.REVISION) == revs | set( | assert provenance_storage.entity_get_all(EntityType.REVISION) == revs | set( | ||||
rev_data.keys() | rev_data.keys() | ||||
) | ) | ||||
def test_provenance_storage_relation( | def test_provenance_storage_relation_revision_layer( | ||||
self, | self, | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests relation methods for every `ProvenanceStorageInterface` implementation.""" | """Tests relation methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
Show All 28 Lines | ) -> None: | ||||
for rev in data["revision"]: | for rev in data["revision"]: | ||||
dir_in_rev.setdefault(rev["directory"], set()).add( | dir_in_rev.setdefault(rev["directory"], set()).add( | ||||
RelationData(dst=rev["id"], path=b".") | RelationData(dst=rev["id"], path=b".") | ||||
) | ) | ||||
relation_add_and_compare_result( | relation_add_and_compare_result( | ||||
provenance_storage, RelationType.DIR_IN_REV, dir_in_rev | provenance_storage, RelationType.DIR_IN_REV, dir_in_rev | ||||
) | ) | ||||
@pytest.mark.origin_layer | |||||
def test_provenance_storage_relation_origin_layer( | |||||
self, | |||||
provenance_storage: ProvenanceStorageInterface, | |||||
) -> None: | |||||
"""Tests relation methods for every `ProvenanceStorageInterface` implementation.""" | |||||
# Read data/README.md for more details on how these datasets are generated. | |||||
data = load_repo_data("cmdbts2") | |||||
# Test revision-in-origin relation. | # Test revision-in-origin relation. | ||||
# Origins must be inserted in advance (cannot be done by `entity_add` inside | # Origins must be inserted in advance (cannot be done by `entity_add` inside | ||||
# `relation_add_and_compare_result`). | # `relation_add_and_compare_result`). | ||||
orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]} | orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]} | ||||
assert provenance_storage.origin_add(orgs) | assert provenance_storage.origin_add(orgs) | ||||
# Add all revisions that are head of some snapshot branch to the corresponding | # Add all revisions that are head of some snapshot branch to the corresponding | ||||
# origin. | # origin. | ||||
rev_in_org: Dict[Sha1Git, Set[RelationData]] = {} | rev_in_org: Dict[Sha1Git, Set[RelationData]] = {} | ||||
Show All 20 Lines | ) -> None: | ||||
for parent in rev["parents"]: | for parent in rev["parents"]: | ||||
rev_before_rev.setdefault(parent, set()).add( | rev_before_rev.setdefault(parent, set()).add( | ||||
RelationData(dst=rev["id"], path=None) | RelationData(dst=rev["id"], path=None) | ||||
) | ) | ||||
relation_add_and_compare_result( | relation_add_and_compare_result( | ||||
provenance_storage, RelationType.REV_BEFORE_REV, rev_before_rev | provenance_storage, RelationType.REV_BEFORE_REV, rev_before_rev | ||||
) | ) | ||||
def test_provenance_storage_find( | def test_provenance_storage_find_revision_layer( | ||||
self, | self, | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests `content_find_first` and `content_find_all` methods for every | """Tests `content_find_first` and `content_find_all` methods for every | ||||
`ProvenanceStorageInterface` implementation. | `ProvenanceStorageInterface` implementation. | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | ) -> None: | ||||
provenance.storage.content_find_first(cnt), | provenance.storage.content_find_first(cnt), | ||||
provenance_storage.with_path(), | provenance_storage.with_path(), | ||||
) == provenance_storage.content_find_first(cnt) | ) == provenance_storage.content_find_first(cnt) | ||||
assert { | assert { | ||||
adapt_result(occur, provenance_storage.with_path()) | adapt_result(occur, provenance_storage.with_path()) | ||||
for occur in provenance.storage.content_find_all(cnt) | for occur in provenance.storage.content_find_all(cnt) | ||||
} == set(provenance_storage.content_find_all(cnt)) | } == set(provenance_storage.content_find_all(cnt)) | ||||
@pytest.mark.origin_layer | |||||
def test_provenance_storage_find_origin_layer( | |||||
self, | |||||
provenance: ProvenanceInterface, | |||||
provenance_storage: ProvenanceStorageInterface, | |||||
archive: ArchiveInterface, | |||||
) -> None: | |||||
"""Tests `content_find_first` and `content_find_all` methods for every | |||||
`ProvenanceStorageInterface` implementation. | |||||
""" | |||||
# Read data/README.md for more details on how these datasets are generated. | |||||
data = load_repo_data("cmdbts2") | |||||
fill_storage(archive.storage, data) | |||||
# Execute the revision-content algorithm on both storages. | |||||
revisions = [ | |||||
RevisionEntry(id=rev["id"], date=ts2dt(rev["date"]), root=rev["directory"]) | |||||
for rev in data["revision"] | |||||
] | |||||
revision_add(provenance, archive, revisions) | |||||
revision_add(Provenance(provenance_storage), archive, revisions) | |||||
# Test content_find_first and content_find_all, first only executing the | |||||
# revision-content algorithm, then adding the origin-revision layer. | |||||
def adapt_result( | |||||
result: Optional[ProvenanceResult], with_path: bool | |||||
) -> Optional[ProvenanceResult]: | |||||
if result is not None: | |||||
return ProvenanceResult( | |||||
result.content, | |||||
result.revision, | |||||
result.date, | |||||
result.origin, | |||||
result.path if with_path else b"", | |||||
) | |||||
return result | |||||
# Execute the origin-revision algorithm on both storages. | # Execute the origin-revision algorithm on both storages. | ||||
origins = [ | origins = [ | ||||
OriginEntry(url=sta["origin"], snapshot=sta["snapshot"]) | OriginEntry(url=sta["origin"], snapshot=sta["snapshot"]) | ||||
for sta in data["origin_visit_status"] | for sta in data["origin_visit_status"] | ||||
if sta["snapshot"] is not None | if sta["snapshot"] is not None | ||||
] | ] | ||||
origin_add(provenance, archive, origins) | origin_add(provenance, archive, origins) | ||||
origin_add(Provenance(provenance_storage), archive, origins) | origin_add(Provenance(provenance_storage), archive, origins) | ||||
▲ Show 20 Lines • Show All 162 Lines • Show Last 20 Lines |