Changeset View
Standalone View
swh/provenance/tests/test_provenance_storage.py
# Copyright (C) 2021-2022 The Software Heritage developers | # Copyright (C) 2021-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
vlorentz: ditto | |||||
import hashlib | |||||
import inspect | import inspect | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional, Set, Tuple | from typing import Any, Dict, Iterable, Optional, Set, Tuple | ||||
import pytest | import pytest | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Origin, Sha1Git | from swh.model.model import Origin, Sha1Git | ||||
▲ Show 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | class TestProvenanceStorage: | ||||
) -> None: | ) -> None: | ||||
"""Tests location methods for every `ProvenanceStorageInterface` implementation.""" | """Tests location methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
# Add all names of entries present in the directories of the current repo as paths | # Add all names of entries present in the directories of the current repo as paths | ||||
# to the storage. Then check that the returned results when querying are the same. | # to the storage. Then check that the returned results when querying are the same. | ||||
paths = {entry["name"] for dir in data["directory"] for entry in dir["entries"]} | paths = { | ||||
hashlib.sha1(entry["name"]).digest(): entry["name"] | |||||
for dir in data["directory"] | |||||
for entry in dir["entries"] | |||||
} | |||||
assert provenance_storage.location_add(paths) | assert provenance_storage.location_add(paths) | ||||
if provenance_storage.with_path(): | if provenance_storage.with_path(): | ||||
assert provenance_storage.location_get_all() == paths | assert provenance_storage.location_get_all() == paths | ||||
else: | else: | ||||
assert provenance_storage.location_get_all() == set() | assert not provenance_storage.location_get_all() | ||||
@pytest.mark.origin_layer | @pytest.mark.origin_layer | ||||
def test_provenance_storage_origin( | def test_provenance_storage_origin( | ||||
self, | self, | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | """Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | ||||
Show All 21 Lines | ) -> None: | ||||
# Test revision methods. | # Test revision methods. | ||||
# Add all revisions present in the current repo to the storage, assigning their | # Add all revisions present in the current repo to the storage, assigning their | ||||
# dates and an arbitrary origin to each one. Then check that the returned results | # dates and an arbitrary origin to each one. Then check that the returned results | ||||
# when querying are the same. | # when querying are the same. | ||||
origin = Origin(url=next(iter(data["origin"]))["url"]) | origin = Origin(url=next(iter(data["origin"]))["url"]) | ||||
# Origin must be inserted in advance. | # Origin must be inserted in advance. | ||||
assert provenance_storage.origin_add({origin.id: origin.url}) | assert provenance_storage.origin_add({origin.id: origin.url}) | ||||
revs = {rev["id"] for idx, rev in enumerate(data["revision"]) if idx % 6 == 0} | revs = {rev["id"] for idx, rev in enumerate(data["revision"])} | ||||
rev_data = { | rev_data = { | ||||
rev["id"]: RevisionData( | rev["id"]: RevisionData( | ||||
date=ts2dt(rev["date"]) if idx % 2 != 0 else None, | date=ts2dt(rev["date"]) if idx % 2 != 0 else None, | ||||
origin=origin.id if idx % 3 != 0 else None, | origin=origin.id if idx % 3 != 0 else None, | ||||
) | ) | ||||
for idx, rev in enumerate(data["revision"]) | for idx, rev in enumerate(data["revision"]) | ||||
if idx % 6 != 0 | |||||
} | } | ||||
assert revs | assert revs | ||||
assert provenance_storage.revision_add(revs) | |||||
assert provenance_storage.revision_add(rev_data) | assert provenance_storage.revision_add(rev_data) | ||||
assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data | assert provenance_storage.revision_get(set(rev_data.keys())) == { | ||||
assert provenance_storage.entity_get_all(EntityType.REVISION) == revs | set( | k: v | ||||
rev_data.keys() | for (k, v) in rev_data.items() | ||||
) | if v.date is not None or v.origin is not None | ||||
} | |||||
assert provenance_storage.entity_get_all(EntityType.REVISION) == set(rev_data) | |||||
def test_provenance_storage_relation_revision_layer( | def test_provenance_storage_relation_revision_layer( | ||||
self, | self, | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests relation methods for every `ProvenanceStorageInterface` implementation.""" | """Tests relation methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
▲ Show 20 Lines • Show All 301 Lines • ▼ Show 20 Lines | ) -> None: | ||||
srcs = {sha1 for sha1 in data} | srcs = {sha1 for sha1 in data} | ||||
if src != "origin": | if src != "origin": | ||||
assert entity_add(storage, EntityType(src), srcs) | assert entity_add(storage, EntityType(src), srcs) | ||||
dsts = {rel.dst for rels in data.values() for rel in rels} | dsts = {rel.dst for rels in data.values() for rel in rels} | ||||
if dst != "origin": | if dst != "origin": | ||||
assert entity_add(storage, EntityType(dst), dsts) | assert entity_add(storage, EntityType(dst), dsts) | ||||
if storage.with_path(): | if storage.with_path(): | ||||
assert storage.location_add( | assert storage.location_add( | ||||
{rel.path for rels in data.values() for rel in rels if rel.path is not None} | { | ||||
hashlib.sha1(rel.path).digest(): rel.path | |||||
for rels in data.values() | |||||
for rel in rels | |||||
if rel.path is not None | |||||
} | |||||
) | ) | ||||
assert data | assert data | ||||
assert storage.relation_add(relation, data) | assert storage.relation_add(relation, data) | ||||
for src_sha1 in srcs: | for src_sha1 in srcs: | ||||
relation_compare_result( | relation_compare_result( | ||||
storage.relation_get(relation, [src_sha1]), | storage.relation_get(relation, [src_sha1]), | ||||
Show All 34 Lines |
ditto