Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_provenance_storage.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import inspect | import inspect | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional, Set, Tuple | from typing import Any, Dict, Iterable, Optional, Set, Tuple | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.identifiers import origin_identifier | from swh.model.model import Origin, Sha1Git | ||||
from swh.model.model import Sha1Git | |||||
from swh.provenance.archive import ArchiveInterface | from swh.provenance.archive import ArchiveInterface | ||||
from swh.provenance.interface import ( | from swh.provenance.interface import ( | ||||
EntityType, | EntityType, | ||||
ProvenanceInterface, | ProvenanceInterface, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
ProvenanceStorageInterface, | ProvenanceStorageInterface, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
▲ Show 20 Lines • Show All 102 Lines • ▼ Show 20 Lines | ) -> None: | ||||
"""Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | """Tests origin methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
# Test origin methods. | # Test origin methods. | ||||
# Add all origins present in the current repo to the storage. Then check that the | # Add all origins present in the current repo to the storage. Then check that the | ||||
# returned results when querying are the same. | # returned results when querying are the same. | ||||
orgs = {hash_to_bytes(origin_identifier(org)): org["url"] for org in data["origin"]} | orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]} | ||||
assert orgs | assert orgs | ||||
assert provenance_storage.origin_add(orgs) | assert provenance_storage.origin_add(orgs) | ||||
assert provenance_storage.origin_get(set(orgs.keys())) == orgs | assert provenance_storage.origin_get(set(orgs.keys())) == orgs | ||||
assert provenance_storage.entity_get_all(EntityType.ORIGIN) == set(orgs.keys()) | assert provenance_storage.entity_get_all(EntityType.ORIGIN) == set(orgs.keys()) | ||||
def test_provenance_storage_revision( | def test_provenance_storage_revision( | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests revision methods for every `ProvenanceStorageInterface` implementation.""" | """Tests revision methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
# Test revision methods. | # Test revision methods. | ||||
# Add all revisions present in the current repo to the storage, assigning their | # Add all revisions present in the current repo to the storage, assigning their | ||||
# dates and an arbitrary origin to each one. Then check that the returned results | # dates and an arbitrary origin to each one. Then check that the returned results | ||||
# when querying are the same. | # when querying are the same. | ||||
origin = next(iter(data["origin"])) | origin = Origin(url=next(iter(data["origin"]))["url"]) | ||||
origin_sha1 = hash_to_bytes(origin_identifier(origin)) | |||||
# Origin must be inserted in advance. | # Origin must be inserted in advance. | ||||
assert provenance_storage.origin_add({origin_sha1: origin["url"]}) | assert provenance_storage.origin_add({origin.id: origin.url}) | ||||
revs = {rev["id"] for idx, rev in enumerate(data["revision"]) if idx % 6 == 0} | revs = {rev["id"] for idx, rev in enumerate(data["revision"]) if idx % 6 == 0} | ||||
rev_data = { | rev_data = { | ||||
rev["id"]: RevisionData( | rev["id"]: RevisionData( | ||||
date=ts2dt(rev["date"]) if idx % 2 != 0 else None, | date=ts2dt(rev["date"]) if idx % 2 != 0 else None, | ||||
origin=origin_sha1 if idx % 3 != 0 else None, | origin=origin.id if idx % 3 != 0 else None, | ||||
) | ) | ||||
for idx, rev in enumerate(data["revision"]) | for idx, rev in enumerate(data["revision"]) | ||||
if idx % 6 != 0 | if idx % 6 != 0 | ||||
} | } | ||||
assert revs | assert revs | ||||
assert provenance_storage.revision_add(revs) | assert provenance_storage.revision_add(revs) | ||||
assert provenance_storage.revision_add(rev_data) | assert provenance_storage.revision_add(rev_data) | ||||
assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data | assert provenance_storage.revision_get(set(rev_data.keys())) == rev_data | ||||
▲ Show 20 Lines • Show All 142 Lines • ▼ Show 20 Lines | for rev in data["revision"]: | ||||
) | ) | ||||
relation_add_and_compare_result( | relation_add_and_compare_result( | ||||
provenance_storage, RelationType.DIR_IN_REV, dir_in_rev | provenance_storage, RelationType.DIR_IN_REV, dir_in_rev | ||||
) | ) | ||||
# Test revision-in-origin relation. | # Test revision-in-origin relation. | ||||
# Origins must be inserted in advance (cannot be done by `entity_add` inside | # Origins must be inserted in advance (cannot be done by `entity_add` inside | ||||
# `relation_add_and_compare_result`). | # `relation_add_and_compare_result`). | ||||
orgs = { | orgs = {Origin(url=org["url"]).id: org["url"] for org in data["origin"]} | ||||
hash_to_bytes(origin_identifier(origin)): origin["url"] | |||||
for origin in data["origin"] | |||||
} | |||||
assert provenance_storage.origin_add(orgs) | assert provenance_storage.origin_add(orgs) | ||||
# Add all revisions that are head of some snapshot branch to the corresponding | # Add all revisions that are head of some snapshot branch to the corresponding | ||||
# origin. | # origin. | ||||
rev_in_org: Dict[Sha1Git, Set[RelationData]] = {} | rev_in_org: Dict[Sha1Git, Set[RelationData]] = {} | ||||
for status in data["origin_visit_status"]: | for status in data["origin_visit_status"]: | ||||
if status["snapshot"] is not None: | if status["snapshot"] is not None: | ||||
for snapshot in data["snapshot"]: | for snapshot in data["snapshot"]: | ||||
if snapshot["id"] == status["snapshot"]: | if snapshot["id"] == status["snapshot"]: | ||||
for branch in snapshot["branches"].values(): | for branch in snapshot["branches"].values(): | ||||
if branch["target_type"] == "revision": | if branch["target_type"] == "revision": | ||||
rev_in_org.setdefault(branch["target"], set()).add( | rev_in_org.setdefault(branch["target"], set()).add( | ||||
RelationData( | RelationData( | ||||
dst=hash_to_bytes( | dst=Origin(url=status["origin"]).id, | ||||
origin_identifier({"url": status["origin"]}) | |||||
), | |||||
path=None, | path=None, | ||||
) | ) | ||||
) | ) | ||||
relation_add_and_compare_result( | relation_add_and_compare_result( | ||||
provenance_storage, RelationType.REV_IN_ORG, rev_in_org | provenance_storage, RelationType.REV_IN_ORG, rev_in_org | ||||
) | ) | ||||
# Test revision-before-revision relation. | # Test revision-before-revision relation. | ||||
▲ Show 20 Lines • Show All 137 Lines • Show Last 20 Lines |