Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_provenance_storage.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
import inspect | import inspect | ||||
import os | import os | ||||
from typing import Any, Dict, Iterable, Optional, Set, Tuple | from typing import Any, Dict, Iterable, Optional, Set, Tuple | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Origin, Sha1Git | from swh.model.model import Origin, Sha1Git | ||||
from swh.provenance.archive import ArchiveInterface | from swh.provenance.archive import ArchiveInterface | ||||
from swh.provenance.interface import ( | from swh.provenance.interface import ( | ||||
DirectoryData, | |||||
EntityType, | EntityType, | ||||
ProvenanceInterface, | ProvenanceInterface, | ||||
ProvenanceResult, | ProvenanceResult, | ||||
ProvenanceStorageInterface, | ProvenanceStorageInterface, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
RevisionData, | RevisionData, | ||||
) | ) | ||||
Show All 10 Lines | |||||
) -> None: | ) -> None: | ||||
"""Tests content methods for every `ProvenanceStorageInterface` implementation.""" | """Tests content methods for every `ProvenanceStorageInterface` implementation.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
# Add all content present in the current repo to the storage, just assigning their | # Add all content present in the current repo to the storage, just assigning their | ||||
# creation dates. Then check that the returned results when querying are the same. | # creation dates. Then check that the returned results when querying are the same. | ||||
cnts = {cnt["sha1_git"] for idx, cnt in enumerate(data["content"]) if idx % 2 == 0} | |||||
cnt_dates = { | cnt_dates = { | ||||
cnt["sha1_git"]: cnt["ctime"] | cnt["sha1_git"]: cnt["ctime"] for idx, cnt in enumerate(data["content"]) | ||||
for idx, cnt in enumerate(data["content"]) | |||||
if idx % 2 == 1 | |||||
} | } | ||||
assert cnts or cnt_dates | |||||
assert provenance_storage.content_add(cnts) | |||||
assert provenance_storage.content_add(cnt_dates) | assert provenance_storage.content_add(cnt_dates) | ||||
assert provenance_storage.content_get(set(cnt_dates.keys())) == cnt_dates | assert provenance_storage.content_get(set(cnt_dates.keys())) == cnt_dates | ||||
assert provenance_storage.entity_get_all(EntityType.CONTENT) == cnts | set( | assert provenance_storage.entity_get_all(EntityType.CONTENT) == set( | ||||
cnt_dates.keys() | cnt_dates.keys() | ||||
) | ) | ||||
def test_provenance_storage_directory( | def test_provenance_storage_directory( | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests directory methods for every `ProvenanceStorageInterface` implementation.""" | """Tests directory methods for every `ProvenanceStorageInterface` implementation.""" | ||||
Show All 10 Lines | ) -> Optional[datetime]: | ||||
dates = [ | dates = [ | ||||
content["ctime"] | content["ctime"] | ||||
for entry in directory["entries"] | for entry in directory["entries"] | ||||
for content in contents | for content in contents | ||||
if entry["type"] == "file" and entry["target"] == content["sha1_git"] | if entry["type"] == "file" and entry["target"] == content["sha1_git"] | ||||
] | ] | ||||
return max(dates) if dates else None | return max(dates) if dates else None | ||||
dirs = { | flat_values = (False, True) | ||||
dir["id"] | dir_dates = {} | ||||
for dir in data["directory"] | for idx, dir in enumerate(data["directory"]): | ||||
if getmaxdate(dir, data["content"]) is None | date = getmaxdate(dir, data["content"]) | ||||
} | if date is not None: | ||||
dir_dates = { | dir_dates[dir["id"]] = DirectoryData(date=date, flat=flat_values[idx % 2]) | ||||
dir["id"]: getmaxdate(dir, data["content"]) | |||||
for dir in data["directory"] | |||||
if getmaxdate(dir, data["content"]) is not None | |||||
} | |||||
assert dirs | |||||
assert provenance_storage.directory_add(dirs) | |||||
assert provenance_storage.directory_add(dir_dates) | assert provenance_storage.directory_add(dir_dates) | ||||
assert provenance_storage.directory_get(set(dir_dates.keys())) == dir_dates | assert provenance_storage.directory_get(set(dir_dates.keys())) == dir_dates | ||||
assert provenance_storage.entity_get_all(EntityType.DIRECTORY) == dirs | set( | assert provenance_storage.entity_get_all(EntityType.DIRECTORY) == set( | ||||
dir_dates.keys() | dir_dates.keys() | ||||
) | ) | ||||
def test_provenance_storage_location( | def test_provenance_storage_location( | ||||
provenance_storage: ProvenanceStorageInterface, | provenance_storage: ProvenanceStorageInterface, | ||||
) -> None: | ) -> None: | ||||
"""Tests location methods for every `ProvenanceStorageInterface` implementation.""" | """Tests location methods for every `ProvenanceStorageInterface` implementation.""" | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | for entry in dir["entries"]: | ||||
dircontent(data, ref, child, os.path.join(prefix, entry["name"])) | dircontent(data, ref, child, os.path.join(prefix, entry["name"])) | ||||
) | ) | ||||
return content | return content | ||||
def entity_add( | def entity_add( | ||||
storage: ProvenanceStorageInterface, entity: EntityType, ids: Set[Sha1Git] | storage: ProvenanceStorageInterface, entity: EntityType, ids: Set[Sha1Git] | ||||
) -> bool: | ) -> bool: | ||||
now = datetime.now(tz=timezone.utc) | |||||
if entity == EntityType.CONTENT: | if entity == EntityType.CONTENT: | ||||
return storage.content_add({sha1: None for sha1 in ids}) | return storage.content_add({sha1: now for sha1 in ids}) | ||||
elif entity == EntityType.DIRECTORY: | elif entity == EntityType.DIRECTORY: | ||||
return storage.directory_add({sha1: None for sha1 in ids}) | return storage.directory_add( | ||||
{sha1: DirectoryData(date=now, flat=False) for sha1 in ids} | |||||
) | |||||
else: # entity == EntityType.REVISION: | else: # entity == EntityType.REVISION: | ||||
return storage.revision_add( | return storage.revision_add( | ||||
{sha1: RevisionData(date=None, origin=None) for sha1 in ids} | {sha1: RevisionData(date=None, origin=None) for sha1 in ids} | ||||
) | ) | ||||
def relation_add_and_compare_result( | def relation_add_and_compare_result( | ||||
storage: ProvenanceStorageInterface, | storage: ProvenanceStorageInterface, | ||||
▲ Show 20 Lines • Show All 262 Lines • Show Last 20 Lines |