Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_provenance_storage.py
Show First 20 Lines • Show All 105 Lines • ▼ Show 20 Lines | ) -> None: | ||||
"""Tests every ProvenanceStorageInterface implementation against the one provided | """Tests every ProvenanceStorageInterface implementation against the one provided | ||||
for provenance.storage.""" | for provenance.storage.""" | ||||
# Read data/README.md for more details on how these datasets are generated. | # Read data/README.md for more details on how these datasets are generated. | ||||
data = load_repo_data(repo) | data = load_repo_data(repo) | ||||
# Assuming provenance.storage has the 'with-path' flavor. | # Assuming provenance.storage has the 'with-path' flavor. | ||||
assert provenance.storage.with_path() | assert provenance.storage.with_path() | ||||
# Test content methods. | |||||
# Add all content present in the current repo to both storages, just assigning their | |||||
# creation dates. Then check that the inserted content is the same in both cases. | |||||
cnt_dates = {cnt["sha1_git"]: cnt["ctime"] for cnt in data["content"]} | |||||
assert cnt_dates | |||||
assert provenance.storage.content_set_date( | |||||
cnt_dates | |||||
) == provenance_storage.content_set_date(cnt_dates) | |||||
assert provenance.storage.content_get(cnt_dates) == provenance_storage.content_get( | |||||
cnt_dates | |||||
) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.CONTENT | |||||
) == provenance_storage.entity_get_all(EntityType.CONTENT) | |||||
# Test directory methods. | |||||
# Of all directories present in the current repo, only assign a date to those | |||||
# containing blobs (picking the max date among the available ones). Then check that | |||||
# the inserted data is the same in both storages. | |||||
def getmaxdate( | |||||
dir: Dict[str, Any], cnt_dates: Dict[Sha1Git, datetime] | |||||
) -> Optional[datetime]: | |||||
dates = [ | |||||
cnt_dates[entry["target"]] | |||||
for entry in dir["entries"] | |||||
if entry["type"] == "file" | |||||
] | |||||
return max(dates) if dates else None | |||||
dir_dates = {dir["id"]: getmaxdate(dir, cnt_dates) for dir in data["directory"]} | |||||
assert dir_dates | |||||
assert provenance.storage.directory_set_date( | |||||
{sha1: date for sha1, date in dir_dates.items() if date is not None} | |||||
) == provenance_storage.directory_set_date( | |||||
{sha1: date for sha1, date in dir_dates.items() if date is not None} | |||||
) | |||||
assert provenance.storage.directory_get( | |||||
dir_dates | |||||
) == provenance_storage.directory_get(dir_dates) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.DIRECTORY | |||||
) == provenance_storage.entity_get_all(EntityType.DIRECTORY) | |||||
# Test origin methods. | # Test origin methods. | ||||
# Add all origins present in the current repo to both storages. Then check that the | # Add all origins present in the current repo to both storages. Then check that the | ||||
# inserted data is the same in both cases. | # inserted data is the same in both cases. | ||||
org_urls = { | org_urls = { | ||||
hash_to_bytes(origin_identifier(org)): org["url"] for org in data["origin"] | hash_to_bytes(origin_identifier(org)): org["url"] for org in data["origin"] | ||||
} | } | ||||
assert org_urls | assert org_urls | ||||
assert provenance.storage.origin_set_url( | assert provenance.storage.origin_set_url( | ||||
org_urls | org_urls | ||||
) == provenance_storage.origin_set_url(org_urls) | ) == provenance_storage.origin_set_url(org_urls) | ||||
assert provenance.storage.origin_get(org_urls) == provenance_storage.origin_get( | assert provenance.storage.origin_get(org_urls) == provenance_storage.origin_get( | ||||
org_urls | org_urls | ||||
) | ) | ||||
assert provenance.storage.entity_get_all( | assert provenance.storage.entity_get_all( | ||||
EntityType.ORIGIN | EntityType.ORIGIN | ||||
) == provenance_storage.entity_get_all(EntityType.ORIGIN) | ) == provenance_storage.entity_get_all(EntityType.ORIGIN) | ||||
# Test revision methods. | |||||
# Add all revisions present in the current repo to both storages, assigning their | |||||
# dates and an arbitrary origin to each one. Then check that the inserted data is | |||||
# the same in both cases. | |||||
rev_dates = {rev["id"]: ts2dt(rev["date"]) for rev in data["revision"]} | |||||
assert rev_dates | |||||
assert provenance.storage.revision_set_date( | |||||
rev_dates | |||||
) == provenance_storage.revision_set_date(rev_dates) | |||||
rev_origins = { | |||||
rev["id"]: next(iter(org_urls)) # any arbitrary origin will do | |||||
for rev in data["revision"] | |||||
} | |||||
assert rev_origins | |||||
assert provenance.storage.revision_set_origin( | |||||
rev_origins | |||||
) == provenance_storage.revision_set_origin(rev_origins) | |||||
assert provenance.storage.revision_get( | |||||
rev_dates | |||||
) == provenance_storage.revision_get(rev_dates) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.REVISION | |||||
) == provenance_storage.entity_get_all(EntityType.REVISION) | |||||
# Test content-in-revision relation. | # Test content-in-revision relation. | ||||
# Create flat models of every root directory for the revisions in the dataset. | # Create flat models of every root directory for the revisions in the dataset. | ||||
cnt_in_rev: Set[RelationData] = set() | cnt_in_rev: Set[RelationData] = set() | ||||
for rev in data["revision"]: | for rev in data["revision"]: | ||||
root = next( | root = next( | ||||
subdir for subdir in data["directory"] if subdir["id"] == rev["directory"] | subdir for subdir in data["directory"] if subdir["id"] == rev["directory"] | ||||
) | ) | ||||
cnt_in_rev.update(dircontent(data, rev["id"], root)) | cnt_in_rev.update(dircontent(data, rev["id"], root)) | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | ) -> None: | ||||
relation_add_and_compare_result( | relation_add_and_compare_result( | ||||
RelationType.REV_BEFORE_REV, | RelationType.REV_BEFORE_REV, | ||||
rev_before_rev, | rev_before_rev, | ||||
provenance.storage, | provenance.storage, | ||||
provenance_storage, | provenance_storage, | ||||
) | ) | ||||
# Test content methods. | |||||
# Add all content present in the current repo to both storages, just assigning their | |||||
# creation dates. Then check that the inserted content is the same in both cases. | |||||
cnt_dates = {cnt["sha1_git"]: cnt["ctime"] for cnt in data["content"]} | |||||
assert cnt_dates | |||||
assert provenance.storage.content_set_date( | |||||
cnt_dates | |||||
) == provenance_storage.content_set_date(cnt_dates) | |||||
assert provenance.storage.content_get(cnt_dates) == provenance_storage.content_get( | |||||
cnt_dates | |||||
) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.CONTENT | |||||
) == provenance_storage.entity_get_all(EntityType.CONTENT) | |||||
# Test directory methods. | |||||
# Of all directories present in the current repo, only assign a date to those | |||||
# containing blobs (picking the max date among the available ones). Then check that | |||||
# the inserted data is the same in both storages. | |||||
def getmaxdate( | |||||
dir: Dict[str, Any], cnt_dates: Dict[Sha1Git, datetime] | |||||
) -> Optional[datetime]: | |||||
dates = [ | |||||
cnt_dates[entry["target"]] | |||||
for entry in dir["entries"] | |||||
if entry["type"] == "file" | |||||
] | |||||
return max(dates) if dates else None | |||||
dir_dates = {dir["id"]: getmaxdate(dir, cnt_dates) for dir in data["directory"]} | |||||
assert dir_dates | |||||
assert provenance.storage.directory_set_date( | |||||
{sha1: date for sha1, date in dir_dates.items() if date is not None} | |||||
) == provenance_storage.directory_set_date( | |||||
{sha1: date for sha1, date in dir_dates.items() if date is not None} | |||||
) | |||||
assert provenance.storage.directory_get( | |||||
dir_dates | |||||
) == provenance_storage.directory_get(dir_dates) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.DIRECTORY | |||||
) == provenance_storage.entity_get_all(EntityType.DIRECTORY) | |||||
# Test revision methods. | |||||
# Add all revisions present in the current repo to both storages, assigning their | |||||
# dates and an arbitrary origin to each one. Then check that the inserted data is | |||||
# the same in both cases. | |||||
rev_dates = {rev["id"]: ts2dt(rev["date"]) for rev in data["revision"]} | |||||
assert rev_dates | |||||
assert provenance.storage.revision_set_date( | |||||
rev_dates | |||||
) == provenance_storage.revision_set_date(rev_dates) | |||||
rev_origins = { | |||||
rev["id"]: next(iter(org_urls)) # any arbitrary origin will do | |||||
for rev in data["revision"] | |||||
} | |||||
assert rev_origins | |||||
assert provenance.storage.revision_set_origin( | |||||
rev_origins | |||||
) == provenance_storage.revision_set_origin(rev_origins) | |||||
assert provenance.storage.revision_get( | |||||
rev_dates | |||||
) == provenance_storage.revision_get(rev_dates) | |||||
assert provenance.storage.entity_get_all( | |||||
EntityType.REVISION | |||||
) == provenance_storage.entity_get_all(EntityType.REVISION) | |||||
# Test location_get. | # Test location_get. | ||||
if provenance_storage.with_path(): | if provenance_storage.with_path(): | ||||
assert provenance.storage.location_get() == provenance_storage.location_get() | assert provenance.storage.location_get() == provenance_storage.location_get() | ||||
# Test content_find_first and content_find_all. | # Test content_find_first and content_find_all. | ||||
def adapt_result( | def adapt_result( | ||||
result: Optional[ProvenanceResult], with_path: bool | result: Optional[ProvenanceResult], with_path: bool | ||||
) -> Optional[ProvenanceResult]: | ) -> Optional[ProvenanceResult]: | ||||
if result is not None: | if result is not None: | ||||
return ProvenanceResult( | return ProvenanceResult( | ||||
result.content, | result.content, | ||||
result.revision, | result.revision, | ||||
result.date, | result.date, | ||||
result.origin, | result.origin, | ||||
result.path if with_path else b"", | result.path if with_path else b"", | ||||
) | ) | ||||
return result | return result | ||||
for cnt in cnt_dates: | for cnt in cnt_dates: | ||||
aeviso: This empty line should be removed | |||||
assert adapt_result( | assert adapt_result( | ||||
provenance.storage.content_find_first(cnt), provenance_storage.with_path() | provenance.storage.content_find_first(cnt), provenance_storage.with_path() | ||||
) == provenance_storage.content_find_first(cnt) | ) == provenance_storage.content_find_first(cnt) | ||||
assert { | assert { | ||||
adapt_result(occur, provenance_storage.with_path()) | adapt_result(occur, provenance_storage.with_path()) | ||||
for occur in provenance.storage.content_find_all(cnt) | for occur in provenance.storage.content_find_all(cnt) | ||||
} == set(provenance_storage.content_find_all(cnt)) | } == set(provenance_storage.content_find_all(cnt)) | ||||
Show All 37 Lines |
This empty line should be removed