Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_directory_flatten.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | from datetime import datetime, timezone | ||||
from typing import Tuple | |||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.provenance.archive import ArchiveInterface | from swh.provenance.archive import ArchiveInterface | ||||
from swh.provenance.directory import directory_add | from swh.provenance.directory import directory_add, directory_flatten_range | ||||
from swh.provenance.interface import ( | from swh.provenance.interface import ( | ||||
DirectoryData, | DirectoryData, | ||||
ProvenanceInterface, | ProvenanceInterface, | ||||
RelationData, | RelationData, | ||||
RelationType, | RelationType, | ||||
) | ) | ||||
from swh.provenance.model import DirectoryEntry, FileEntry | from swh.provenance.model import DirectoryEntry, FileEntry | ||||
from swh.provenance.tests.conftest import fill_storage, load_repo_data | from swh.provenance.tests.conftest import fill_storage, load_repo_data | ||||
def test_directory_add( | def prepare( | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, archive: ArchiveInterface | ||||
archive: ArchiveInterface, | ) -> Tuple[datetime, DirectoryEntry, FileEntry, FileEntry]: | ||||
) -> None: | """Prepare the provenance database with some content suitable for flattening tests""" | ||||
# read data/README.md for more details on how these datasets are generated | # read data/README.md for more details on how these datasets are generated | ||||
data = load_repo_data("cmdbts2") | data = load_repo_data("cmdbts2") | ||||
fill_storage(archive.storage, data) | fill_storage(archive.storage, data) | ||||
# just take a directory that is known to exists in cmdbts2 | # just take a directory that is known to exists in cmdbts2 | ||||
directory = DirectoryEntry( | directory = DirectoryEntry( | ||||
id=hash_to_bytes("48007c961cc734d1f63886d0413a6dc605e3e2ea") | id=hash_to_bytes("48007c961cc734d1f63886d0413a6dc605e3e2ea") | ||||
) | ) | ||||
Show All 19 Lines | assert provenance.storage.content_get([content1.id, content2.id]) == { | ||||
content2.id: date, | content2.id: date, | ||||
} | } | ||||
# this query forces the directory date to be retrieved from the storage and cached | # this query forces the directory date to be retrieved from the storage and cached | ||||
# (otherwise, the flush below won't update the directory flatten flag) | # (otherwise, the flush below won't update the directory flatten flag) | ||||
flattenned = provenance.directory_already_flattenned(directory) | flattenned = provenance.directory_already_flattenned(directory) | ||||
assert flattenned is not None and not flattenned | assert flattenned is not None and not flattenned | ||||
return date, directory, content1, content2 | |||||
def test_directory_add(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
) -> None:
    """Flatten the prepared directory with ``directory_add`` and check the storage.

    Relies on ``prepare`` to populate the archive and the provenance database,
    and to hand back the date, the directory and the two contents used in the
    expectations below.
    """
    date, directory, content1, content2 = prepare(provenance, archive)

    # flatten the single prepared directory
    directory_add(provenance, archive, [directory])

    # the directory keeps its date and is now flagged as flat
    stored_directories = provenance.storage.directory_get([directory.id])
    assert stored_directories == {
        directory.id: DirectoryData(date=date, flat=True),
    }

    # each content is related to the directory through every path it has in it
    stored_relations = provenance.storage.relation_get_all(RelationType.CNT_IN_DIR)
    relations_of_content1 = {
        RelationData(dst=directory.id, path=b"a"),
        RelationData(dst=directory.id, path=b"C/a"),
    }
    relations_of_content2 = {RelationData(dst=directory.id, path=b"C/b")}
    assert stored_relations == {
        content1.id: relations_of_content1,
        content2.id: relations_of_content2,
    }
def test_directory_flatten_range(
    provenance: ProvenanceInterface,
    archive: ArchiveInterface,
) -> None:
    """Flatten the prepared directory with ``directory_flatten_range``.

    Relies on ``prepare`` to populate the archive and the provenance database,
    then checks that the storage ends up with the same flattened content as the
    ``directory_add`` test expects.
    """
    date, directory, content1, content2 = prepare(provenance, archive)

    # directory.id[:-1] is a strict prefix of directory.id, so it compares lower
    # as bytes; presumably the two arguments are the bounds of the id range to
    # flatten -- confirm against directory_flatten_range
    first_id = directory.id[:-1]
    last_id = directory.id
    directory_flatten_range(provenance, archive, first_id, last_id)

    # the directory keeps its date and is now flagged as flat
    stored_directories = provenance.storage.directory_get([directory.id])
    assert stored_directories == {
        directory.id: DirectoryData(date=date, flat=True),
    }

    # each content is related to the directory through every path it has in it
    stored_relations = provenance.storage.relation_get_all(RelationType.CNT_IN_DIR)
    relations_of_content1 = {
        RelationData(dst=directory.id, path=b"a"),
        RelationData(dst=directory.id, path=b"C/a"),
    }
    relations_of_content2 = {RelationData(dst=directory.id, path=b"C/b")}
    assert stored_relations == {
        content1.id: relations_of_content1,
        content2.id: relations_of_content2,
    }
olasd: This could probably move to a common initialization function for the directory flattening tests. |
This could probably move to a common initialization function for the directory flattening tests.