Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_revision_content_layer.py
# Copyright (C) 2021 The Software Heritage developers | # Copyright (C) 2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple | from typing import Any, Dict, Iterable, Iterator, List, Optional, Set, Tuple | ||||
import pytest | import pytest | ||||
from typing_extensions import TypedDict | from typing_extensions import TypedDict | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Sha1Git | from swh.model.model import Sha1Git | ||||
from swh.provenance.archive import ArchiveInterface | from swh.provenance.archive import ArchiveInterface | ||||
from swh.provenance.directory import directory_add | |||||
from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType | from swh.provenance.interface import EntityType, ProvenanceInterface, RelationType | ||||
from swh.provenance.model import RevisionEntry | from swh.provenance.model import DirectoryEntry, RevisionEntry | ||||
from swh.provenance.revision import revision_add | from swh.provenance.revision import revision_add | ||||
from swh.provenance.tests.conftest import ( | from swh.provenance.tests.conftest import ( | ||||
fill_storage, | fill_storage, | ||||
get_datafile, | get_datafile, | ||||
load_repo_data, | load_repo_data, | ||||
ts2dt, | ts2dt, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | for row in synth_rev[1:]: | ||||
dst=hash_to_bytes(row["sha1"]), | dst=hash_to_bytes(row["sha1"]), | ||||
rel_ts=float(row["ts"]), | rel_ts=float(row["ts"]), | ||||
) | ) | ||||
) | ) | ||||
return rev | return rev | ||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"repo, lower, mindepth", | "repo, lower, mindepth, flatten", | ||||
( | ( | ||||
("cmdbts2", True, 1), | ("cmdbts2", True, 1, True), | ||||
("cmdbts2", False, 1), | ("cmdbts2", True, 1, False), | ||||
("cmdbts2", True, 2), | ("cmdbts2", False, 1, True), | ||||
("cmdbts2", False, 2), | ("cmdbts2", False, 1, False), | ||||
("out-of-order", True, 1), | ("cmdbts2", True, 2, True), | ||||
("cmdbts2", True, 2, False), | |||||
("cmdbts2", False, 2, True), | |||||
("cmdbts2", False, 2, False), | |||||
("out-of-order", True, 1, True), | |||||
("out-of-order", True, 1, False), | |||||
), | ), | ||||
) | ) | ||||
def test_revision_content_result( | def test_revision_content_result( | ||||
provenance: ProvenanceInterface, | provenance: ProvenanceInterface, | ||||
archive: ArchiveInterface, | archive: ArchiveInterface, | ||||
repo: str, | repo: str, | ||||
lower: bool, | lower: bool, | ||||
mindepth: int, | mindepth: int, | ||||
flatten: bool, | |||||
) -> None: | ) -> None: | ||||
# read data/README.md for more details on how these datasets are generated | # read data/README.md for more details on how these datasets are generated | ||||
data = load_repo_data(repo) | data = load_repo_data(repo) | ||||
fill_storage(archive.storage, data) | fill_storage(archive.storage, data) | ||||
syntheticfile = get_datafile( | syntheticfile = get_datafile( | ||||
f"synthetic_{repo}_{'lower' if lower else 'upper'}_{mindepth}.txt" | f"synthetic_{repo}_{'lower' if lower else 'upper'}_{mindepth}.txt" | ||||
) | ) | ||||
Show All 16 Lines | ) -> None: | ||||
for synth_rev in synthetic_revision_content_result(syntheticfile): | for synth_rev in synthetic_revision_content_result(syntheticfile): | ||||
revision = revisions[synth_rev["sha1"]] | revision = revisions[synth_rev["sha1"]] | ||||
entry = RevisionEntry( | entry = RevisionEntry( | ||||
id=revision["id"], | id=revision["id"], | ||||
date=ts2dt(revision["date"]), | date=ts2dt(revision["date"]), | ||||
root=revision["directory"], | root=revision["directory"], | ||||
) | ) | ||||
if flatten: | |||||
revision_add(provenance, archive, [entry], lower=lower, mindepth=mindepth) | revision_add(provenance, archive, [entry], lower=lower, mindepth=mindepth) | ||||
else: | |||||
prev_directories = provenance.storage.entity_get_all(EntityType.DIRECTORY) | |||||
revision_add( | |||||
provenance, | |||||
archive, | |||||
[entry], | |||||
lower=lower, | |||||
mindepth=mindepth, | |||||
flatten=False, | |||||
) | |||||
olasd: Before doing directory_add, could you check that the directories are indeed not flattened yet? | |||||
aeviso (Done): Sure, I'll update the diff in a minute. | |||||
directories = [ | |||||
DirectoryEntry(id=sha1) | |||||
for sha1 in provenance.storage.entity_get_all( | |||||
EntityType.DIRECTORY | |||||
).difference(prev_directories) | |||||
] | |||||
for directory in directories: | |||||
assert not provenance.directory_already_flattenned(directory) | |||||
directory_add(provenance, archive, directories) | |||||
# each "entry" in the synth file is one new revision | # each "entry" in the synth file is one new revision | ||||
rows["revision"].add(synth_rev["sha1"]) | rows["revision"].add(synth_rev["sha1"]) | ||||
assert rows["revision"] == provenance.storage.entity_get_all( | assert rows["revision"] == provenance.storage.entity_get_all( | ||||
EntityType.REVISION | EntityType.REVISION | ||||
), synth_rev["msg"] | ), synth_rev["msg"] | ||||
# check the timestamp of the revision | # check the timestamp of the revision | ||||
rev_ts = synth_rev["date"] | rev_ts = synth_rev["date"] | ||||
▲ Show 20 Lines • Show All 247 Lines • Show Last 20 Lines |
Before doing directory_add, could you check that the directories are indeed not flattened yet?