provenance = <swh.provenance.postgresql.provenancedb_with_path.ProvenanceWithPathDB object at 0x7fde0b5a1198>
swh_storage = <swh.storage.postgresql.storage.Storage object at 0x7fde0b5510f0>
archive = <swh.provenance.postgresql.archive.ArchivePostgreSQL object at 0x7fde0b5c80f0>
repo = 'out-of-order', lower = True, mindepth = 1
@pytest.mark.parametrize(
"repo, lower, mindepth",
(
("cmdbts2", True, 1),
("cmdbts2", False, 1),
("cmdbts2", True, 2),
("cmdbts2", False, 2),
("out-of-order", True, 1),
),
)
def test_provenance_heuristics(provenance, swh_storage, archive, repo, lower, mindepth):
# read data/README.md for more details on how these datasets are generated
data = load_repo_data(repo)
fill_storage(swh_storage, data)
syntheticfile = get_datafile(
f"synthetic_{repo}_{'lower' if lower else 'upper'}_{mindepth}.txt"
)
revisions = {rev["id"]: rev for rev in data["revision"]}
rows = {
"content": set(),
"content_in_dir": set(),
"content_early_in_rev": set(),
"directory": set(),
"directory_in_rev": set(),
"location": set(),
"revision": set(),
}
for synth_rev in synthetic_result(syntheticfile):
revision = revisions[synth_rev["sha1"]]
entry = RevisionEntry(
id=revision["id"],
date=ts2dt(revision["date"]),
root=revision["directory"],
)
revision_add(provenance, archive, [entry], lower=lower, mindepth=mindepth)
# each "entry" in the synth file is one new revision
rows["revision"].add(synth_rev["sha1"].hex())
assert rows["revision"] == sha1s(provenance.cursor, "revision"), synth_rev[
"msg"
]
# check the timestamp of the revision
rev_ts = synth_rev["date"]
assert get_timestamp(
provenance.cursor, "revision", synth_rev["sha1"].hex()
) == [rev_ts], synth_rev["msg"]
# this revision might have added new content objects
rows["content"] |= set(x["dst"].hex() for x in synth_rev["R_C"])
rows["content"] |= set(x["dst"].hex() for x in synth_rev["D_C"])
assert rows["content"] == sha1s(provenance.cursor, "content"), synth_rev["msg"]
# check for R-C (direct) entries
# these are added directly in the content_early_in_rev table
rows["content_early_in_rev"] |= set(
(x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_C"]
)
assert rows["content_early_in_rev"] == relations(
provenance.cursor, "content", "revision"
), synth_rev["msg"]
# check timestamps
for rc in synth_rev["R_C"]:
assert get_timestamp(provenance.cursor, "content", rc["dst"]) == [
rev_ts + rc["rel_ts"]
], synth_rev["msg"]
# check directories
# each directory stored in the provenance index is an entry
# in the "directory" table...
rows["directory"] |= set(x["dst"].hex() for x in synth_rev["R_D"])
assert rows["directory"] == sha1s(provenance.cursor, "directory"), synth_rev[
"msg"
]
# ... + a number of rows in the "directory_in_rev" table...
# check for R-D entries
rows["directory_in_rev"] |= set(
(x["dst"].hex(), x["src"].hex(), x["path"]) for x in synth_rev["R_D"]
)
assert rows["directory_in_rev"] == relations(
provenance.cursor, "directory", "revision"
), synth_rev["msg"]
# check timestamps
for rd in synth_rev["R_D"]:
> assert get_timestamp(provenance.cursor, "directory", rd["dst"]) == [
rev_ts + rd["rel_ts"]
], synth_rev["msg"]
E AssertionError: R06
E assert [1000000010.0] == [1000000005.0]
E At index 0 diff: 1000000010.0 != 1000000005.0
E Full diff:
E - [1000000005.0]
E ? -
E + [1000000010.0]
E ? +
.tox/py3/lib/python3.7/site-packages/swh/provenance/tests/test_provenance_heuristics.py:163: AssertionError
TEST RESULT
TEST RESULT
- Run At
- Jun 3 2021, 3:34 PM