Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/tests/test_provenance_db.py
Show All 25 Lines | |||||
def test_provenance_origin_add(provenance, swh_storage_with_objects): | def test_provenance_origin_add(provenance, swh_storage_with_objects): | ||||
"""Test the ProvenanceDB.origin_add() method""" | """Test the ProvenanceDB.origin_add() method""" | ||||
for origin in TEST_OBJECTS["origin"]: | for origin in TEST_OBJECTS["origin"]: | ||||
entry = OriginEntry(url=origin.url, revisions=[]) | entry = OriginEntry(url=origin.url, revisions=[]) | ||||
origin_add(ArchiveStorage(swh_storage_with_objects), provenance, entry) | origin_add(ArchiveStorage(swh_storage_with_objects), provenance, entry) | ||||
# TODO: check some facts here | # TODO: check some facts here | ||||
def test_provenance_add_revision(provenance, storage_and_CMDBTS, archive_pg): | def test_provenance_add_revision(provenance, storage_and_CMDBTS, archive): | ||||
storage, data = storage_and_CMDBTS | storage, data = storage_and_CMDBTS | ||||
for i in range(2): | for i in range(2): | ||||
# do it twice, there should be no change in results | # do it twice, there should be no change in results | ||||
for revision in data["revision"]: | for revision in data["revision"]: | ||||
entry = RevisionEntry( | entry = RevisionEntry( | ||||
id=revision["id"], | id=revision["id"], | ||||
date=ts2dt(revision["date"]), | date=ts2dt(revision["date"]), | ||||
root=revision["directory"], | root=revision["directory"], | ||||
) | ) | ||||
revision_add(provenance, archive_pg, entry) | revision_add(provenance, archive, entry) | ||||
# there should be as many entries in 'revision' as revisions from the | # there should be as many entries in 'revision' as revisions from the | ||||
# test dataset | # test dataset | ||||
provenance.cursor.execute("SELECT count(*) FROM revision") | provenance.cursor.execute("SELECT count(*) FROM revision") | ||||
assert provenance.cursor.fetchone()[0] == len(data["revision"]) | assert provenance.cursor.fetchone()[0] == len(data["revision"]) | ||||
# there should be no 'location' for the empty path | # there should be no 'location' for the empty path | ||||
provenance.cursor.execute("SELECT count(*) FROM location WHERE path=''") | provenance.cursor.execute("SELECT count(*) FROM location WHERE path=''") | ||||
Show All 19 Lines | for i in range(2): | ||||
provenance.cursor.execute("SELECT count(*) FROM content") | provenance.cursor.execute("SELECT count(*) FROM content") | ||||
assert provenance.cursor.fetchone()[0] == len(data["content"]) | assert provenance.cursor.fetchone()[0] == len(data["content"]) | ||||
provenance.cursor.execute("SELECT count(*) FROM content_in_dir") | provenance.cursor.execute("SELECT count(*) FROM content_in_dir") | ||||
assert provenance.cursor.fetchone()[0] == 16 | assert provenance.cursor.fetchone()[0] == 16 | ||||
provenance.cursor.execute("SELECT count(*) FROM content_early_in_rev") | provenance.cursor.execute("SELECT count(*) FROM content_early_in_rev") | ||||
assert provenance.cursor.fetchone()[0] == 13 | assert provenance.cursor.fetchone()[0] == 13 | ||||
def test_provenance_content_find_first(provenance, storage_and_CMDBTS, archive_pg): | def test_provenance_content_find_first(provenance, storage_and_CMDBTS, archive): | ||||
storage, data = storage_and_CMDBTS | storage, data = storage_and_CMDBTS | ||||
for revision in data["revision"]: | for revision in data["revision"]: | ||||
entry = RevisionEntry( | entry = RevisionEntry( | ||||
id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], | id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], | ||||
) | ) | ||||
revision_add(provenance, archive_pg, entry) | revision_add(provenance, archive, entry) | ||||
first_expected_content = [ | first_expected_content = [ | ||||
{ | { | ||||
"content": "43f3c871310a8e524004e91f033e7fb3b0bc8475", | "content": "43f3c871310a8e524004e91f033e7fb3b0bc8475", | ||||
"rev": "35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5", | "rev": "35ccb8dd1b53d2d8a5c1375eb513ef2beaa79ae5", | ||||
"date": 1609757158, | "date": 1609757158, | ||||
"path": "README.md", | "path": "README.md", | ||||
}, | }, | ||||
▲ Show 20 Lines • Show All 67 Lines • ▼ Show 20 Lines | |||||
@pytest.mark.parametrize( | @pytest.mark.parametrize( | ||||
"syntheticfile, args", | "syntheticfile, args", | ||||
( | ( | ||||
("synthetic_noroot_lower.txt", {"lower": True, "mindepth": 1}), | ("synthetic_noroot_lower.txt", {"lower": True, "mindepth": 1}), | ||||
("synthetic_noroot_upper.txt", {"lower": False, "mindepth": 1}), | ("synthetic_noroot_upper.txt", {"lower": False, "mindepth": 1}), | ||||
), | ), | ||||
) | ) | ||||
def test_provenance_db(provenance, storage_and_CMDBTS, archive_pg, syntheticfile, args): | def test_provenance_db(provenance, storage_and_CMDBTS, archive, syntheticfile, args): | ||||
storage, data = storage_and_CMDBTS | storage, data = storage_and_CMDBTS | ||||
revisions = {rev["id"]: rev for rev in data["revision"]} | revisions = {rev["id"]: rev for rev in data["revision"]} | ||||
rows = { | rows = { | ||||
"content": set(), | "content": set(), | ||||
"content_in_dir": set(), | "content_in_dir": set(), | ||||
"content_early_in_rev": set(), | "content_early_in_rev": set(), | ||||
"directory": set(), | "directory": set(), | ||||
"directory_in_rev": set(), | "directory_in_rev": set(), | ||||
"location": set(), | "location": set(), | ||||
"revision": set(), | "revision": set(), | ||||
} | } | ||||
def db_count(table): | def db_count(table): | ||||
provenance.cursor.execute(f"SELECT count(*) FROM {table}") | provenance.cursor.execute(f"SELECT count(*) FROM {table}") | ||||
return provenance.cursor.fetchone()[0] | return provenance.cursor.fetchone()[0] | ||||
for synth_rev in synthetic_result(syntheticfile): | for synth_rev in synthetic_result(syntheticfile): | ||||
revision = revisions[synth_rev["sha1"]] | revision = revisions[synth_rev["sha1"]] | ||||
entry = RevisionEntry( | entry = RevisionEntry( | ||||
id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], | id=revision["id"], date=ts2dt(revision["date"]), root=revision["directory"], | ||||
) | ) | ||||
revision_add(provenance, archive_pg, entry, **args) | revision_add(provenance, archive, entry, **args) | ||||
# import pdb; pdb.set_trace() | # import pdb; pdb.set_trace() | ||||
# each "entry" in the synth file is one new revision | # each "entry" in the synth file is one new revision | ||||
rows["revision"].add(synth_rev["sha1"]) | rows["revision"].add(synth_rev["sha1"]) | ||||
assert len(rows["revision"]) == db_count("revision") | assert len(rows["revision"]) == db_count("revision") | ||||
# this revision might have added new content objects | # this revision might have added new content objects | ||||
rows["content"] |= set(x["dst"] for x in synth_rev["R_C"]) | rows["content"] |= set(x["dst"] for x in synth_rev["R_C"]) | ||||
Show All 30 Lines |