diff --git a/swh/storage/tests/test_buffer.py b/swh/storage/tests/test_buffer.py
--- a/swh/storage/tests/test_buffer.py
+++ b/swh/storage/tests/test_buffer.py
@@ -9,55 +9,55 @@
 def get_storage_with_buffer_config(**buffer_config):
     storage_config = {
         "cls": "pipeline",
-        "steps": [
-            {"cls": "validate"},
-            {"cls": "buffer", **buffer_config},
-            {"cls": "memory"},
-        ],
+        "steps": [{"cls": "buffer", **buffer_config}, {"cls": "memory"},],
     }

     return get_storage(**storage_config)


-def test_buffering_proxy_storage_content_threshold_not_hit(sample_data):
-    contents = sample_data["content"]
+def test_buffering_proxy_storage_content_threshold_not_hit(sample_data_model):
+    contents = sample_data_model["content"]
+    contents_dict = [c.to_dict() for c in contents]
+
     storage = get_storage_with_buffer_config(min_batch_size={"content": 10,})
     s = storage.content_add([contents[0], contents[1]])
     assert s == {}

     # contents have not been written to storage
-    missing_contents = storage.content_missing([contents[0], contents[1]])
-    assert set(missing_contents) == set([contents[0]["sha1"], contents[1]["sha1"]])
+    missing_contents = storage.content_missing(contents_dict)
+    assert set(missing_contents) == set([contents[0].sha1, contents[1].sha1])

     s = storage.flush()
     assert s == {
         "content:add": 1 + 1,
-        "content:add:bytes": contents[0]["length"] + contents[1]["length"],
+        "content:add:bytes": contents[0].length + contents[1].length,
     }

-    missing_contents = storage.content_missing([contents[0], contents[1]])
+    missing_contents = storage.content_missing(contents_dict)
     assert list(missing_contents) == []


-def test_buffering_proxy_storage_content_threshold_nb_hit(sample_data):
-    contents = sample_data["content"]
+def test_buffering_proxy_storage_content_threshold_nb_hit(sample_data_model):
+    content = sample_data_model["content"][0]
+    content_dict = content.to_dict()
+
     storage = get_storage_with_buffer_config(min_batch_size={"content": 1,})

-    s = storage.content_add([contents[0]])
+    s = storage.content_add([content])
     assert s == {
         "content:add": 1,
-        "content:add:bytes": contents[0]["length"],
+        "content:add:bytes": content.length,
     }

-    missing_contents = storage.content_missing([contents[0]])
+    missing_contents = storage.content_missing([content_dict])
     assert list(missing_contents) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_content_deduplicate(sample_data):
-    contents = sample_data["content"]
+def test_buffering_proxy_storage_content_deduplicate(sample_data_model):
+    contents = sample_data_model["content"]
     storage = get_storage_with_buffer_config(min_batch_size={"content": 2,})

     s = storage.content_add([contents[0], contents[0]])
@@ -69,71 +69,72 @@
     s = storage.content_add([contents[1]])
     assert s == {
         "content:add": 1 + 1,
-        "content:add:bytes": contents[0]["length"] + contents[1]["length"],
+        "content:add:bytes": contents[0].length + contents[1].length,
     }

-    missing_contents = storage.content_missing([contents[0], contents[1]])
+    missing_contents = storage.content_missing([c.to_dict() for c in contents])
     assert list(missing_contents) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data):
-    contents = sample_data["content"]
+def test_buffering_proxy_storage_content_threshold_bytes_hit(sample_data_model):
+    contents = sample_data_model["content"]
     content_bytes_min_batch_size = 2
     storage = get_storage_with_buffer_config(
         min_batch_size={"content": 10, "content_bytes": content_bytes_min_batch_size,}
     )

-    assert contents[0]["length"] > content_bytes_min_batch_size
+    assert contents[0].length > content_bytes_min_batch_size

     s = storage.content_add([contents[0]])
     assert s == {
         "content:add": 1,
-        "content:add:bytes": contents[0]["length"],
+        "content:add:bytes": contents[0].length,
     }

-    missing_contents = storage.content_missing([contents[0]])
+    missing_contents = storage.content_missing([contents[0].to_dict()])
     assert list(missing_contents) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_skipped_content_threshold_not_hit(sample_data):
-    contents = sample_data["skipped_content"]
+def test_buffering_proxy_storage_skipped_content_threshold_not_hit(sample_data_model):
+    contents = sample_data_model["skipped_content"]
+    contents_dict = [c.to_dict() for c in contents]
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 10,})
     s = storage.skipped_content_add([contents[0], contents[1]])
     assert s == {}

     # contents have not been written to storage
-    missing_contents = storage.skipped_content_missing([contents[0], contents[1]])
-    assert {c["sha1"] for c in missing_contents} == {c["sha1"] for c in contents}
+    missing_contents = storage.skipped_content_missing(contents_dict)
+    assert {c["sha1"] for c in missing_contents} == {c.sha1 for c in contents}

     s = storage.flush()
     assert s == {"skipped_content:add": 1 + 1}

-    missing_contents = storage.skipped_content_missing([contents[0], contents[1]])
+    missing_contents = storage.skipped_content_missing(contents_dict)
     assert list(missing_contents) == []


-def test_buffering_proxy_storage_skipped_content_threshold_nb_hit(sample_data):
-    contents = sample_data["skipped_content"]
+def test_buffering_proxy_storage_skipped_content_threshold_nb_hit(sample_data_model):
+    contents = sample_data_model["skipped_content"]
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 1,})

     s = storage.skipped_content_add([contents[0]])
     assert s == {"skipped_content:add": 1}

-    missing_contents = storage.skipped_content_missing([contents[0]])
+    missing_contents = storage.skipped_content_missing([contents[0].to_dict()])
     assert list(missing_contents) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_skipped_content_deduplicate(sample_data):
-    contents = sample_data["skipped_content"]
+def test_buffering_proxy_storage_skipped_content_deduplicate(sample_data_model):
+    contents = sample_data_model["skipped_content"][:2]
     storage = get_storage_with_buffer_config(min_batch_size={"skipped_content": 2,})

     s = storage.skipped_content_add([contents[0], contents[0]])
@@ -147,20 +148,20 @@
         "skipped_content:add": 1 + 1,
     }

-    missing_contents = storage.skipped_content_missing([contents[0], contents[1]])
+    missing_contents = storage.skipped_content_missing([c.to_dict() for c in contents])
     assert list(missing_contents) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_directory_threshold_not_hit(sample_data):
-    directories = sample_data["directory"]
+def test_buffering_proxy_storage_directory_threshold_not_hit(sample_data_model):
+    directories = sample_data_model["directory"]
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 10,})

     s = storage.directory_add([directories[0]])
     assert s == {}

-    directory_id = directories[0]["id"]
+    directory_id = directories[0].id
     missing_directories = storage.directory_missing([directory_id])
     assert list(missing_directories) == [directory_id]
@@ -173,23 +174,23 @@
     assert list(missing_directories) == []


-def test_buffering_proxy_storage_directory_threshold_hit(sample_data):
-    directories = sample_data["directory"]
+def test_buffering_proxy_storage_directory_threshold_hit(sample_data_model):
+    directories = sample_data_model["directory"]
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 1,})

     s = storage.directory_add([directories[0]])
     assert s == {
         "directory:add": 1,
     }

-    missing_directories = storage.directory_missing([directories[0]["id"]])
+    missing_directories = storage.directory_missing([directories[0].id])
     assert list(missing_directories) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_directory_deduplicate(sample_data):
-    directories = sample_data["directory"]
+def test_buffering_proxy_storage_directory_deduplicate(sample_data_model):
+    directories = sample_data_model["directory"][:2]
     storage = get_storage_with_buffer_config(min_batch_size={"directory": 2,})

     s = storage.directory_add([directories[0], directories[0]])
@@ -203,22 +204,20 @@
         "directory:add": 1 + 1,
     }

-    missing_directories = storage.directory_missing(
-        [directories[0]["id"], directories[1]["id"]]
-    )
+    missing_directories = storage.directory_missing([d.id for d in directories])
     assert list(missing_directories) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_revision_threshold_not_hit(sample_data):
-    revisions = sample_data["revision"]
+def test_buffering_proxy_storage_revision_threshold_not_hit(sample_data_model):
+    revisions = sample_data_model["revision"]
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 10,})

     s = storage.revision_add([revisions[0]])
     assert s == {}

-    revision_id = revisions[0]["id"]
+    revision_id = revisions[0].id
     missing_revisions = storage.revision_missing([revision_id])
     assert list(missing_revisions) == [revision_id]
@@ -231,23 +230,23 @@
     assert list(missing_revisions) == []


-def test_buffering_proxy_storage_revision_threshold_hit(sample_data):
-    revisions = sample_data["revision"]
+def test_buffering_proxy_storage_revision_threshold_hit(sample_data_model):
+    revisions = sample_data_model["revision"]
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 1,})

     s = storage.revision_add([revisions[0]])
     assert s == {
         "revision:add": 1,
     }

-    missing_revisions = storage.revision_missing([revisions[0]["id"]])
+    missing_revisions = storage.revision_missing([revisions[0].id])
     assert list(missing_revisions) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_revision_deduplicate(sample_data):
-    revisions = sample_data["revision"]
+def test_buffering_proxy_storage_revision_deduplicate(sample_data_model):
+    revisions = sample_data_model["revision"][:2]
     storage = get_storage_with_buffer_config(min_batch_size={"revision": 2,})

     s = storage.revision_add([revisions[0], revisions[0]])
@@ -261,17 +260,15 @@
         "revision:add": 1 + 1,
     }

-    missing_revisions = storage.revision_missing(
-        [revisions[0]["id"], revisions[1]["id"]]
-    )
+    missing_revisions = storage.revision_missing([r.id for r in revisions])
     assert list(missing_revisions) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_release_threshold_not_hit(sample_data):
-    releases = sample_data["release"]
+def test_buffering_proxy_storage_release_threshold_not_hit(sample_data_model):
+    releases = sample_data_model["release"]
     threshold = 10

     assert len(releases) < threshold
@@ -281,7 +278,7 @@
     s = storage.release_add(releases)
     assert s == {}

-    release_ids = [r["id"] for r in releases]
+    release_ids = [r.id for r in releases]
     missing_releases = storage.release_missing(release_ids)
     assert list(missing_releases) == release_ids
@@ -294,8 +291,8 @@
     assert list(missing_releases) == []


-def test_buffering_proxy_storage_release_threshold_hit(sample_data):
-    releases = sample_data["release"]
+def test_buffering_proxy_storage_release_threshold_hit(sample_data_model):
+    releases = sample_data_model["release"]
     threshold = 2

     assert len(releases) > threshold
@@ -308,7 +305,7 @@
         "release:add": len(releases),
     }

-    release_ids = [r["id"] for r in releases]
+    release_ids = [r.id for r in releases]
     missing_releases = storage.release_missing(release_ids)
     assert list(missing_releases) == []
@@ -316,8 +313,8 @@
     assert s == {}


-def test_buffering_proxy_storage_release_deduplicate(sample_data):
-    releases = sample_data["release"]
+def test_buffering_proxy_storage_release_deduplicate(sample_data_model):
+    releases = sample_data_model["release"][:2]
     storage = get_storage_with_buffer_config(min_batch_size={"release": 2,})

     s = storage.release_add([releases[0], releases[0]])
@@ -331,27 +328,27 @@
         "release:add": 1 + 1,
     }

-    missing_releases = storage.release_missing([releases[0]["id"], releases[1]["id"]])
+    missing_releases = storage.release_missing([r.id for r in releases])
     assert list(missing_releases) == []

     s = storage.flush()
     assert s == {}


-def test_buffering_proxy_storage_clear(sample_data):
+def test_buffering_proxy_storage_clear(sample_data_model):
     """Clear operation on buffer

     """
     threshold = 10
-    contents = sample_data["content"]
+    contents = sample_data_model["content"]
     assert 0 < len(contents) < threshold
-    skipped_contents = sample_data["skipped_content"]
+    skipped_contents = sample_data_model["skipped_content"]
     assert 0 < len(skipped_contents) < threshold
-    directories = sample_data["directory"]
+    directories = sample_data_model["directory"]
     assert 0 < len(directories) < threshold
-    revisions = sample_data["revision"]
+    revisions = sample_data_model["revision"]
     assert 0 < len(revisions) < threshold
-    releases = sample_data["release"]
+    releases = sample_data_model["release"]
     assert 0 < len(releases) < threshold

     storage = get_storage_with_buffer_config(