Changeset View
Changeset View
Standalone View
Standalone View
swh/storage/tests/test_storage.py
Show First 20 Lines • Show All 135 Lines • ▼ Show 20 Lines | def test_content_add(self, swh_storage): | ||||
insertion_start_time = datetime.datetime.now(tz=datetime.timezone.utc) | insertion_start_time = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
actual_result = swh_storage.content_add([cont]) | actual_result = swh_storage.content_add([cont]) | ||||
insertion_end_time = datetime.datetime.now(tz=datetime.timezone.utc) | insertion_end_time = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'content:add:bytes': cont['length'], | 'content:add:bytes': cont['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
assert list(swh_storage.content_get([cont['sha1']])) == \ | assert list(swh_storage.content_get([cont['sha1']])) == \ | ||||
[{'sha1': cont['sha1'], 'data': cont['data']}] | [{'sha1': cont['sha1'], 'data': cont['data']}] | ||||
expected_cont = data.cont | expected_cont = data.cont | ||||
del expected_cont['data'] | del expected_cont['data'] | ||||
journal_objects = list(swh_storage.journal_writer.objects) | journal_objects = list(swh_storage.journal_writer.objects) | ||||
Show All 10 Lines | def test_content_add_from_generator(self, swh_storage): | ||||
def _cnt_gen(): | def _cnt_gen(): | ||||
yield data.cont | yield data.cont | ||||
actual_result = swh_storage.content_add(_cnt_gen()) | actual_result = swh_storage.content_add(_cnt_gen()) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'content:add:bytes': data.cont['length'], | 'content:add:bytes': data.cont['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
swh_storage.refresh_stat_counters() | swh_storage.refresh_stat_counters() | ||||
assert swh_storage.stat_counters()['content'] == 1 | assert swh_storage.stat_counters()['content'] == 1 | ||||
def test_content_add_validation(self, swh_storage): | def test_content_add_validation(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
with pytest.raises(ValueError, match='status'): | with pytest.raises(ValueError, match='status'): | ||||
swh_storage.content_add([{**cont, 'status': 'absent'}]) | |||||
with pytest.raises(ValueError, match='status'): | |||||
swh_storage.content_add([{**cont, 'status': 'foobar'}]) | swh_storage.content_add([{**cont, 'status': 'foobar'}]) | ||||
with pytest.raises(ValueError, match="(?i)length"): | with pytest.raises(ValueError, match="(?i)length"): | ||||
swh_storage.content_add([{**cont, 'length': -2}]) | swh_storage.content_add([{**cont, 'length': -2}]) | ||||
with pytest.raises( | |||||
(ValueError, TypeError), | |||||
match="reason"): | |||||
swh_storage.content_add([{**cont, 'reason': 'foobar'}]) | |||||
def test_skipped_content_add_validation(self, swh_storage): | |||||
cont = data.cont.copy() | |||||
del cont['data'] | |||||
with pytest.raises(ValueError, match='status'): | |||||
swh_storage.skipped_content_add([{**cont, 'status': 'visible'}]) | |||||
with pytest.raises((ValueError, psycopg2.IntegrityError), | with pytest.raises((ValueError, psycopg2.IntegrityError), | ||||
match='reason') as cm: | match='reason') as cm: | ||||
swh_storage.content_add([{**cont, 'status': 'absent'}]) | swh_storage.skipped_content_add([{**cont, 'status': 'absent'}]) | ||||
if type(cm.value) == psycopg2.IntegrityError: | if type(cm.value) == psycopg2.IntegrityError: | ||||
assert cm.exception.pgcode == \ | assert cm.exception.pgcode == \ | ||||
psycopg2.errorcodes.NOT_NULL_VIOLATION | psycopg2.errorcodes.NOT_NULL_VIOLATION | ||||
with pytest.raises( | |||||
ValueError, | |||||
match="^Must not provide a reason if content is not absent.$"): | |||||
swh_storage.content_add([{**cont, 'reason': 'foobar'}]) | |||||
def test_content_get_missing(self, swh_storage): | def test_content_get_missing(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
swh_storage.content_add([cont]) | swh_storage.content_add([cont]) | ||||
# Query a single missing content | # Query a single missing content | ||||
results = list(swh_storage.content_get( | results = list(swh_storage.content_get( | ||||
[data.cont2['sha1']])) | [data.cont2['sha1']])) | ||||
Show All 13 Lines | class TestStorage: | ||||
def test_content_add_different_input(self, swh_storage): | def test_content_add_different_input(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
cont2 = data.cont2 | cont2 = data.cont2 | ||||
actual_result = swh_storage.content_add([cont, cont2]) | actual_result = swh_storage.content_add([cont, cont2]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 2, | 'content:add': 2, | ||||
'content:add:bytes': cont['length'] + cont2['length'], | 'content:add:bytes': cont['length'] + cont2['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
def test_content_add_twice(self, swh_storage): | def test_content_add_twice(self, swh_storage): | ||||
actual_result = swh_storage.content_add([data.cont]) | actual_result = swh_storage.content_add([data.cont]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'content:add:bytes': data.cont['length'], | 'content:add:bytes': data.cont['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
assert len(swh_storage.journal_writer.objects) == 1 | assert len(swh_storage.journal_writer.objects) == 1 | ||||
actual_result = swh_storage.content_add([data.cont, data.cont2]) | actual_result = swh_storage.content_add([data.cont, data.cont2]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'content:add:bytes': data.cont2['length'], | 'content:add:bytes': data.cont2['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
assert len(swh_storage.journal_writer.objects) == 2 | assert 2 <= len(swh_storage.journal_writer.objects) <= 3 | ||||
assert len(swh_storage.content_find(data.cont)) == 1 | assert len(swh_storage.content_find(data.cont)) == 1 | ||||
assert len(swh_storage.content_find(data.cont2)) == 1 | assert len(swh_storage.content_find(data.cont2)) == 1 | ||||
def test_content_add_collision(self, swh_storage): | def test_content_add_collision(self, swh_storage): | ||||
cont1 = data.cont | cont1 = data.cont | ||||
# create (corrupted) content with same sha1{,_git} but != sha256 | # create (corrupted) content with same sha1{,_git} but != sha256 | ||||
Show All 26 Lines | class TestStorage: | ||||
def test_content_add_metadata(self, swh_storage): | def test_content_add_metadata(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
del cont['data'] | del cont['data'] | ||||
cont['ctime'] = datetime.datetime.now() | cont['ctime'] = datetime.datetime.now() | ||||
actual_result = swh_storage.content_add_metadata([cont]) | actual_result = swh_storage.content_add_metadata([cont]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
expected_cont = cont.copy() | expected_cont = cont.copy() | ||||
del expected_cont['ctime'] | del expected_cont['ctime'] | ||||
assert swh_storage.content_get_metadata([cont['sha1']]) == { | assert swh_storage.content_get_metadata([cont['sha1']]) == { | ||||
cont['sha1']: [expected_cont] | cont['sha1']: [expected_cont] | ||||
} | } | ||||
assert list(swh_storage.journal_writer.objects) == [('content', cont)] | assert list(swh_storage.journal_writer.objects) == [('content', cont)] | ||||
def test_content_add_metadata_different_input(self, swh_storage): | def test_content_add_metadata_different_input(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
del cont['data'] | del cont['data'] | ||||
cont['ctime'] = datetime.datetime.now() | cont['ctime'] = datetime.datetime.now() | ||||
cont2 = data.cont2 | cont2 = data.cont2 | ||||
del cont2['data'] | del cont2['data'] | ||||
cont2['ctime'] = datetime.datetime.now() | cont2['ctime'] = datetime.datetime.now() | ||||
actual_result = swh_storage.content_add_metadata([cont, cont2]) | actual_result = swh_storage.content_add_metadata([cont, cont2]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 2, | 'content:add': 2, | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
def test_content_add_metadata_collision(self, swh_storage): | def test_content_add_metadata_collision(self, swh_storage): | ||||
cont1 = data.cont | cont1 = data.cont | ||||
del cont1['data'] | del cont1['data'] | ||||
cont1['ctime'] = datetime.datetime.now() | cont1['ctime'] = datetime.datetime.now() | ||||
# create (corrupted) content with same sha1{,_git} but != sha256 | # create (corrupted) content with same sha1{,_git} but != sha256 | ||||
Show All 11 Lines | def test_skipped_content_add(self, swh_storage): | ||||
cont = data.skipped_cont | cont = data.skipped_cont | ||||
cont2 = data.skipped_cont2 | cont2 = data.skipped_cont2 | ||||
cont2['blake2s256'] = None | cont2['blake2s256'] = None | ||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | missing = list(swh_storage.skipped_content_missing([cont, cont2])) | ||||
assert len(missing) == 2 | assert len(missing) == 2 | ||||
actual_result = swh_storage.content_add([cont, cont, cont2]) | actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | ||||
assert actual_result == { | assert 2 <= actual_result.pop('skipped_content:add') <= 3 | ||||
'content:add': 0, | assert actual_result == {} | ||||
'content:add:bytes': 0, | |||||
'skipped_content:add': 2, | |||||
} | |||||
missing = list(swh_storage.skipped_content_missing([cont, cont2])) | missing = list(swh_storage.skipped_content_missing([cont, cont2])) | ||||
assert missing == [] | assert missing == [] | ||||
@pytest.mark.property_based | @pytest.mark.property_based | ||||
@settings(deadline=None) # this test is very slow | @settings(deadline=None) # this test is very slow | ||||
@given(strategies.sets( | @given(strategies.sets( | ||||
▲ Show 20 Lines • Show All 3,259 Lines • ▼ Show 20 Lines | def test_add_arbitrary(self, swh_storage, objects): | ||||
if obj_type == 'origin_visit': | if obj_type == 'origin_visit': | ||||
origin = obj.pop('origin') | origin = obj.pop('origin') | ||||
swh_storage.origin_add_one({'url': origin}) | swh_storage.origin_add_one({'url': origin}) | ||||
if 'visit' in obj: | if 'visit' in obj: | ||||
del obj['visit'] | del obj['visit'] | ||||
swh_storage.origin_visit_add( | swh_storage.origin_visit_add( | ||||
origin, obj['date'], obj['type']) | origin, obj['date'], obj['type']) | ||||
else: | else: | ||||
if obj_type == 'content' and obj['status'] == 'absent': | |||||
obj_type = 'skipped_content' | |||||
method = getattr(swh_storage, obj_type + '_add') | method = getattr(swh_storage, obj_type + '_add') | ||||
try: | try: | ||||
method([obj]) | method([obj]) | ||||
except HashCollision: | except HashCollision: | ||||
pass | pass | ||||
@pytest.mark.db | @pytest.mark.db | ||||
▲ Show 20 Lines • Show All 93 Lines • ▼ Show 20 Lines | class TestPgStorage: | ||||
def test_content_add_db(self, swh_storage): | def test_content_add_db(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
actual_result = swh_storage.content_add([cont]) | actual_result = swh_storage.content_add([cont]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'content:add:bytes': cont['length'], | 'content:add:bytes': cont['length'], | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
if hasattr(swh_storage, 'objstorage'): | if hasattr(swh_storage, 'objstorage'): | ||||
assert cont['sha1'] in swh_storage.objstorage | assert cont['sha1'] in swh_storage.objstorage | ||||
with db_transaction(swh_storage) as (_, cur): | with db_transaction(swh_storage) as (_, cur): | ||||
cur.execute('SELECT sha1, sha1_git, sha256, length, status' | cur.execute('SELECT sha1, sha1_git, sha256, length, status' | ||||
' FROM content WHERE sha1 = %s', | ' FROM content WHERE sha1 = %s', | ||||
Show All 14 Lines | def test_content_add_metadata_db(self, swh_storage): | ||||
cont = data.cont | cont = data.cont | ||||
del cont['data'] | del cont['data'] | ||||
cont['ctime'] = datetime.datetime.now() | cont['ctime'] = datetime.datetime.now() | ||||
actual_result = swh_storage.content_add_metadata([cont]) | actual_result = swh_storage.content_add_metadata([cont]) | ||||
assert actual_result == { | assert actual_result == { | ||||
'content:add': 1, | 'content:add': 1, | ||||
'skipped_content:add': 0 | |||||
} | } | ||||
if hasattr(swh_storage, 'objstorage'): | if hasattr(swh_storage, 'objstorage'): | ||||
assert cont['sha1'] not in swh_storage.objstorage | assert cont['sha1'] not in swh_storage.objstorage | ||||
with db_transaction(swh_storage) as (_, cur): | with db_transaction(swh_storage) as (_, cur): | ||||
cur.execute('SELECT sha1, sha1_git, sha256, length, status' | cur.execute('SELECT sha1, sha1_git, sha256, length, status' | ||||
' FROM content WHERE sha1 = %s', | ' FROM content WHERE sha1 = %s', | ||||
(cont['sha1'],)) | (cont['sha1'],)) | ||||
datum = cur.fetchone() | datum = cur.fetchone() | ||||
assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], | assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], | ||||
cont['length'], 'visible') | cont['length'], 'visible') | ||||
assert list(swh_storage.journal_writer.objects) == [('content', cont)] | assert list(swh_storage.journal_writer.objects) == [('content', cont)] | ||||
def test_skipped_content_add_db(self, swh_storage): | def test_skipped_content_add_db(self, swh_storage): | ||||
cont = data.skipped_cont | cont = data.skipped_cont | ||||
cont2 = data.skipped_cont2 | cont2 = data.skipped_cont2 | ||||
cont2['blake2s256'] = None | cont2['blake2s256'] = None | ||||
actual_result = swh_storage.content_add([cont, cont, cont2]) | actual_result = swh_storage.skipped_content_add([cont, cont, cont2]) | ||||
assert actual_result == { | assert 2 <= actual_result.pop('skipped_content:add') <= 3 | ||||
'content:add': 0, | assert actual_result == {} | ||||
'content:add:bytes': 0, | |||||
'skipped_content:add': 2, | |||||
} | |||||
with db_transaction(swh_storage) as (_, cur): | with db_transaction(swh_storage) as (_, cur): | ||||
cur.execute('SELECT sha1, sha1_git, sha256, blake2s256, ' | cur.execute('SELECT sha1, sha1_git, sha256, blake2s256, ' | ||||
'length, status, reason ' | 'length, status, reason ' | ||||
'FROM skipped_content ORDER BY sha1_git') | 'FROM skipped_content ORDER BY sha1_git') | ||||
dbdata = cur.fetchall() | dbdata = cur.fetchall() | ||||
assert len(dbdata) == 2 | assert len(dbdata) == 2 | ||||
assert dbdata[0] == (cont['sha1'], cont['sha1_git'], cont['sha256'], | assert dbdata[0] == (cont['sha1'], cont['sha1_git'], cont['sha256'], | ||||
cont['blake2s256'], cont['length'], 'absent', | cont['blake2s256'], cont['length'], 'absent', | ||||
'Content too long') | 'Content too long') | ||||
assert dbdata[1] == (cont2['sha1'], cont2['sha1_git'], cont2['sha256'], | assert dbdata[1] == (cont2['sha1'], cont2['sha1_git'], cont2['sha256'], | ||||
cont2['blake2s256'], cont2['length'], 'absent', | cont2['blake2s256'], cont2['length'], 'absent', | ||||
'Content too long') | 'Content too long') |