diff --git a/swh/storage/tests/test_storage.py b/swh/storage/tests/test_storage.py --- a/swh/storage/tests/test_storage.py +++ b/swh/storage/tests/test_storage.py @@ -26,7 +26,8 @@ from swh.model import from_disk, identifiers from swh.model.hashutil import hash_to_bytes -from swh.model.model import Content, OriginVisit +from swh.model.model import ( + Content, Directory, Origin, OriginVisit, Release, Revision, Snapshot) from swh.model.hypothesis_strategies import objects from swh.model.hashutil import hash_to_hex from swh.storage import get_storage @@ -88,6 +89,15 @@ assert actual_list == expected_list, k +class LazyContent(Content): + + def with_data(self): + return Content.from_dict({ + **self.to_dict(), + 'data': data.cont['data'] + }) + + class TestStorage: """Main class for Storage testing. @@ -153,12 +163,16 @@ expected_cont = data.cont del expected_cont['data'] - journal_objects = list(swh_storage.journal_writer.journal.objects) - for (obj_type, obj) in journal_objects: - assert insertion_start_time <= obj['ctime'] - assert obj['ctime'] <= insertion_end_time - del obj['ctime'] - assert journal_objects == [('content', expected_cont)] + contents = [ + obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects + if obj_type == 'content'] + assert len(contents) == 1 + for obj in contents: + assert insertion_start_time <= obj.ctime + assert obj.ctime <= insertion_end_time + obj_d = obj.to_dict() + del obj_d['ctime'] + assert obj_d == expected_cont swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['content'] == 1 @@ -178,20 +192,9 @@ assert swh_storage.stat_counters()['content'] == 1 def test_content_add_from_lazy_content(self, swh_storage): - called = False - cont = data.cont - - class LazyContent(Content): - def with_data(self): - nonlocal called - called = True - return Content.from_dict({ - **self.to_dict(), - 'data': cont['data'] - }) lazy_content = LazyContent.from_dict({ - **cont, + **data.cont, 'data': b'nope', }) @@ -204,22 +207,26 @@ assert actual_result == { 'content:add': 1, - 'content:add:bytes': cont['length'], + 'content:add:bytes': data.cont['length'], } - assert called - - assert list(swh_storage.content_get([cont['sha1']])) == \ - [{'sha1': cont['sha1'], 'data': cont['data']}] + # the fact that we retrieve the content object from the storage with + # the correct 'data' field ensures it has been 'called' + assert list(swh_storage.content_get([data.cont['sha1']])) == \ + [{'sha1': data.cont['sha1'], 'data': data.cont['data']}] expected_cont = data.cont del expected_cont['data'] - journal_objects = list(swh_storage.journal_writer.journal.objects) - for (obj_type, obj) in journal_objects: - assert insertion_start_time <= obj['ctime'] - assert obj['ctime'] <= insertion_end_time - del obj['ctime'] - assert journal_objects == [('content', expected_cont)] + contents = [ + obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects + if obj_type == 'content'] + assert len(contents) == 1 + for obj in contents: + assert insertion_start_time <= obj.ctime + assert obj.ctime <= insertion_end_time + obj_d = obj.to_dict() + del obj_d['ctime'] + assert obj_d == expected_cont swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['content'] == 1 @@ -282,21 +289,21 @@ assert actual_result == { 'content:add': 2, 'content:add:bytes': cont['length'] + cont2['length'], - } + } def test_content_add_twice(self, swh_storage): actual_result = swh_storage.content_add([data.cont]) assert actual_result == { 'content:add': 1, 'content:add:bytes': data.cont['length'], - } + } assert len(swh_storage.journal_writer.journal.objects) == 1 actual_result = swh_storage.content_add([data.cont, data.cont2]) assert actual_result == { 'content:add': 1, 'content:add:bytes': data.cont2['length'], - } + } assert 2 <= len(swh_storage.journal_writer.journal.objects) <= 3 assert len(swh_storage.content_find(data.cont)) == 1 @@ -355,16 +362,21 @@ actual_result = swh_storage.content_add_metadata([cont]) assert actual_result == { 'content:add': 1, - } + } expected_cont = cont.copy() del expected_cont['ctime'] assert swh_storage.content_get_metadata([cont['sha1']]) == { cont['sha1']: [expected_cont] } - - assert list(swh_storage.journal_writer.journal.objects) == [ - ('content', cont)] + contents = [ + obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects + if obj_type == 'content'] + assert len(contents) == 1 + for obj in contents: + obj_d = obj.to_dict() + del obj_d['ctime'] + assert obj_d == expected_cont def test_content_add_metadata_different_input(self, swh_storage): cont = data.cont @@ -377,7 +389,7 @@ actual_result = swh_storage.content_add_metadata([cont, cont2]) assert actual_result == { 'content:add': 2, - } + } def test_content_add_metadata_collision(self, swh_storage): cont1 = data.cont @@ -601,7 +613,7 @@ for i in range(nb_partitions): actual_result = swh_storage.content_get_partition( - i, nb_partitions, limit=len(swh_contents)+1) + i, nb_partitions, limit=len(swh_contents) + 1) for cont in actual_result['contents']: seen_sha1s.append(cont['sha1']) @@ -684,7 +696,7 @@ assert actual_result == {'directory:add': 1} assert list(swh_storage.journal_writer.journal.objects) == \ - [('directory', data.dir)] + [('directory', Directory.from_dict(data.dir))] actual_data = list(swh_storage.directory_ls(data.dir['id'])) expected_data = list(transform_entries(data.dir)) @@ -706,7 +718,7 @@ assert actual_result == {'directory:add': 1} assert list(swh_storage.journal_writer.journal.objects) == \ - [('directory', data.dir)] + [('directory', Directory.from_dict(data.dir))] swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['directory'] == 1 @@ -732,13 +744,13 @@ assert actual_result == {'directory:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('directory', data.dir)] + == [('directory', Directory.from_dict(data.dir))] actual_result = swh_storage.directory_add([data.dir]) assert actual_result == {'directory:add': 0} assert list(swh_storage.journal_writer.journal.objects) \ - == [('directory', data.dir)] + == [('directory', Directory.from_dict(data.dir))] def test_directory_get_recursive(self, swh_storage): init_missing = list(swh_storage.directory_missing([data.dir['id']])) @@ -749,9 +761,9 @@ assert actual_result == {'directory:add': 3} assert list(swh_storage.journal_writer.journal.objects) == [ - ('directory', data.dir), - ('directory', data.dir2), - ('directory', data.dir3)] + ('directory', Directory.from_dict(data.dir)), + ('directory', Directory.from_dict(data.dir2)), + ('directory', Directory.from_dict(data.dir3))] # List directory containing a file and an unknown subdirectory actual_data = list(swh_storage.directory_ls( @@ -786,9 +798,9 @@ assert actual_result == {'directory:add': 3} assert list(swh_storage.journal_writer.journal.objects) == [ - ('directory', data.dir), - ('directory', data.dir2), - ('directory', data.dir3)] + ('directory', Directory.from_dict(data.dir)), + ('directory', Directory.from_dict(data.dir2)), + ('directory', Directory.from_dict(data.dir3))] # List directory containing a file and an unknown subdirectory actual_data = list(swh_storage.directory_ls(data.dir['id'])) @@ -898,7 +910,7 @@ assert list(end_missing) == [] assert list(swh_storage.journal_writer.journal.objects) \ - == [('revision', data.revision)] + == [('revision', Revision.from_dict(data.revision))] # already there so nothing added actual_result = swh_storage.revision_add([data.revision]) @@ -953,15 +965,15 @@ assert actual_result == {'revision:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('revision', data.revision)] + == [('revision', Revision.from_dict(data.revision))] actual_result = swh_storage.revision_add( [data.revision, data.revision2]) assert actual_result == {'revision:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('revision', data.revision), - ('revision', data.revision2)] + == [('revision', Revision.from_dict(data.revision)), + ('revision', Revision.from_dict(data.revision2))] def test_revision_add_name_clash(self, swh_storage): revision1 = data.revision @@ -1020,8 +1032,8 @@ assert actual_results[1] == normalize_entity(data.revision3) assert list(swh_storage.journal_writer.journal.objects) == [ - ('revision', data.revision3), - ('revision', data.revision4)] + ('revision', Revision.from_dict(data.revision3)), + ('revision', Revision.from_dict(data.revision4))] def test_revision_log_with_limit(self, swh_storage): # given @@ -1114,8 +1126,8 @@ assert list(end_missing) == [] assert list(swh_storage.journal_writer.journal.objects) == [ - ('release', data.release), - ('release', data.release2)] + ('release', Release.from_dict(data.release)), + ('release', Release.from_dict(data.release2))] # already present so nothing added actual_result = swh_storage.release_add([data.release, data.release2]) @@ -1133,8 +1145,8 @@ assert actual_result == {'release:add': 2} assert list(swh_storage.journal_writer.journal.objects) == [ - ('release', data.release), - ('release', data.release2)] + ('release', Release.from_dict(data.release)), + ('release', Release.from_dict(data.release2))] swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['release'] == 2 @@ -1152,7 +1164,7 @@ assert list(end_missing) == [] assert list(swh_storage.journal_writer.journal.objects) \ - == [('release', release)] + == [('release', Release.from_dict(release))] def test_release_add_validation(self, swh_storage): rel = copy.deepcopy(data.release) @@ -1186,14 +1198,14 @@ assert actual_result == {'release:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('release', data.release)] + == [('release', Release.from_dict(data.release))] actual_result = swh_storage.release_add([data.release, data.release2]) assert actual_result == {'release:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('release', data.release), - ('release', data.release2)] + == [('release', Release.from_dict(data.release)), + ('release', Release.from_dict(data.release2))] def test_release_add_name_clash(self, swh_storage): release1 = data.release.copy() @@ -1292,8 +1304,8 @@ del actual_origin2['id'] assert list(swh_storage.journal_writer.journal.objects) \ - == [('origin', actual_origin), - ('origin', actual_origin2)] + == [('origin', Origin.from_dict(actual_origin)), + ('origin', Origin.from_dict(actual_origin2))] swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['origin'] == 2 @@ -1320,8 +1332,8 @@ del actual_origin2['id'] assert list(swh_storage.journal_writer.journal.objects) \ - == [('origin', actual_origin), - ('origin', actual_origin2)] + == [('origin', Origin.from_dict(actual_origin)), + ('origin', Origin.from_dict(actual_origin2))] swh_storage.refresh_stat_counters() assert swh_storage.stat_counters()['origin'] == 2 @@ -1329,13 +1341,13 @@ def test_origin_add_twice(self, swh_storage): add1 = swh_storage.origin_add([data.origin, data.origin2]) assert list(swh_storage.journal_writer.journal.objects) \ - == [('origin', data.origin), - ('origin', data.origin2)] + == [('origin', Origin.from_dict(data.origin)), + ('origin', Origin.from_dict(data.origin2))] add2 = swh_storage.origin_add([data.origin, data.origin2]) assert list(swh_storage.journal_writer.journal.objects) \ - == [('origin', data.origin), - ('origin', data.origin2)] + == [('origin', Origin.from_dict(data.origin)), + ('origin', Origin.from_dict(data.origin2))] assert add1 == add2 @@ -1588,8 +1600,9 @@ assert expected_origin_visit in actual_origin_visits objects = list(swh_storage.journal_writer.journal.objects) - assert ('origin', data.origin2) in objects - assert ('origin_visit', expected_origin_visit) in objects + assert ('origin', Origin.from_dict(data.origin2)) in objects + assert \ + ('origin_visit', OriginVisit.from_dict(expected_origin_visit)) in objects def test_origin_visit_get__unknown_origin(self, swh_storage): assert [] == list(swh_storage.origin_visit_get('foo')) @@ -1645,10 +1658,10 @@ assert visit in actual_origin_visits objects = list(swh_storage.journal_writer.journal.objects) - assert ('origin', data.origin2) in objects + assert ('origin', Origin.from_dict(data.origin2)) in objects for visit in expected_visits: - assert ('origin_visit', visit) in objects + assert ('origin_visit', OriginVisit.from_dict(visit)) in objects def test_origin_visit_add_validation(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin2) @@ -1800,19 +1813,20 @@ 'snapshot': None, } objects = list(swh_storage.journal_writer.journal.objects) - assert ('origin', data.origin) in objects - assert ('origin', data.origin2) in objects - assert ('origin_visit', data1) in objects - assert ('origin_visit', data2) in objects - assert ('origin_visit', data3) in objects - assert ('origin_visit', data4) in objects - assert ('origin_visit', data5) in objects + assert ('origin', Origin.from_dict(data.origin)) in objects + assert ('origin', Origin.from_dict(data.origin2)) in objects + assert ('origin_visit', OriginVisit.from_dict(data1)) in objects + assert ('origin_visit', OriginVisit.from_dict(data2)) in objects + assert ('origin_visit', OriginVisit.from_dict(data3)) in objects + assert ('origin_visit', OriginVisit.from_dict(data4)) in objects + assert ('origin_visit', OriginVisit.from_dict(data5)) in objects def test_origin_visit_update_validation(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) visit = swh_storage.origin_visit_add( origin_url, date=data.date_visit2, type=data.type_visit2) - with pytest.raises(StorageArgumentException, match='status') as cm: + with pytest.raises( + (StorageArgumentException, ValueError), match='status') as cm: swh_storage.origin_visit_update( origin_url, visit.visit, status='foobar') @@ -1986,9 +2000,9 @@ 'snapshot': None, } assert list(swh_storage.journal_writer.journal.objects) == [ - ('origin', data.origin2), - ('origin_visit', data1), - ('origin_visit', data2)] + ('origin', Origin.from_dict(data.origin2)), + ('origin_visit', OriginVisit.from_dict(data1)), + ('origin_visit', OriginVisit.from_dict(data2))] def test_origin_visit_upsert_existing(self, swh_storage): # given @@ -2044,9 +2058,9 @@ 'snapshot': None, } assert list(swh_storage.journal_writer.journal.objects) == [ - ('origin', data.origin2), - ('origin_visit', data1), - ('origin_visit', data2)] + ('origin', Origin.from_dict(data.origin2)), + ('origin_visit', OriginVisit.from_dict(data1)), + ('origin_visit', OriginVisit.from_dict(data2))] def test_origin_visit_upsert_missing_visit_id(self, swh_storage): # given @@ -2065,7 +2079,7 @@ })]) assert list(swh_storage.journal_writer.journal.objects) == [ - ('origin', data.origin2) + ('origin', Origin.from_dict(data.origin2)) ] def test_origin_visit_get_by_no_result(self, swh_storage): @@ -2112,7 +2126,7 @@ # Status filter: all three visits are status=ongoing, so no visit # returned assert swh_storage.origin_visit_get_latest( - origin_url, allowed_statuses=['full']) is None + origin_url, allowed_statuses=['full']) is None # Mark the first visit as completed and check status filter again swh_storage.origin_visit_update( @@ -2163,13 +2177,13 @@ assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] - } == swh_storage.origin_visit_get_latest(origin_url) + } == swh_storage.origin_visit_get_latest(origin_url) assert { **origin_visit3, 'snapshot': data.complete_snapshot['id'] - } == swh_storage.origin_visit_get_latest( - origin_url, require_snapshot=True) + } == swh_storage.origin_visit_get_latest( + origin_url, require_snapshot=True) def test_person_fullname_unicity(self, swh_storage): # given (person injection through revisions for example) @@ -2231,10 +2245,10 @@ 'snapshot': data.empty_snapshot['id'], } assert list(swh_storage.journal_writer.journal.objects) == \ - [('origin', data.origin), - ('origin_visit', data1), - ('snapshot', data.empty_snapshot), - ('origin_visit', data2)] + [('origin', Origin.from_dict(data.origin)), + ('origin_visit', OriginVisit.from_dict(data1)), + ('snapshot', Snapshot.from_dict(data.empty_snapshot)), + ('origin_visit', OriginVisit.from_dict(data2))] def test_snapshot_add_get_complete(self, swh_storage): origin_url = data.origin['url'] @@ -2300,14 +2314,14 @@ assert actual_result == {'snapshot:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('snapshot', data.empty_snapshot)] + == [('snapshot', Snapshot.from_dict(data.empty_snapshot))] actual_result = swh_storage.snapshot_add([data.snapshot]) assert actual_result == {'snapshot:add': 1} assert list(swh_storage.journal_writer.journal.objects) \ - == [('snapshot', data.empty_snapshot), - ('snapshot', data.snapshot)] + == [('snapshot', Snapshot.from_dict(data.empty_snapshot)), + ('snapshot', Snapshot.from_dict(data.snapshot))] def test_snapshot_add_validation(self, swh_storage): snap = copy.deepcopy(data.snapshot) @@ -2317,7 +2331,7 @@ swh_storage.snapshot_add([snap]) snap = copy.deepcopy(data.snapshot) - snap['branches'][b'foo'] = {'target': b'\x42'*20} + snap['branches'][b'foo'] = {'target': b'\x42' * 20} with pytest.raises(StorageArgumentException, match='target_type'): swh_storage.snapshot_add([snap]) @@ -2546,7 +2560,7 @@ snapshot=data.snapshot['id']) assert list(swh_storage.journal_writer.journal.objects) == [ - ('snapshot', data.snapshot)] + ('snapshot', Snapshot.from_dict(data.snapshot))] def test_snapshot_add_twice__by_origin_visit(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) @@ -2614,12 +2628,12 @@ 'snapshot': data.snapshot['id'], } assert list(swh_storage.journal_writer.journal.objects) \ - == [('origin', data.origin), - ('origin_visit', data1), - ('snapshot', data.snapshot), - ('origin_visit', data2), - ('origin_visit', data3), - ('origin_visit', data4)] + == [('origin', Origin.from_dict(data.origin)), + ('origin_visit', OriginVisit.from_dict(data1)), + ('snapshot', Snapshot.from_dict(data.snapshot)), + ('origin_visit', OriginVisit.from_dict(data2)), + ('origin_visit', OriginVisit.from_dict(data3)), + ('origin_visit', OriginVisit.from_dict(data4))] def test_snapshot_get_latest(self, swh_storage): origin_url = swh_storage.origin_add_one(data.origin) @@ -2838,18 +2852,18 @@ actually_present = swh_storage.content_find( {'sha1': cont['sha1']} - ) + ) assert 1 == len(actually_present) actually_present[0].pop('ctime') assert actually_present[0] == { - 'sha1': cont['sha1'], - 'sha256': cont['sha256'], - 'sha1_git': cont['sha1_git'], - 'blake2s256': cont['blake2s256'], - 'length': cont['length'], - 'status': 'visible' - } + 'sha1': cont['sha1'], + 'sha256': cont['sha256'], + 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], + 'length': cont['length'], + 'status': 'visible' + } # 2. with something to find actually_present = swh_storage.content_find( @@ -2858,13 +2872,13 @@ actually_present[0].pop('ctime') assert actually_present[0] == { - 'sha1': cont['sha1'], - 'sha256': cont['sha256'], - 'sha1_git': cont['sha1_git'], - 'blake2s256': cont['blake2s256'], - 'length': cont['length'], - 'status': 'visible' - } + 'sha1': cont['sha1'], + 'sha256': cont['sha256'], + 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], + 'length': cont['length'], + 'status': 'visible' + } # 3. with something to find actually_present = swh_storage.content_find( @@ -2873,13 +2887,13 @@ actually_present[0].pop('ctime') assert actually_present[0] == { - 'sha1': cont['sha1'], - 'sha256': cont['sha256'], - 'sha1_git': cont['sha1_git'], - 'blake2s256': cont['blake2s256'], - 'length': cont['length'], - 'status': 'visible' - } + 'sha1': cont['sha1'], + 'sha256': cont['sha256'], + 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], + 'length': cont['length'], + 'status': 'visible' + } # 4. with something to find actually_present = swh_storage.content_find({ @@ -2892,13 +2906,13 @@ actually_present[0].pop('ctime') assert actually_present[0] == { - 'sha1': cont['sha1'], - 'sha256': cont['sha256'], - 'sha1_git': cont['sha1_git'], - 'blake2s256': cont['blake2s256'], - 'length': cont['length'], - 'status': 'visible' - } + 'sha1': cont['sha1'], + 'sha256': cont['sha256'], + 'sha1_git': cont['sha1_git'], + 'blake2s256': cont['blake2s256'], + 'length': cont['length'], + 'status': 'visible' + } def test_content_find_with_non_present_content(self, swh_storage): # 1. with something that does not exist @@ -2944,7 +2958,7 @@ actual_result[1].pop('ctime') expected_result = [ - cont1, duplicate_cont + cont1, duplicate_cont ] for result in expected_result: assert result in actual_result @@ -2962,7 +2976,7 @@ finder = { 'sha256': duplicate_cont['sha256'] - } + } actual_result = list(swh_storage.content_find(finder)) assert len(actual_result) == 2 @@ -3005,7 +3019,7 @@ swh_storage.content_add([cont1, duplicate_cont]) finder = { 'blake2s256': duplicate_cont['blake2s256'] - } + } actual_result = list(swh_storage.content_find(finder)) cont1.pop('data') @@ -3218,29 +3232,29 @@ tool = tools[0] swh_storage.metadata_provider_add( - data.provider['name'], - data.provider['type'], - data.provider['url'], - data.provider['metadata']) + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) provider = swh_storage.metadata_provider_get_by({ - 'provider_name': data.provider['name'], - 'provider_url': data.provider['url'] - }) + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] + }) # when adding for the same origin 2 metadatas n_om = len(list(swh_storage.origin_metadata_get_by(origin['url']))) swh_storage.origin_metadata_add( - origin['url'], - data.origin_metadata['discovery_date'], - provider['id'], - tool['id'], - data.origin_metadata['metadata']) + origin['url'], + data.origin_metadata['discovery_date'], + provider['id'], + tool['id'], + data.origin_metadata['metadata']) swh_storage.origin_metadata_add( - origin['url'], - '2015-01-01 23:00:00+00', - provider['id'], - tool['id'], - data.origin_metadata2['metadata']) + origin['url'], + '2015-01-01 23:00:00+00', + provider['id'], + tool['id'], + data.origin_metadata2['metadata']) n_actual_om = len(list( swh_storage.origin_metadata_get_by(origin['url']))) # then @@ -3264,23 +3278,23 @@ tool = swh_storage.tool_add([data.metadata_tool])[0] # when adding for the same origin 2 metadatas swh_storage.origin_metadata_add( - origin_url, - data.origin_metadata['discovery_date'], - provider['id'], - tool['id'], - data.origin_metadata['metadata']) + origin_url, + data.origin_metadata['discovery_date'], + provider['id'], + tool['id'], + data.origin_metadata['metadata']) swh_storage.origin_metadata_add( - origin_url2, - data.origin_metadata2['discovery_date'], - provider['id'], - tool['id'], - data.origin_metadata2['metadata']) + origin_url2, + data.origin_metadata2['discovery_date'], + provider['id'], + tool['id'], + data.origin_metadata2['metadata']) swh_storage.origin_metadata_add( - origin_url, - data.origin_metadata2['discovery_date'], - provider['id'], - tool['id'], - data.origin_metadata2['metadata']) + origin_url, + data.origin_metadata2['discovery_date'], + provider['id'], + tool['id'], + data.origin_metadata2['metadata']) all_metadatas = list(sorted(swh_storage.origin_metadata_get_by( origin_url), key=lambda x: x['discovery_date'])) metadatas_for_origin2 = list(swh_storage.origin_metadata_get_by( @@ -3288,8 +3302,8 @@ expected_results = [{ 'origin_url': origin_url, 'discovery_date': datetime.datetime( - 2015, 1, 1, 23, 0, - tzinfo=datetime.timezone.utc), + 2015, 1, 1, 23, 0, + tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' @@ -3302,8 +3316,8 @@ }, { 'origin_url': origin_url, 'discovery_date': datetime.datetime( - 2017, 1, 1, 23, 0, - tzinfo=datetime.timezone.utc), + 2017, 1, 1, 23, 0, + tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' @@ -3344,26 +3358,26 @@ swh_storage.origin_add([data.origin]) swh_storage.origin_add([data.origin2]) provider1_id = swh_storage.metadata_provider_add( - data.provider['name'], - data.provider['type'], - data.provider['url'], - data.provider['metadata']) + data.provider['name'], + data.provider['type'], + data.provider['url'], + data.provider['metadata']) provider1 = swh_storage.metadata_provider_get_by({ - 'provider_name': data.provider['name'], - 'provider_url': data.provider['url'] - }) + 'provider_name': data.provider['name'], + 'provider_url': data.provider['url'] + }) assert provider1 == swh_storage.metadata_provider_get(provider1_id) provider2_id = swh_storage.metadata_provider_add( - 'swMATH', - 'registry', - 'http://www.swmath.org/', - {'email': 'contact@swmath.org', - 'license': 'All rights reserved'}) + 'swMATH', + 'registry', + 'http://www.swmath.org/', + {'email': 'contact@swmath.org', + 'license': 'All rights reserved'}) provider2 = swh_storage.metadata_provider_get_by({ - 'provider_name': 'swMATH', - 'provider_url': 'http://www.swmath.org/' - }) + 'provider_name': 'swMATH', + 'provider_url': 'http://www.swmath.org/' + }) assert provider2 == swh_storage.metadata_provider_get(provider2_id) # using the only tool now inserted in the data.sql, but for this @@ -3393,8 +3407,8 @@ expected_results = [{ 'origin_url': origin_url2, 'discovery_date': datetime.datetime( - 2017, 1, 1, 23, 0, - tzinfo=datetime.timezone.utc), + 2017, 1, 1, 23, 0, + tzinfo=datetime.timezone.utc), 'metadata': { 'name': 'test_origin_metadata', 'version': '0.0.1' @@ -3582,7 +3596,7 @@ 'id': origin_id, } for (origin_id, origin) - in origins_with_id[origin_from-1:origin_from+origin_count-1] + in origins_with_id[origin_from - 1:origin_from + origin_count - 1] ] assert actual_origins == expected_origins @@ -3605,7 +3619,7 @@ page_token = result.get('next_page_token') if page_token is None: - assert i*limit >= len(swh_origins) + assert i * limit >= len(swh_origins) break else: assert len(result['origins']) == limit @@ -3824,7 +3838,7 @@ assert actual_result == { 'content:add': 1, 'content:add:bytes': cont['length'], - } + } if hasattr(swh_storage, 'objstorage'): assert cont['sha1'] in swh_storage.objstorage.objstorage @@ -3840,10 +3854,14 @@ expected_cont = cont.copy() del expected_cont['data'] - journal_objects = list(swh_storage.journal_writer.journal.objects) - for (obj_type, obj) in journal_objects: - del obj['ctime'] - assert journal_objects == [('content', expected_cont)] + contents = [ + obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects + if obj_type == 'content'] + assert len(contents) == 1 + for obj in contents: + obj_d = obj.to_dict() + del obj_d['ctime'] + assert obj_d == expected_cont def test_content_add_metadata_db(self, swh_storage): cont = data.cont @@ -3854,7 +3872,7 @@ assert actual_result == { 'content:add': 1, - } + } if hasattr(swh_storage, 'objstorage'): assert cont['sha1'] not in swh_storage.objstorage.objstorage @@ -3866,8 +3884,13 @@ assert datum == (cont['sha1'], cont['sha1_git'], cont['sha256'], cont['length'], 'visible') - assert list(swh_storage.journal_writer.journal.objects) == [ - ('content', cont)] + contents = [ + obj for (obj_type, obj) in swh_storage.journal_writer.journal.objects + if obj_type == 'content'] + assert len(contents) == 1 + for obj in contents: + obj_d = obj.to_dict() + assert obj_d == cont def test_skipped_content_add_db(self, swh_storage): cont = data.skipped_cont