
Jenkins > .tox.py3.lib.python3.7.site-packages.swh.lister.sourceforge.tests.test_lister::test_sourceforge_lister_incremental
Failed

TEST RESULT

Run At
Nov 23 2021, 10:55 AM
Details
swh_scheduler = <swh.scheduler.backend.SchedulerBackend object at 0x7f03dfb11da0>
requests_mock = <requests_mock.mocker.Mocker object at 0x7f03dfb112e8>
datadir = '/var/lib/jenkins/workspace/DLS/tests-on-diff/.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/tests/data'
mocker = <pytest_mock.plugin.MockerFixture object at 0x7f03dfb115f8>

    def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker):
        """
        Simulate an incremental listing of an artificially restricted sourceforge.
        Same dataset as the full run, because it's enough to validate the
        different cases.
        """
        lister = SourceForgeLister(scheduler=swh_scheduler, incremental=True)

        requests_mock.get(
            MAIN_SITEMAP_URL,
            text=get_main_sitemap(datadir),
            additional_matcher=_check_request_headers,
        )

        def not_called(request, *args, **kwargs):
            raise AssertionError(f"Should not have been called: '{request.url}'")

        requests_mock.get(
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
            text=get_subsitemap_0(datadir),
            additional_matcher=_check_request_headers,
        )
        requests_mock.get(
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
            text=not_called,
            additional_matcher=_check_request_headers,
        )

        def filtered_get_project_json(request, context):
            # These projects should not be requested again
            assert URLS_MATCHER[request.url] not in {"adobe", "mojunk"}
            return get_project_json(datadir, request, context)

        requests_mock.get(
            re.compile("https://sourceforge.net/rest/.*"),
            json=filtered_get_project_json,
            additional_matcher=_check_request_headers,
        )

        faked_listed_origins = [
            # mramm: changed
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="git",
                url="https://git.code.sf.net/p/mramm/files",
                last_update=iso8601.parse_date("2019-01-01"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="git",
                url="https://git.code.sf.net/p/mramm/git",
                last_update=iso8601.parse_date("2019-01-01"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="svn",
                url="https://svn.code.sf.net/p/mramm/svn",
                last_update=iso8601.parse_date("2019-01-01"),
            ),
            # stayed the same, even though its subsitemap has changed
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="git",
                url="https://git.code.sf.net/p/os3dmodels/git",
                last_update=iso8601.parse_date("2017-03-31"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="svn",
                url="https://svn.code.sf.net/p/os3dmodels/svn",
                last_update=iso8601.parse_date("2017-03-31"),
            ),
            # others: stayed the same, should be skipped
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="git",
                url="https://git.code.sf.net/p/mojunk/git",
                last_update=iso8601.parse_date("2017-12-31"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="git",
                url="https://git.code.sf.net/p/mojunk/git2",
                last_update=iso8601.parse_date("2017-12-31"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="svn",
                url="https://svn.code.sf.net/p/mojunk/svn",
                last_update=iso8601.parse_date("2017-12-31"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="svn",
                url="https://svn.code.sf.net/p/backapps/website/code",
                last_update=iso8601.parse_date("2021-02-11"),
            ),
            ListedOrigin(
                lister_id=lister.lister_obj.id,
                visit_type="hg",
                url="http://hg.code.sf.net/p/random-mercurial/hg",
                last_update=iso8601.parse_date("2019-05-02"),
            ),
        ]
        swh_scheduler.record_listed_origins(faked_listed_origins)

        to_date = datetime.date.fromisoformat
        faked_state = SourceForgeListerState(
            subsitemap_last_modified={
                # changed
                "https://sourceforge.net/allura_sitemap/sitemap-0.xml": to_date(
                    "2021-02-18"
                ),
                # stayed the same
                "https://sourceforge.net/allura_sitemap/sitemap-1.xml": to_date(
                    "2021-03-18"
                ),
            },
            empty_projects={
                "https://sourceforge.net/rest/p/backapps": to_date("2020-02-11"),
                "https://sourceforge.net/rest/adobe/adobexmp": to_date("2017-10-17"),
            },
        )
        lister.state = faked_state

>       stats = lister.run()

.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/tests/test_lister.py:264:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/lister/pattern.py:130: in run
    full_stats.origins += self.send_origins(origins)
.tox/py3/lib/python3.7/site-packages/swh/lister/pattern.py:233: in send_origins
    for batch_origins in grouper(origins, n=1000):
.tox/py3/lib/python3.7/site-packages/swh/core/utils.py:47: in grouper
    for _data in itertools.zip_longest(*args, fillvalue=stop_value):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <swh.lister.sourceforge.lister.SourceForgeLister object at 0x7f03dfb11160>
page = [SourceForgeListerEntry(vcs=<VcsNames.GIT: 'git'>, url='https://git.code.sf.net/p/mramm/files', last_modified='2019-04...urceForgeListerEntry(vcs=<VcsNames.GIT: 'git'>, url='https://git.code.sf.net/p/mramm/git', last_modified='2019-04-04')]

    def get_origins_from_page(
        self, page: SourceForgeListerPage
    ) -> Iterator[ListedOrigin]:
        assert self.lister_obj.id is not None

        for hit in page:
            last_update = datetime.datetime.combine(
>               hit.last_modified, datetime.time(tzinfo=datetime.timezone.utc)
            )
E           TypeError: combine() argument 1 must be datetime.date, not str

.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/lister.py:251: TypeError