swh_scheduler = <swh.scheduler.backend.SchedulerBackend object at 0x7f03dfb11da0>
requests_mock = <requests_mock.mocker.Mocker object at 0x7f03dfb112e8>
datadir = '/var/lib/jenkins/workspace/DLS/tests-on-diff/.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/tests/data'
mocker = <pytest_mock.plugin.MockerFixture object at 0x7f03dfb115f8>
def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker):
    """
    Simulate an incremental listing of an artificially restricted SourceForge.
    The same dataset as the full run is used, since it is enough to validate
    the different cases.
    """
    lister = SourceForgeLister(scheduler=swh_scheduler, incremental=True)

    requests_mock.get(
        MAIN_SITEMAP_URL,
        text=get_main_sitemap(datadir),
        additional_matcher=_check_request_headers,
    )

    def not_called(request, *args, **kwargs):
        raise AssertionError(f"Should not have been called: '{request.url}'")
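
    # Per the faked state set below, sitemap-0 changed since the last run and
    # must be fetched again, while sitemap-1 did not; registering `not_called`
    # for sitemap-1 makes the test fail loudly if the lister requests it anyway.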
    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
        text=get_subsitemap_0(datadir),
        additional_matcher=_check_request_headers,
    )
    requests_mock.get(
        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
        text=not_called,
        additional_matcher=_check_request_headers,
    )

    def filtered_get_project_json(request, context):
        # These projects should not be requested again
        assert URLS_MATCHER[request.url] not in {"adobe", "mojunk"}
        return get_project_json(datadir, request, context)

    requests_mock.get(
        re.compile("https://sourceforge.net/rest/.*"),
        json=filtered_get_project_json,
        additional_matcher=_check_request_headers,
    )

    faked_listed_origins = [
        # mramm: changed
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mramm/files",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mramm/git",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/mramm/svn",
            last_update=iso8601.parse_date("2019-01-01"),
        ),
        # os3dmodels: stayed the same, even though its subsitemap has changed
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/os3dmodels/git",
            last_update=iso8601.parse_date("2017-03-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/os3dmodels/svn",
            last_update=iso8601.parse_date("2017-03-31"),
        ),
        # others: stayed the same, should be skipped
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mojunk/git",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="git",
            url="https://git.code.sf.net/p/mojunk/git2",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/mojunk/svn",
            last_update=iso8601.parse_date("2017-12-31"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="svn",
            url="https://svn.code.sf.net/p/backapps/website/code",
            last_update=iso8601.parse_date("2021-02-11"),
        ),
        ListedOrigin(
            lister_id=lister.lister_obj.id,
            visit_type="hg",
            url="http://hg.code.sf.net/p/random-mercurial/hg",
            last_update=iso8601.parse_date("2019-05-02"),
        ),
    ]
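
    # Seed the scheduler with the origins of a (faked) previous full run, so
    # the incremental pass has prior last_update values to compare against.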
    swh_scheduler.record_listed_origins(faked_listed_origins)

    to_date = datetime.date.fromisoformat
    faked_state = SourceForgeListerState(
        subsitemap_last_modified={
            # changed
            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": to_date(
                "2021-02-18"
            ),
            # stayed the same
            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": to_date(
                "2021-03-18"
            ),
        },
        empty_projects={
            "https://sourceforge.net/rest/p/backapps": to_date("2020-02-11"),
            "https://sourceforge.net/rest/adobe/adobexmp": to_date("2017-10-17"),
        },
    )
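    # The faked state marks adobe/adobexmp as a known empty project and the
    # mojunk origins above are unchanged, which is why filtered_get_project_json
    # asserts that neither project is fetched again.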
    lister.state = faked_state

>       stats = lister.run()
.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/tests/test_lister.py:264:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
.tox/py3/lib/python3.7/site-packages/swh/lister/pattern.py:130: in run
    full_stats.origins += self.send_origins(origins)
.tox/py3/lib/python3.7/site-packages/swh/lister/pattern.py:233: in send_origins
    for batch_origins in grouper(origins, n=1000):
.tox/py3/lib/python3.7/site-packages/swh/core/utils.py:47: in grouper
    for _data in itertools.zip_longest(*args, fillvalue=stop_value):
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <swh.lister.sourceforge.lister.SourceForgeLister object at 0x7f03dfb11160>
page = [SourceForgeListerEntry(vcs=<VcsNames.GIT: 'git'>, url='https://git.code.sf.net/p/mramm/files', last_modified='2019-04...urceForgeListerEntry(vcs=<VcsNames.GIT: 'git'>, url='https://git.code.sf.net/p/mramm/git', last_modified='2019-04-04')]

    def get_origins_from_page(
        self, page: SourceForgeListerPage
    ) -> Iterator[ListedOrigin]:
        assert self.lister_obj.id is not None

        for hit in page:
            last_update = datetime.datetime.combine(
>               hit.last_modified, datetime.time(tzinfo=datetime.timezone.utc)
            )
E           TypeError: combine() argument 1 must be datetime.date, not str

.tox/py3/lib/python3.7/site-packages/swh/lister/sourceforge/lister.py:251: TypeError
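
The failure itself is visible in the locals above: in the incremental run the
SourceForgeListerEntry values carry last_modified as plain ISO strings
(last_modified='2019-04-04'), while datetime.datetime.combine() requires a
datetime.date as its first argument. Below is a minimal, hypothetical sketch of
the kind of normalization that would avoid the TypeError; it assumes the
sitemaps always yield "YYYY-MM-DD" strings, and it is an illustration rather
than the project's actual fix:

    import datetime

    # Hypothetical helper: accept either a datetime.date or an ISO
    # "YYYY-MM-DD" string and return an aware UTC datetime, which is what
    # get_origins_from_page ultimately needs for last_update.
    def to_utc_datetime(last_modified):
        if isinstance(last_modified, str):
            last_modified = datetime.date.fromisoformat(last_modified)
        return datetime.datetime.combine(
            last_modified, datetime.time(tzinfo=datetime.timezone.utc)
        )

    # Both spellings of the same date normalize to the same aware datetime.
    assert to_utc_datetime("2019-04-04") == to_utc_datetime(datetime.date(2019, 4, 4))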
TEST RESULT
- Run At: Nov 23 2021, 10:55 AM