When scanning a recent checkout of the Linux kernel, I get this traceback:
$ swh scanner scan -f text . Traceback (most recent call last): File "/home/zack/.virtualenvs/swh/bin/swh", line 11, in <module> load_entry_point('swh.core', 'console_scripts', 'swh')() File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-core/swh/core/cli/__init__.py", line 122, in main return swh(auto_envvar_prefix="SWH") File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 764, in __call__ return self.main(*args, **kwargs) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 717, in main rv = self.invoke(ctx) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 1137, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 1137, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 956, in invoke return ctx.invoke(self.callback, **ctx.params) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/core.py", line 555, in invoke return callback(*args, **kwargs) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/click/decorators.py", line 17, in new_func return f(get_current_context(), *args, **kwargs) File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/cli.py", line 56, in scan loop.run_until_complete(run(path, api_url, source_tree)) File "/usr/lib/python3.7/asyncio/base_events.py", line 587, in run_until_complete return future.result() File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 138, in run await _scan(root, session, api_url, source_tree) File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 135, in _scan await _scan(path, session, api_url, source_tree) File 
"/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 125, in _scan for path, pid, found in await parse_path(root, session, api_url): File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 104, in parse_path parsed_pids = await pids_discovery(list(parsed_paths.values()), session, api_url) File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 61, in pids_discovery return await make_request(pids) File "/home/zack/dati/projects/sw-heritage/git/swh-environment/swh-scanner/swh/scanner/scanner.py", line 47, in make_request async with session.post(endpoint, json=pids) as resp: File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/aiohttp/client.py", line 1005, in __aenter__ self._resp = await self._coro File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/aiohttp/client.py", line 497, in _request await resp.start(conn) File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/aiohttp/client_reqrep.py", line 844, in start message, payload = await self._protocol.read() # type: ignore # noqa File "/home/zack/.virtualenvs/swh/lib/python3.7/site-packages/aiohttp/streams.py", line 588, in read await self._waiter aiohttp.client_exceptions.ServerDisconnectedError: None
It also happens when I add `-u https://archive.internal.softwareheritage.org/api/1`, so I do not think it is related to rate limiting.