The origin, origin_visit and origin_visit_status replayers often failed with the following stack trace:
Traceback (most recent call last):
  File "/usr/bin/swh", line 11, in <module>
    load_entry_point('swh.core==0.14.3', 'console_scripts', 'swh')()
  File "/usr/lib/python3/dist-packages/swh/core/cli/__init__.py", line 185, in main
    return swh(auto_envvar_prefix="SWH")
  File "/usr/lib/python3/dist-packages/click/core.py", line 764, in __call__
    return self.main(*args, **kwargs)
  File "/usr/lib/python3/dist-packages/click/core.py", line 717, in main
    rv = self.invoke(ctx)
  File "/usr/lib/python3/dist-packages/click/core.py", line 1137, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/usr/lib/python3/dist-packages/click/core.py", line 1137, in invoke
    return _process_result(sub_ctx.command.invoke(sub_ctx))
  File "/usr/lib/python3/dist-packages/click/core.py", line 956, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/lib/python3/dist-packages/click/core.py", line 555, in invoke
    return callback(*args, **kwargs)
  File "/usr/lib/python3/dist-packages/click/decorators.py", line 17, in new_func
    return f(get_current_context(), *args, **kwargs)
  File "/usr/lib/python3/dist-packages/swh/storage/cli.py", line 194, in replay
    client.process(worker_fn)
  File "/usr/lib/python3/dist-packages/swh/journal/client.py", line 265, in process
    batch_processed, at_eof = self.handle_messages(messages, worker_fn)
  File "/usr/lib/python3/dist-packages/swh/journal/client.py", line 292, in handle_messages
    worker_fn(dict(objects))
  File "/usr/lib/python3/dist-packages/swh/storage/replay.py", line 62, in process_replay_objects
    _insert_objects(object_type, objects, storage)
  File "/usr/lib/python3/dist-packages/swh/storage/replay.py", line 144, in _insert_objects
    storage.origin_add(origins)
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/storage.py", line 977, in origin_add
    origins = [ori for ori in to_add if self.origin_get_one(ori.url) is None]
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/storage.py", line 977, in <listcomp>
    origins = [ori for ori in to_add if self.origin_get_one(ori.url) is None]
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/storage.py", line 877, in origin_get_one
    rows = list(self._cql_runner.origin_get_by_url(origin_url))
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/cql.py", line 783, in origin_get_by_url
    return self.origin_get_by_sha1(hash_url(url))
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/common.py", line 16, in hash_url
    return hashlib.sha1(url.encode("ascii")).digest()
UnicodeEncodeError: 'ascii' codec can't encode character '\u212a' in position 22: ordinal not in range(128)
Another example:
  File "/usr/lib/python3/dist-packages/swh/storage/cassandra/common.py", line 16, in hash_url
    return hashlib.sha1(url.encode("ascii")).digest()
UnicodeEncodeError: 'ascii' codec can't encode characters in position 41-43: ordinal not in range(128)