Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/npm.py
Show First 20 Lines • Show All 129 Lines • ▼ Show 20 Lines | def normalize_author(self, d): | ||||
if email and isinstance(email, str): | if email and isinstance(email, str): | ||||
author[SCHEMA_URI + "email"] = email | author[SCHEMA_URI + "email"] = email | ||||
if url and isinstance(url, str): | if url and isinstance(url, str): | ||||
author[SCHEMA_URI + "url"] = {"@id": url} | author[SCHEMA_URI + "url"] = {"@id": url} | ||||
return {"@list": [author]} | return {"@list": [author]} | ||||
def normalize_description(self, description): | def normalize_description(self, description): | ||||
r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | ||||
mistake that causes issues in the database because of null bytes in JSON. | mistake that causes issues in the database because of null bytes in JSON. | ||||
>>> NpmMapping().normalize_description("foo bar") | >>> NpmMapping().normalize_description("foo bar") | ||||
vlorentz: you forgot to undo this change too | |||||
'foo bar' | 'foo bar' | ||||
>>> NpmMapping().normalize_description( | >>> NpmMapping().normalize_description( | ||||
... "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00" | ... "\ufffd\ufffd#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 \x00" | ||||
... ) | ... ) | ||||
'foo bar' | 'foo bar' | ||||
>>> NpmMapping().normalize_description( | >>> NpmMapping().normalize_description( | ||||
... "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 " | ... "\ufffd\ufffd\x00#\x00 \x00f\x00o\x00o\x00 \x00b\x00a\x00r\x00\r\x00 " | ||||
... ) | ... ) | ||||
'foo bar' | 'foo bar' | ||||
>>> NpmMapping().normalize_description( | >>> NpmMapping().normalize_description( | ||||
... # invalid UTF-16 and meaningless UTF-8: | ... # invalid UTF-16 and meaningless UTF-8: | ||||
... "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00" | ... "\ufffd\ufffd\x00#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00" | ||||
... ) is None | ... ) is None | ||||
True | True | ||||
>>> NpmMapping().normalize_description( | >>> NpmMapping().normalize_description( | ||||
... # ditto (it looks like little-endian at first) | ... # ditto (it looks like little-endian at first) | ||||
... "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00" | ... "\ufffd\ufffd#\x00\x00\x00 \x00\x00\x00\x00f\x00\x00\x00\x00\x00" | ||||
... ) is None | ... ) is None | ||||
True | True | ||||
>>> NpmMapping().normalize_description(None) is None | >>> NpmMapping().normalize_description(None) is None | ||||
True | True | ||||
""" | """ | ||||
if description is None: | if description is None or type(description) != str: | ||||
vlorentz (unsubmitted, not done): Please undo these changes for now, they are out of scope of this diff. Then, please submit another diff with only this change. | |||||
return None | return None | ||||
# XXX: if this function ever needs to support more cases, consider | # XXX: if this function ever needs to support more cases, consider | ||||
# switching to https://pypi.org/project/ftfy/ instead of adding more hacks | # switching to https://pypi.org/project/ftfy/ instead of adding more hacks | ||||
if description.startswith("\ufffd\ufffd") and "\x00" in description: | if description.startswith("\ufffd\ufffd") and "\x00" in description: | ||||
# 2 unicode replacement characters followed by '# ' encoded as UTF-16 | # 2 unicode replacement characters followed by '# ' encoded as UTF-16 | ||||
# is a common mistake, which indicates a README.md was saved as UTF-16, | # is a common mistake, which indicates a README.md was saved as UTF-16, | ||||
# and some NPM tool opened it as UTF-8 and used the first line as | # and some NPM tool opened it as UTF-8 and used the first line as | ||||
# description. | # description. | ||||
▲ Show 20 Lines • Show All 57 Lines • Show Last 20 Lines |
you forgot to undo this change too