diff --git a/swh/indexer/metadata_dictionary/utils.py b/swh/indexer/metadata_dictionary/utils.py --- a/swh/indexer/metadata_dictionary/utils.py +++ b/swh/indexer/metadata_dictionary/utils.py @@ -107,6 +107,10 @@ """ if not isinstance(url, str): return - if " " in url or not urllib.parse.urlparse(url).netloc: + try: + parsed_url = urllib.parse.urlparse(url) + except Exception: + return + if " " in url or not parsed_url.netloc: return graph.add((subject, predicate, rdflib.term.URIRef(url))) diff --git a/swh/indexer/tests/metadata_dictionary/test_npm.py b/swh/indexer/tests/metadata_dictionary/test_npm.py --- a/swh/indexer/tests/metadata_dictionary/test_npm.py +++ b/swh/indexer/tests/metadata_dictionary/test_npm.py @@ -389,6 +389,17 @@ "version": "1.0.0", } + package_json = rb"""{ + "version": "1.0.0", + "repository": "git+http://\\u001b[D\\u001b[D\\u001b[Ds\\u001b[C\\u001b[C\\u001b[D\\u001b://github.com/dearzoe/array-combination" +}""" # noqa + result = MAPPINGS["NpmMapping"]().translate(package_json) + assert result == { + "@context": "https://doi.org/10.5063/schema/codemeta-2.0", + "type": "SoftwareSourceCode", + "version": "1.0.0", + } + def test_npm_invalid_licenses(): package_json = rb"""{