Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/npm.py
# Copyright (C) 2018-2022 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import urllib.parse | |||||
from rdflib import RDF, BNode, Graph, Literal, URIRef | from rdflib import RDF, BNode, Graph, Literal, URIRef | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE | from swh.indexer.codemeta import CROSSWALK_TABLE | ||||
from swh.indexer.namespaces import SCHEMA | from swh.indexer.namespaces import SCHEMA | ||||
from .base import JsonMapping, SingleFileIntrinsicMapping | from .base import JsonMapping, SingleFileIntrinsicMapping | ||||
from .utils import add_list, prettyprint_graph # noqa | from .utils import add_list, add_url_if_valid, prettyprint_graph # noqa | ||||
SPDX = URIRef("https://spdx.org/licenses/") | SPDX = URIRef("https://spdx.org/licenses/") | ||||
class NpmMapping(JsonMapping, SingleFileIntrinsicMapping): | class NpmMapping(JsonMapping, SingleFileIntrinsicMapping): | ||||
""" | """ | ||||
dedicated class for NPM (package.json) mapping and translation | dedicated class for NPM (package.json) mapping and translation | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | def normalize_bugs(self, d): | ||||
""" | """ | ||||
if isinstance(d, dict) and isinstance(d.get("url"), str): | if isinstance(d, dict) and isinstance(d.get("url"), str): | ||||
url = d["url"] | url = d["url"] | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
url = d | url = d | ||||
else: | else: | ||||
url = "" | url = "" | ||||
parsed_url = urllib.parse.urlparse(url) | |||||
if parsed_url.netloc: | |||||
return URIRef(url) | return URIRef(url) | ||||
else: | |||||
return None | |||||
_parse_author = re.compile( | _parse_author = re.compile( | ||||
r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | ||||
) | ) | ||||
def translate_author(self, graph: Graph, root, d): | def translate_author(self, graph: Graph, root, d): | ||||
r"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | r"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | def translate_author(self, graph: Graph, root, d): | ||||
url = match.group("url") | url = match.group("url") | ||||
else: | else: | ||||
return None | return None | ||||
if name and isinstance(name, str): | if name and isinstance(name, str): | ||||
graph.add((author, SCHEMA.name, Literal(name))) | graph.add((author, SCHEMA.name, Literal(name))) | ||||
if email and isinstance(email, str): | if email and isinstance(email, str): | ||||
graph.add((author, SCHEMA.email, Literal(email))) | graph.add((author, SCHEMA.email, Literal(email))) | ||||
if url and isinstance(url, str): | add_url_if_valid(graph, author, SCHEMA.url, url) | ||||
# Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop | |||||
# URLs that are blatantly invalid early, so PyLD does not crash. | |||||
parsed_url = urllib.parse.urlparse(url) | |||||
if parsed_url.netloc: | |||||
graph.add((author, SCHEMA.url, URIRef(url))) | |||||
add_list(graph, root, SCHEMA.author, [author]) | add_list(graph, root, SCHEMA.author, [author]) | ||||
def normalize_description(self, description): | def normalize_description(self, description): | ||||
r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | ||||
mistake that causes issues in the database because of null bytes in JSON. | mistake that causes issues in the database because of null bytes in JSON. | ||||
>>> NpmMapping().normalize_description("foo bar") | >>> NpmMapping().normalize_description("foo bar") | ||||
▲ Show 20 Lines • Show All 91 Lines • Show Last 20 Lines |