Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/npm.py
# Copyright (C) 2018-2022 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
import urllib.parse | import urllib.parse | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | from swh.indexer.codemeta import CROSSWALK_TABLE | ||||
from swh.indexer.namespaces import SCHEMA | |||||
from .base import JsonMapping, SingleFileIntrinsicMapping | from .base import JsonMapping, SingleFileIntrinsicMapping | ||||
class NpmMapping(JsonMapping, SingleFileIntrinsicMapping): | class NpmMapping(JsonMapping, SingleFileIntrinsicMapping): | ||||
""" | """ | ||||
dedicated class for NPM (package.json) mapping and translation | dedicated class for NPM (package.json) mapping and translation | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | def normalize_author(self, d): | ||||
... 'name': 'John Doe', | ... 'name': 'John Doe', | ||||
... 'email': 'john.doe@example.org', | ... 'email': 'john.doe@example.org', | ||||
... 'url': 'https:\\\\example.invalid/~john.doe', | ... 'url': 'https:\\\\example.invalid/~john.doe', | ||||
... })) | ... })) | ||||
{'@list': [{'@type': 'http://schema.org/Person', | {'@list': [{'@type': 'http://schema.org/Person', | ||||
'http://schema.org/email': 'john.doe@example.org', | 'http://schema.org/email': 'john.doe@example.org', | ||||
'http://schema.org/name': 'John Doe'}]} | 'http://schema.org/name': 'John Doe'}]} | ||||
""" # noqa | """ # noqa | ||||
author = {"@type": SCHEMA_URI + "Person"} | author = {"@type": SCHEMA.Person} | ||||
if isinstance(d, dict): | if isinstance(d, dict): | ||||
name = d.get("name", None) | name = d.get("name", None) | ||||
email = d.get("email", None) | email = d.get("email", None) | ||||
url = d.get("url", None) | url = d.get("url", None) | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
match = self._parse_author.match(d) | match = self._parse_author.match(d) | ||||
if not match: | if not match: | ||||
return None | return None | ||||
name = match.group("name") | name = match.group("name") | ||||
email = match.group("email") | email = match.group("email") | ||||
url = match.group("url") | url = match.group("url") | ||||
else: | else: | ||||
return None | return None | ||||
if name and isinstance(name, str): | if name and isinstance(name, str): | ||||
author[SCHEMA_URI + "name"] = name | author[SCHEMA.name] = name | ||||
if email and isinstance(email, str): | if email and isinstance(email, str): | ||||
author[SCHEMA_URI + "email"] = email | author[SCHEMA.email] = email | ||||
if url and isinstance(url, str): | if url and isinstance(url, str): | ||||
# Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop | # Workaround for https://github.com/digitalbazaar/pyld/issues/91 : drop | ||||
# URLs that are blatantly invalid early, so PyLD does not crash. | # URLs that are blatantly invalid early, so PyLD does not crash. | ||||
parsed_url = urllib.parse.urlparse(url) | parsed_url = urllib.parse.urlparse(url) | ||||
if parsed_url.netloc: | if parsed_url.netloc: | ||||
author[SCHEMA_URI + "url"] = {"@id": url} | author[SCHEMA.url] = {"@id": url} | ||||
return {"@list": [author]} | return {"@list": [author]} | ||||
def normalize_description(self, description): | def normalize_description(self, description): | ||||
r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | r"""Try to re-decode ``description`` as UTF-16, as this is a somewhat common | ||||
mistake that causes issues in the database because of null bytes in JSON. | mistake that causes issues in the database because of null bytes in JSON. | ||||
>>> NpmMapping().normalize_description("foo bar") | >>> NpmMapping().normalize_description("foo bar") | ||||
▲ Show 20 Lines • Show All 88 Lines • Show Last 20 Lines |