Differential D5295 Diff 18958 swh/indexer/metadata_dictionary/npm.py

Changeset View

Standalone View

swh/indexer/metadata_dictionary/npm.py

# Copyright (C) 2018-2019 The Software Heritage developers		# Copyright (C) 2018-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution		# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version		# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information		# See top-level LICENSE file for more information

import re		import re
		from typing import Any, Dict, List, Optional, Union

from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI		from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI

from .base import JsonMapping		from .base import JsonMapping


class NpmMapping(JsonMapping):		class NpmMapping(JsonMapping):
"""		"""
Show All 9 Lines	_schema_shortcuts = {
"github": "git+https://github.com/%s.git",		"github": "git+https://github.com/%s.git",
"gist": "git+https://gist.github.com/%s.git",		"gist": "git+https://gist.github.com/%s.git",
"gitlab": "git+https://gitlab.com/%s.git",		"gitlab": "git+https://gitlab.com/%s.git",
# Bitbucket supports both hg and git, and the shortcut does not		# Bitbucket supports both hg and git, and the shortcut does not
# tell which one to use.		# tell which one to use.
# 'bitbucket': 'https://bitbucket.org/',		# 'bitbucket': 'https://bitbucket.org/',
}		}

def normalize_repository(self, d):		def normalize_repository(
		self, d: Union[Dict, str, Any]
		) -> Optional[Dict[str, str]]:
"""https://docs.npmjs.com/files/package.json#repository		"""https://docs.npmjs.com/files/package.json#repository

>>> NpmMapping().normalize_repository({		>>> NpmMapping().normalize_repository({
... 'type': 'git',		... 'type': 'git',
... 'url': 'https://example.org/foo.git'		... 'url': 'https://example.org/foo.git'
... })		... })
{'@id': 'git+https://example.org/foo.git'}		{'@id': 'git+https://example.org/foo.git'}
>>> NpmMapping().normalize_repository(		>>> NpmMapping().normalize_repository(
Show All 21 Lines	) -> Optional[Dict[str, str]]:
else:		else:
url = self._schema_shortcuts["github"] % d		url = self._schema_shortcuts["github"] % d

else:		else:
return None		return None

return {"@id": url}		return {"@id": url}

def normalize_bugs(self, d: Any) -> Optional[Dict[str, str]]:
    """Translate the npm ``bugs`` field into a JSON-LD ``@id`` node.

    https://docs.npmjs.com/files/package.json#bugs

    ``d`` may be of any type; two shapes are recognized:

    * a dict whose ``url`` entry is a string (other keys, such as
      ``email``, are ignored);
    * a plain string, which is used as the URL itself.

    Every other value yields ``None``.

    >>> NpmMapping().normalize_bugs({
    ...     'url': 'https://example.org/bugs/',
    ...     'email': 'bugs@example.org'
    ... })
    {'@id': 'https://example.org/bugs/'}
    >>> NpmMapping().normalize_bugs(
    ...     'https://example.org/bugs/')
    {'@id': 'https://example.org/bugs/'}
    """
    # Annotated as plain ``Any``: a Union that contains ``Any`` is equivalent
    # to ``Any`` for a type checker, so spelling out the members adds nothing.
    if isinstance(d, dict) and isinstance(d.get("url"), str):
        return {"@id": d["url"]}
    elif isinstance(d, str):
        return {"@id": d}
    else:
        return None

# Matches npm's single-string "people" notation, in which every part is
# optional:
#     Name <email> (url)
# Leading and trailing spaces are ignored. The name is matched lazily so that
# it stops before the optional " <email>" and " (url)" suffixes.
_parse_author = re.compile(
    r"^ *"
    r"(?P<name>.*?)"
    r"( +<(?P<email>.*)>)?"
    r"( +\((?P<url>.*)\))?"
    r" *$"
)

def normalize_author(self, d):		def normalize_author(
		self, d: Union[Dict, str, Any]
		) -> Optional[Dict[str, List[Dict[str, Any]]]]:
"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors'		"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors'

>>> from pprint import pprint		>>> from pprint import pprint
>>> pprint(NpmMapping().normalize_author({		>>> pprint(NpmMapping().normalize_author({
... 'name': 'John Doe',		... 'name': 'John Doe',
... 'email': 'john.doe@example.org',		... 'email': 'john.doe@example.org',
... 'url': 'https://example.org/~john.doe',		... 'url': 'https://example.org/~john.doe',
... }))		... }))
Show All 23 Lines	) -> Optional[Dict[str, List[Dict[str, Any]]]]:
url = match.group("url")		url = match.group("url")
else:		else:
return None		return None
if name and isinstance(name, str):		if name and isinstance(name, str):
author[SCHEMA_URI + "name"] = name		author[SCHEMA_URI + "name"] = name
if email and isinstance(email, str):		if email and isinstance(email, str):
author[SCHEMA_URI + "email"] = email		author[SCHEMA_URI + "email"] = email
if url and isinstance(url, str):		if url and isinstance(url, str):
author[SCHEMA_URI + "url"] = {"@id": url}		author[SCHEMA_URI + "url"] = {"@id": url} # type: ignore
return {"@list": [author]}		return {"@list": [author]}

def normalize_license(self, s):		def normalize_license(self, s: str) -> Any:
		rohan-sachan (Unsubmitted, Not Done): If it's OK, may I use the following notation? `Union[Dict[str, str], Any]`. Using just `Dict[str, str]` gives an error because there is no return statement on the other path, whereas adding an extra `else` that returns None would just be unnecessary lines of code.
		vlorentz (Author; Unsubmitted, Not Done): `Union[Dict[str, str], Any]` means the same thing as `Any`. But you don't need to make a union with `Any` here; there are only two possible types returned.
"""https://docs.npmjs.com/files/package.json#license		"""https://docs.npmjs.com/files/package.json#license

>>> NpmMapping().normalize_license('MIT')		>>> NpmMapping().normalize_license('MIT')
{'@id': 'https://spdx.org/licenses/MIT'}		{'@id': 'https://spdx.org/licenses/MIT'}
"""		"""
if isinstance(s, str):		if isinstance(s, str):
return {"@id": "https://spdx.org/licenses/" + s}		return {"@id": "https://spdx.org/licenses/" + s}

def normalize_homepage(self, s: Any) -> Optional[Dict[str, str]]:
    """Wrap the npm ``homepage`` field in a JSON-LD ``@id`` node.

    https://docs.npmjs.com/files/package.json#homepage

    ``s`` may be of any type; ``None`` is returned when it is not a string.

    >>> NpmMapping().normalize_homepage('https://example.org/~john.doe')
    {'@id': 'https://example.org/~john.doe'}
    """
    if isinstance(s, str):
        return {"@id": s}
    # Explicit fall-through so the Optional return type is visible in the code.
    return None

def normalize_keywords(self, lst: Any) -> Optional[List[str]]:
    """Keep only the string entries of the npm ``keywords`` field.

    https://docs.npmjs.com/files/package.json#keywords

    ``lst`` may be of any type. When it is a list, a new list holding its
    string items (in their original order) is returned; non-string items are
    dropped. When it is not a list, ``None`` is returned.

    >>> NpmMapping().normalize_keywords(['foo', 'bar'])
    ['foo', 'bar']
    """
    if isinstance(lst, list):
        return [x for x in lst if isinstance(x, str)]
    # Explicit fall-through so the Optional return type is visible in the code.
    return None