Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/npm.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import re | import re | ||||
from typing import Any, Dict, List, Optional, Union | |||||
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | ||||
from .base import JsonMapping | from .base import JsonMapping | ||||
class NpmMapping(JsonMapping): | class NpmMapping(JsonMapping): | ||||
""" | """ | ||||
Show All 9 Lines | _schema_shortcuts = { | ||||
"github": "git+https://github.com/%s.git", | "github": "git+https://github.com/%s.git", | ||||
"gist": "git+https://gist.github.com/%s.git", | "gist": "git+https://gist.github.com/%s.git", | ||||
"gitlab": "git+https://gitlab.com/%s.git", | "gitlab": "git+https://gitlab.com/%s.git", | ||||
# Bitbucket supports both hg and git, and the shortcut does not | # Bitbucket supports both hg and git, and the shortcut does not | ||||
# tell which one to use. | # tell which one to use. | ||||
# 'bitbucket': 'https://bitbucket.org/', | # 'bitbucket': 'https://bitbucket.org/', | ||||
} | } | ||||
def normalize_repository(self, d): | def normalize_repository( | ||||
self, d: Union[Dict, str, Any] | |||||
) -> Optional[Dict[str, str]]: | |||||
"""https://docs.npmjs.com/files/package.json#repository | """https://docs.npmjs.com/files/package.json#repository | ||||
>>> NpmMapping().normalize_repository({ | >>> NpmMapping().normalize_repository({ | ||||
... 'type': 'git', | ... 'type': 'git', | ||||
... 'url': 'https://example.org/foo.git' | ... 'url': 'https://example.org/foo.git' | ||||
... }) | ... }) | ||||
{'@id': 'git+https://example.org/foo.git'} | {'@id': 'git+https://example.org/foo.git'} | ||||
>>> NpmMapping().normalize_repository( | >>> NpmMapping().normalize_repository( | ||||
Show All 21 Lines | ) -> Optional[Dict[str, str]]: | ||||
else: | else: | ||||
url = self._schema_shortcuts["github"] % d | url = self._schema_shortcuts["github"] % d | ||||
else: | else: | ||||
return None | return None | ||||
return {"@id": url} | return {"@id": url} | ||||
def normalize_bugs(self, d): | def normalize_bugs(self, d: Union[Dict, str, Any]) -> Optional[Dict[str, str]]: | ||||
"""https://docs.npmjs.com/files/package.json#bugs | """https://docs.npmjs.com/files/package.json#bugs | ||||
>>> NpmMapping().normalize_bugs({ | >>> NpmMapping().normalize_bugs({ | ||||
... 'url': 'https://example.org/bugs/', | ... 'url': 'https://example.org/bugs/', | ||||
... 'email': 'bugs@example.org' | ... 'email': 'bugs@example.org' | ||||
... }) | ... }) | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
>>> NpmMapping().normalize_bugs( | >>> NpmMapping().normalize_bugs( | ||||
... 'https://example.org/bugs/') | ... 'https://example.org/bugs/') | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
""" | """ | ||||
if isinstance(d, dict) and isinstance(d.get("url"), str): | if isinstance(d, dict) and isinstance(d.get("url"), str): | ||||
return {"@id": d["url"]} | return {"@id": d["url"]} | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
return {"@id": d} | return {"@id": d} | ||||
else: | else: | ||||
return None | return None | ||||
_parse_author = re.compile( | _parse_author = re.compile( | ||||
r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | ||||
) | ) | ||||
def normalize_author(self, d): | def normalize_author( | ||||
self, d: Union[Dict, str, Any] | |||||
) -> Optional[Dict[str, List[Dict[str, Any]]]]: | |||||
"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | """https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | ||||
>>> from pprint import pprint | >>> from pprint import pprint | ||||
>>> pprint(NpmMapping().normalize_author({ | >>> pprint(NpmMapping().normalize_author({ | ||||
... 'name': 'John Doe', | ... 'name': 'John Doe', | ||||
... 'email': 'john.doe@example.org', | ... 'email': 'john.doe@example.org', | ||||
... 'url': 'https://example.org/~john.doe', | ... 'url': 'https://example.org/~john.doe', | ||||
... })) | ... })) | ||||
Show All 23 Lines | ) -> Optional[Dict[str, List[Dict[str, Any]]]]: | ||||
url = match.group("url") | url = match.group("url") | ||||
else: | else: | ||||
return None | return None | ||||
if name and isinstance(name, str): | if name and isinstance(name, str): | ||||
author[SCHEMA_URI + "name"] = name | author[SCHEMA_URI + "name"] = name | ||||
if email and isinstance(email, str): | if email and isinstance(email, str): | ||||
author[SCHEMA_URI + "email"] = email | author[SCHEMA_URI + "email"] = email | ||||
if url and isinstance(url, str): | if url and isinstance(url, str): | ||||
author[SCHEMA_URI + "url"] = {"@id": url} | author[SCHEMA_URI + "url"] = {"@id": url} # type: ignore | ||||
return {"@list": [author]} | return {"@list": [author]} | ||||
def normalize_license(self, s): | def normalize_license(self, s: str) -> Any: | ||||
rohan-sachan: If it's OK, may I use the following form of notation?
```
Union[Dict[str, str], Any]
```
Just… | |||||
vlorentz (Author; Unsubmitted; Not Done) Inline Actions: `Union[Dict[str, str], Any]` means the same thing as `Any`. But you don't need to make a union with `Any` here; there are only two possible types returned (a `Dict[str, str]`, or `None` when the input is not a string, i.e. `Optional[Dict[str, str]]`). vlorentz: `Union[Dict[str, str], Any]` means the same thing as `Any`.
But you don't need to make a union… | |||||
"""https://docs.npmjs.com/files/package.json#license | """https://docs.npmjs.com/files/package.json#license | ||||
>>> NpmMapping().normalize_license('MIT') | >>> NpmMapping().normalize_license('MIT') | ||||
{'@id': 'https://spdx.org/licenses/MIT'} | {'@id': 'https://spdx.org/licenses/MIT'} | ||||
""" | """ | ||||
if isinstance(s, str): | if isinstance(s, str): | ||||
return {"@id": "https://spdx.org/licenses/" + s} | return {"@id": "https://spdx.org/licenses/" + s} | ||||
def normalize_homepage(self, s): | def normalize_homepage(self, s: str) -> Any: | ||||
"""https://docs.npmjs.com/files/package.json#homepage | """https://docs.npmjs.com/files/package.json#homepage | ||||
>>> NpmMapping().normalize_homepage('https://example.org/~john.doe') | >>> NpmMapping().normalize_homepage('https://example.org/~john.doe') | ||||
{'@id': 'https://example.org/~john.doe'} | {'@id': 'https://example.org/~john.doe'} | ||||
""" | """ | ||||
if isinstance(s, str): | if isinstance(s, str): | ||||
return {"@id": s} | return {"@id": s} | ||||
def normalize_keywords(self, lst): | def normalize_keywords(self, lst: List[str]) -> Any: | ||||
"""https://docs.npmjs.com/files/package.json#homepage | """https://docs.npmjs.com/files/package.json#homepage | ||||
>>> NpmMapping().normalize_keywords(['foo', 'bar']) | >>> NpmMapping().normalize_keywords(['foo', 'bar']) | ||||
['foo', 'bar'] | ['foo', 'bar'] | ||||
""" | """ | ||||
if isinstance(lst, list): | if isinstance(lst, list): | ||||
return [x for x in lst if isinstance(x, str)] | return [x for x in lst if isinstance(x, str)] |
If it's OK, may I use the following form of notation: `Union[Dict[str, str], Any]`?
Using just `Dict[str, str]` makes the type checker report an error about a missing return statement, whereas adding an extra `else` branch only to return `None` would be unnecessary lines of code.