Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/npm.py
Show All 25 Lines | _schema_shortcuts = { | ||||
"gist": "git+https://gist.github.com/%s.git", | "gist": "git+https://gist.github.com/%s.git", | ||||
"gitlab": "git+https://gitlab.com/%s.git", | "gitlab": "git+https://gitlab.com/%s.git", | ||||
# Bitbucket supports both hg and git, and the shortcut does not | # Bitbucket supports both hg and git, and the shortcut does not | ||||
# tell which one to use. | # tell which one to use. | ||||
# 'bitbucket': 'https://bitbucket.org/', | # 'bitbucket': 'https://bitbucket.org/', | ||||
} | } | ||||
def normalize_repository( | def normalize_repository( | ||||
self, d: Union[Dict, str, Any] | self, d: Union[Dict[str, Any], str] | ||||
) -> Optional[Dict[str, str]]: | ) -> Optional[Dict[str, str]]: | ||||
"""https://docs.npmjs.com/files/package.json#repository | """https://docs.npmjs.com/files/package.json#repository | ||||
>>> NpmMapping().normalize_repository({ | >>> NpmMapping().normalize_repository({ | ||||
... 'type': 'git', | ... 'type': 'git', | ||||
... 'url': 'https://example.org/foo.git' | ... 'url': 'https://example.org/foo.git' | ||||
... }) | ... }) | ||||
{'@id': 'git+https://example.org/foo.git'} | {'@id': 'git+https://example.org/foo.git'} | ||||
Show All 22 Lines | ) -> Optional[Dict[str, str]]: | ||||
else: | else: | ||||
url = self._schema_shortcuts["github"] % d | url = self._schema_shortcuts["github"] % d | ||||
else: | else: | ||||
return None | return None | ||||
return {"@id": url} | return {"@id": url} | ||||
def normalize_bugs(self, d: Union[Dict, str, Any]) -> Optional[Dict[str, str]]: | def normalize_bugs(self, d: Union[Dict, str]) -> Optional[Dict[str, str]]: | ||||
"""https://docs.npmjs.com/files/package.json#bugs | """https://docs.npmjs.com/files/package.json#bugs | ||||
>>> NpmMapping().normalize_bugs({ | >>> NpmMapping().normalize_bugs({ | ||||
... 'url': 'https://example.org/bugs/', | ... 'url': 'https://example.org/bugs/', | ||||
... 'email': 'bugs@example.org' | ... 'email': 'bugs@example.org' | ||||
... }) | ... }) | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
>>> NpmMapping().normalize_bugs( | >>> NpmMapping().normalize_bugs( | ||||
... 'https://example.org/bugs/') | ... 'https://example.org/bugs/') | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
""" | """ | ||||
if isinstance(d, dict) and isinstance(d.get("url"), str): | if isinstance(d, dict) and isinstance(d.get("url"), str): | ||||
return {"@id": d["url"]} | return {"@id": d["url"]} | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
return {"@id": d} | return {"@id": d} | ||||
else: | else: | ||||
return None | return None | ||||
_parse_author = re.compile( | _parse_author = re.compile( | ||||
r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | r"^ *" r"(?P<name>.*?)" r"( +<(?P<email>.*)>)?" r"( +\((?P<url>.*)\))?" r" *$" | ||||
) | ) | ||||
def normalize_author( | def normalize_author( | ||||
self, d: Union[Dict, str, Any] | self, d: Union[Dict, str] | ||||
) -> Optional[Dict[str, List[Dict[str, Any]]]]: | ) -> Optional[Dict[str, List[Dict[str, Union[str, Dict[str, str]]]]]]: | ||||
"""https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | """https://docs.npmjs.com/files/package.json#people-fields-author-contributors' | ||||
>>> from pprint import pprint | >>> from pprint import pprint | ||||
>>> pprint(NpmMapping().normalize_author({ | >>> pprint(NpmMapping().normalize_author({ | ||||
... 'name': 'John Doe', | ... 'name': 'John Doe', | ||||
... 'email': 'john.doe@example.org', | ... 'email': 'john.doe@example.org', | ||||
... 'url': 'https://example.org/~john.doe', | ... 'url': 'https://example.org/~john.doe', | ||||
... })) | ... })) | ||||
{'@list': [{'@type': 'http://schema.org/Person', | {'@list': [{'@type': 'http://schema.org/Person', | ||||
'http://schema.org/email': 'john.doe@example.org', | 'http://schema.org/email': 'john.doe@example.org', | ||||
'http://schema.org/name': 'John Doe', | 'http://schema.org/name': 'John Doe', | ||||
'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} | 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} | ||||
>>> pprint(NpmMapping().normalize_author( | >>> pprint(NpmMapping().normalize_author( | ||||
... 'John Doe <john.doe@example.org> (https://example.org/~john.doe)' | ... 'John Doe <john.doe@example.org> (https://example.org/~john.doe)' | ||||
... )) | ... )) | ||||
{'@list': [{'@type': 'http://schema.org/Person', | {'@list': [{'@type': 'http://schema.org/Person', | ||||
'http://schema.org/email': 'john.doe@example.org', | 'http://schema.org/email': 'john.doe@example.org', | ||||
'http://schema.org/name': 'John Doe', | 'http://schema.org/name': 'John Doe', | ||||
'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} | 'http://schema.org/url': {'@id': 'https://example.org/~john.doe'}}]} | ||||
""" # noqa | """ # noqa | ||||
author = {"@type": SCHEMA_URI + "Person"} | author: Dict[str, Union[str, Dict[str, str]]] = {"@type": SCHEMA_URI + "Person"} | ||||
if isinstance(d, dict): | if isinstance(d, dict): | ||||
name = d.get("name", None) | name = d.get("name", None) | ||||
email = d.get("email", None) | email = d.get("email", None) | ||||
url = d.get("url", None) | url = d.get("url", None) | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
match = self._parse_author.match(d) | match = self._parse_author.match(d) | ||||
if not match: | if not match: | ||||
return None | return None | ||||
name = match.group("name") | name = match.group("name") | ||||
email = match.group("email") | email = match.group("email") | ||||
url = match.group("url") | url = match.group("url") | ||||
else: | else: | ||||
return None | return None | ||||
if name and isinstance(name, str): | if name and isinstance(name, str): | ||||
author[SCHEMA_URI + "name"] = name | author[SCHEMA_URI + "name"] = name | ||||
if email and isinstance(email, str): | if email and isinstance(email, str): | ||||
author[SCHEMA_URI + "email"] = email | author[SCHEMA_URI + "email"] = email | ||||
if url and isinstance(url, str): | if url and isinstance(url, str): | ||||
author[SCHEMA_URI + "url"] = {"@id": url} # type: ignore | author[SCHEMA_URI + "url"] = {"@id": url} | ||||
return {"@list": [author]} | return {"@list": [author]} | ||||
def normalize_license(self, s: str) -> Any: | def normalize_license(self, s: str) -> Dict[str, str]: | ||||
rohan-sachan: If it's OK, may I use the following notation?
```
Union[Dict[str, str], Any]
```
Just… | |||||
Not Done Inline Actions `Union[Dict[str, str], Any]` means the same thing as `Any`. But you don't need to make a union with `Any` here; there are only two possible types returned. vlorentz: `Union[Dict[str, str], Any]` means the same thing as `Any`.
But you don't need to make a union… | |||||
"""https://docs.npmjs.com/files/package.json#license | """https://docs.npmjs.com/files/package.json#license | ||||
>>> NpmMapping().normalize_license('MIT') | >>> NpmMapping().normalize_license('MIT') | ||||
{'@id': 'https://spdx.org/licenses/MIT'} | {'@id': 'https://spdx.org/licenses/MIT'} | ||||
""" | """ | ||||
if isinstance(s, str): | if isinstance(s, str): | ||||
return {"@id": "https://spdx.org/licenses/" + s} | return {"@id": "https://spdx.org/licenses/" + s} | ||||
def normalize_homepage(self, s: str) -> Any: | def normalize_homepage(self, s: str) -> Dict[str, str]: | ||||
"""https://docs.npmjs.com/files/package.json#homepage | """https://docs.npmjs.com/files/package.json#homepage | ||||
>>> NpmMapping().normalize_homepage('https://example.org/~john.doe') | >>> NpmMapping().normalize_homepage('https://example.org/~john.doe') | ||||
{'@id': 'https://example.org/~john.doe'} | {'@id': 'https://example.org/~john.doe'} | ||||
""" | """ | ||||
if isinstance(s, str): | if isinstance(s, str): | ||||
return {"@id": s} | return {"@id": s} | ||||
def normalize_keywords(self, lst: List[str]) -> Any: | def normalize_keywords(self, lst: List[str]) -> Optional[List[str]]: | ||||
"""https://docs.npmjs.com/files/package.json#homepage | """https://docs.npmjs.com/files/package.json#homepage | ||||
>>> NpmMapping().normalize_keywords(['foo', 'bar']) | >>> NpmMapping().normalize_keywords(['foo', 'bar']) | ||||
['foo', 'bar'] | ['foo', 'bar'] | ||||
""" | """ | ||||
if isinstance(lst, list): | if isinstance(lst, list): | ||||
return [x for x in lst if isinstance(x, str)] | return [x for x in lst if isinstance(x, str)] | ||||
else: | |||||
return None |
If it's OK, may I use the following notation: `Union[Dict[str, str], Any]`?
Using just `Dict[str, str]` gives a missing-return-statement error, whereas adding an extra `else` branch only to return `None` would just be unnecessary lines of code.