Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
Show First 20 Lines • Show All 234 Lines • ▼ Show 20 Lines | def normalize_repository(self, d): | ||||
{'@id': 'git+https://example.org/foo.git'} | {'@id': 'git+https://example.org/foo.git'} | ||||
>>> NpmMapping().normalize_repository( | >>> NpmMapping().normalize_repository( | ||||
... 'gitlab:foo/bar') | ... 'gitlab:foo/bar') | ||||
{'@id': 'git+https://gitlab.com/foo/bar.git'} | {'@id': 'git+https://gitlab.com/foo/bar.git'} | ||||
>>> NpmMapping().normalize_repository( | >>> NpmMapping().normalize_repository( | ||||
... 'foo/bar') | ... 'foo/bar') | ||||
{'@id': 'git+https://github.com/foo/bar.git'} | {'@id': 'git+https://github.com/foo/bar.git'} | ||||
""" | """ | ||||
if isinstance(d, dict) and {'type', 'url'} <= set(d): | if isinstance(d, dict) and isinstance(d.get('type'), str) \ | ||||
and isinstance(d.get('url'), str): | |||||
url = '{type}+{url}'.format(**d) | url = '{type}+{url}'.format(**d) | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
if '://' in d: | if '://' in d: | ||||
url = d | url = d | ||||
elif ':' in d: | elif ':' in d: | ||||
(schema, rest) = d.split(':', 1) | (schema, rest) = d.split(':', 1) | ||||
if schema in self._schema_shortcuts: | if schema in self._schema_shortcuts: | ||||
url = self._schema_shortcuts[schema] % rest | url = self._schema_shortcuts[schema] % rest | ||||
Show All 14 Lines | def normalize_bugs(self, d): | ||||
... 'url': 'https://example.org/bugs/', | ... 'url': 'https://example.org/bugs/', | ||||
... 'email': 'bugs@example.org' | ... 'email': 'bugs@example.org' | ||||
... }) | ... }) | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
>>> NpmMapping().normalize_bugs( | >>> NpmMapping().normalize_bugs( | ||||
... 'https://example.org/bugs/') | ... 'https://example.org/bugs/') | ||||
{'@id': 'https://example.org/bugs/'} | {'@id': 'https://example.org/bugs/'} | ||||
""" | """ | ||||
if isinstance(d, dict) and 'url' in d: | if isinstance(d, dict) and isinstance(d.get('url'), str): | ||||
return {'@id': '{url}'.format(**d)} | return {'@id': d['url']} | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
return {'@id': d} | return {'@id': d} | ||||
else: | else: | ||||
return None | return None | ||||
_parse_author = re.compile(r'^ *' | _parse_author = re.compile(r'^ *' | ||||
r'(?P<name>.*?)' | r'(?P<name>.*?)' | ||||
r'( +<(?P<email>.*)>)?' | r'( +<(?P<email>.*)>)?' | ||||
Show All 28 Lines | def normalize_author(self, d): | ||||
url = d.get('url', None) | url = d.get('url', None) | ||||
elif isinstance(d, str): | elif isinstance(d, str): | ||||
match = self._parse_author.match(d) | match = self._parse_author.match(d) | ||||
name = match.group('name') | name = match.group('name') | ||||
email = match.group('email') | email = match.group('email') | ||||
url = match.group('url') | url = match.group('url') | ||||
else: | else: | ||||
return None | return None | ||||
if name: | if name and isinstance(name, str): | ||||
author[SCHEMA_URI+'name'] = name | author[SCHEMA_URI+'name'] = name | ||||
if email: | if email and isinstance(email, str): | ||||
author[SCHEMA_URI+'email'] = email | author[SCHEMA_URI+'email'] = email | ||||
if url: | if url and isinstance(url, str): | ||||
author[SCHEMA_URI+'url'] = {'@id': url} | author[SCHEMA_URI+'url'] = {'@id': url} | ||||
return {"@list": [author]} | return {"@list": [author]} | ||||
def normalize_license(self, s): | def normalize_license(self, s): | ||||
"""https://docs.npmjs.com/files/package.json#license | """https://docs.npmjs.com/files/package.json#license | ||||
>>> NpmMapping().normalize_license('MIT') | >>> NpmMapping().normalize_license('MIT') | ||||
{'@id': 'https://spdx.org/licenses/MIT'} | {'@id': 'https://spdx.org/licenses/MIT'} | ||||
▲ Show 20 Lines • Show All 95 Lines • ▼ Show 20 Lines | def parse_repository(self, d, repo): | ||||
return {"@id": repo} | return {"@id": repo} | ||||
def normalize_groupId(self, id_): | def normalize_groupId(self, id_): | ||||
"""https://maven.apache.org/pom.html#Maven_Coordinates | """https://maven.apache.org/pom.html#Maven_Coordinates | ||||
>>> MavenMapping().normalize_groupId('org.example') | >>> MavenMapping().normalize_groupId('org.example') | ||||
{'@id': 'org.example'} | {'@id': 'org.example'} | ||||
""" | """ | ||||
if isinstance(id_, str): | |||||
return {"@id": id_} | return {"@id": id_} | ||||
def parse_licenses(self, d): | def parse_licenses(self, d): | ||||
"""https://maven.apache.org/pom.html#Licenses | """https://maven.apache.org/pom.html#Licenses | ||||
>>> import xmltodict | >>> import xmltodict | ||||
>>> import json | >>> import json | ||||
>>> d = xmltodict.parse(''' | >>> d = xmltodict.parse(''' | ||||
... <licenses> | ... <licenses> | ||||
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines | def parse_licenses(self, d): | ||||
return | return | ||||
licenses = licenses.get('license') | licenses = licenses.get('license') | ||||
if isinstance(licenses, dict): | if isinstance(licenses, dict): | ||||
licenses = [licenses] | licenses = [licenses] | ||||
elif not isinstance(licenses, list): | elif not isinstance(licenses, list): | ||||
return | return | ||||
return [{"@id": license['url']} | return [{"@id": license['url']} | ||||
for license in licenses | for license in licenses | ||||
if isinstance(license, dict) and 'url' in license] or None | if isinstance(license, dict) | ||||
and isinstance(license.get('url'), str)] or None | |||||
_normalize_pkginfo_key = str.lower | _normalize_pkginfo_key = str.lower | ||||
class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy): | class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy): | ||||
def header_fetch_parse(self, name, value): | def header_fetch_parse(self, name, value): | ||||
if hasattr(value, 'name'): | if hasattr(value, 'name'): | ||||
▲ Show 20 Lines • Show All 164 Lines • Show Last 20 Lines |