Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/maven.py
# Copyright (C) 2018-2022 The Software Heritage developers | # Copyright (C) 2018-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
from typing import Any, Dict | from typing import Any, Dict | ||||
from rdflib import Graph, Literal, URIRef | from rdflib import Graph, Literal | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE | from swh.indexer.codemeta import CROSSWALK_TABLE | ||||
from swh.indexer.namespaces import SCHEMA | from swh.indexer.namespaces import SCHEMA | ||||
from .base import SingleFileIntrinsicMapping, XmlMapping | from .base import SingleFileIntrinsicMapping, XmlMapping | ||||
from .utils import prettyprint_graph # noqa | from .utils import add_url_if_valid, prettyprint_graph # noqa | ||||
class MavenMapping(XmlMapping, SingleFileIntrinsicMapping): | class MavenMapping(XmlMapping, SingleFileIntrinsicMapping): | ||||
""" | """ | ||||
dedicated class for Maven (pom.xml) mapping and translation | dedicated class for Maven (pom.xml) mapping and translation | ||||
""" | """ | ||||
name = "maven" | name = "maven" | ||||
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | def parse_repository(self, graph: Graph, root, d, repo): | ||||
isinstance(url, str) | isinstance(url, str) | ||||
and isinstance(group_id, str) | and isinstance(group_id, str) | ||||
and isinstance(artifact_id, str) | and isinstance(artifact_id, str) | ||||
): | ): | ||||
repo = os.path.join(url, *group_id.split("."), artifact_id) | repo = os.path.join(url, *group_id.split("."), artifact_id) | ||||
if "${" in repo: | if "${" in repo: | ||||
# Often use as templating in pom.xml files collected from VCSs | # Often use as templating in pom.xml files collected from VCSs | ||||
return | return | ||||
graph.add((root, SCHEMA.codeRepository, URIRef(repo))) | add_url_if_valid(graph, root, SCHEMA.codeRepository, repo) | ||||
def normalize_groupId(self, id_): | def normalize_groupId(self, id_): | ||||
"""https://maven.apache.org/pom.html#Maven_Coordinates | """https://maven.apache.org/pom.html#Maven_Coordinates | ||||
>>> MavenMapping().normalize_groupId('org.example') | >>> MavenMapping().normalize_groupId('org.example') | ||||
rdflib.term.Literal('org.example') | rdflib.term.Literal('org.example') | ||||
""" | """ | ||||
if isinstance(id_, str): | if isinstance(id_, str): | ||||
return Literal(id_) | return Literal(id_) | ||||
def translate_licenses(self, graph, root, licenses): | def translate_licenses(self, graph, root, licenses): | ||||
"""https://maven.apache.org/pom.html#Licenses | """https://maven.apache.org/pom.html#Licenses | ||||
>>> import xmltodict | >>> import xmltodict | ||||
>>> import json | >>> import json | ||||
>>> from rdflib import URIRef | |||||
>>> d = xmltodict.parse(''' | >>> d = xmltodict.parse(''' | ||||
... <licenses> | ... <licenses> | ||||
... <license> | ... <license> | ||||
... <name>Apache License, Version 2.0</name> | ... <name>Apache License, Version 2.0</name> | ||||
... <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url> | ... <url>https://www.apache.org/licenses/LICENSE-2.0.txt</url> | ||||
... </license> | ... </license> | ||||
... </licenses> | ... </licenses> | ||||
... ''') | ... ''') | ||||
▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines | def translate_licenses(self, graph, root, licenses): | ||||
if not isinstance(licenses, dict): | if not isinstance(licenses, dict): | ||||
return | return | ||||
licenses = licenses.get("license") | licenses = licenses.get("license") | ||||
if isinstance(licenses, dict): | if isinstance(licenses, dict): | ||||
licenses = [licenses] | licenses = [licenses] | ||||
elif not isinstance(licenses, list): | elif not isinstance(licenses, list): | ||||
return | return | ||||
for license in licenses: | for license in licenses: | ||||
if isinstance(license, dict) and isinstance(license.get("url"), str): | if isinstance(license, dict): | ||||
graph.add((root, SCHEMA.license, URIRef(license["url"]))) | add_url_if_valid(graph, root, SCHEMA.license, license.get("url")) |