Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/maven.py
# Copyright (C) 2018-2021 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
from typing import Any, Dict, Optional | from typing import Any, Dict, Optional | ||||
import xml.parsers.expat | import xml.parsers.expat | ||||
import xmltodict | import xmltodict | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | from swh.indexer.codemeta import CROSSWALK_TABLE | ||||
from swh.indexer.namespaces import SCHEMA | |||||
from .base import DictMapping, SingleFileIntrinsicMapping | from .base import DictMapping, SingleFileIntrinsicMapping | ||||
class MavenMapping(DictMapping, SingleFileIntrinsicMapping): | class MavenMapping(DictMapping, SingleFileIntrinsicMapping): | ||||
""" | """ | ||||
dedicated class for Maven (pom.xml) mapping and translation | dedicated class for Maven (pom.xml) mapping and translation | ||||
""" | """ | ||||
Show All 15 Lines | def translate(self, content: bytes) -> Optional[Dict[str, Any]]: | ||||
except (LookupError, ValueError): | except (LookupError, ValueError): | ||||
# unknown encoding or multi-byte encoding | # unknown encoding or multi-byte encoding | ||||
self.log.warning("Error detecting XML encoding from %s", self.log_suffix) | self.log.warning("Error detecting XML encoding from %s", self.log_suffix) | ||||
return None | return None | ||||
if not isinstance(d, dict): | if not isinstance(d, dict): | ||||
self.log.warning("Skipping ill-formed XML content: %s", content) | self.log.warning("Skipping ill-formed XML content: %s", content) | ||||
return None | return None | ||||
metadata = self._translate_dict(d, normalize=False) | metadata = self._translate_dict(d, normalize=False) | ||||
metadata[SCHEMA_URI + "codeRepository"] = self.parse_repositories(d) | metadata[SCHEMA.codeRepository] = self.parse_repositories(d) | ||||
metadata[SCHEMA_URI + "license"] = self.parse_licenses(d) | metadata[SCHEMA.license] = self.parse_licenses(d) | ||||
return self.normalize_translation(metadata) | return self.normalize_translation(metadata) | ||||
_default_repository = {"url": "https://repo.maven.apache.org/maven2/"} | _default_repository = {"url": "https://repo.maven.apache.org/maven2/"} | ||||
def parse_repositories(self, d): | def parse_repositories(self, d): | ||||
"""https://maven.apache.org/pom.html#Repositories | """https://maven.apache.org/pom.html#Repositories | ||||
>>> import xmltodict | >>> import xmltodict | ||||
▲ Show 20 Lines • Show All 109 Lines • Show Last 20 Lines |