Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary/maven.py
# Copyright (C) 2018-2019 The Software Heritage developers | # Copyright (C) 2018-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import xml.parsers.expat | import xml.parsers.expat | ||||
import xmltodict | import xmltodict | ||||
Show All 21 Lines | def translate(self, content): | ||||
return None | return None | ||||
except UnicodeDecodeError: | except UnicodeDecodeError: | ||||
self.log.warning("Error unidecoding XML from %s", self.log_suffix) | self.log.warning("Error unidecoding XML from %s", self.log_suffix) | ||||
return None | return None | ||||
except (LookupError, ValueError): | except (LookupError, ValueError): | ||||
# unknown encoding or multi-byte encoding | # unknown encoding or multi-byte encoding | ||||
self.log.warning("Error detecting XML encoding from %s", self.log_suffix) | self.log.warning("Error detecting XML encoding from %s", self.log_suffix) | ||||
return None | return None | ||||
if not isinstance(d, dict): | |||||
self.log.warning("Skipping ill-formed XML content: %s", content) | |||||
return None | |||||
metadata = self._translate_dict(d, normalize=False) | metadata = self._translate_dict(d, normalize=False) | ||||
metadata[SCHEMA_URI + "codeRepository"] = self.parse_repositories(d) | metadata[SCHEMA_URI + "codeRepository"] = self.parse_repositories(d) | ||||
metadata[SCHEMA_URI + "license"] = self.parse_licenses(d) | metadata[SCHEMA_URI + "license"] = self.parse_licenses(d) | ||||
return self.normalize_translation(metadata) | return self.normalize_translation(metadata) | ||||
_default_repository = {"url": "https://repo.maven.apache.org/maven2/"} | _default_repository = {"url": "https://repo.maven.apache.org/maven2/"} | ||||
def parse_repositories(self, d): | def parse_repositories(self, d): | ||||
▲ Show 20 Lines • Show All 112 Lines • Show Last 20 Lines |