Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/metadata_dictionary.py
# Copyright (C) 2017 The Software Heritage developers | # Copyright (C) 2017 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import os | import os | ||||
import re | import re | ||||
import abc | import abc | ||||
import json | import json | ||||
import logging | import logging | ||||
import email.parser | import email.parser | ||||
import xml.parsers.expat | |||||
import xmltodict | import xmltodict | ||||
from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | from swh.indexer.codemeta import CROSSWALK_TABLE, SCHEMA_URI | ||||
from swh.indexer.codemeta import compact, expand | from swh.indexer.codemeta import compact, expand | ||||
MAPPINGS = {} | MAPPINGS = {} | ||||
▲ Show 20 Lines • Show All 233 Lines • ▼ Show 20 Lines | |||||
class MavenMapping(DictMapping, SingleFileMapping): | class MavenMapping(DictMapping, SingleFileMapping): | ||||
""" | """ | ||||
dedicated class for Maven (pom.xml) mapping and translation | dedicated class for Maven (pom.xml) mapping and translation | ||||
""" | """ | ||||
filename = b'pom.xml' | filename = b'pom.xml' | ||||
mapping = CROSSWALK_TABLE['Java (Maven)'] | mapping = CROSSWALK_TABLE['Java (Maven)'] | ||||
def translate(self, content):
    """Translate the content of a pom.xml file into normalized metadata.

    Args:
        content: raw content of the pom.xml file; it is passed unchanged
            to ``xmltodict.parse``.

    Returns:
        The value returned by ``self.normalize_translation`` for the
        translated metadata, or None when the content is not well-formed
        XML or when its ``project`` element is not a mapping.

    """
    try:
        # A missing or empty <project> element comes back as None; fall
        # back to an empty dict so the helpers below get a mapping.
        d = xmltodict.parse(content).get('project') or {}
    except xml.parsers.expat.ExpatError:
        self.log.warning('Error parsing XML of %r', content)
        return None
    if not isinstance(d, dict):
        # A root holding only text (e.g. <project>foo</project>) is
        # parsed as a plain string. The helpers below are handed ``d``
        # as the project mapping, so reject it the same way as
        # malformed XML instead of passing a string through.
        self.log.warning('Unexpected content for <project> in %r', content)
        return None
    metadata = self.translate_dict(d, normalize=False)
    # Repositories and licenses are filled in by dedicated parsers rather
    # than by the generic translate_dict call above.
    metadata[SCHEMA_URI+'codeRepository'] = self.parse_repositories(d)
    metadata[SCHEMA_URI+'license'] = self.parse_licenses(d)
    return self.normalize_translation(metadata)
_default_repository = {'url': 'https://repo.maven.apache.org/maven2/'} | _default_repository = {'url': 'https://repo.maven.apache.org/maven2/'} | ||||
def parse_repositories(self, d): | def parse_repositories(self, d): | ||||
▲ Show 20 Lines • Show All 147 Lines • Show Last 20 Lines |