Changeset View
Changeset View
Standalone View
Standalone View
swh/indexer/tests/test_metadata.py
# Copyright (C) 2017-2018 The Software Heritage developers | # Copyright (C) 2017-2018 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import json | import json | ||||
import unittest | import unittest | ||||
from hypothesis import given, strategies, settings, HealthCheck | from hypothesis import given, strategies, settings, HealthCheck | ||||
import xmltodict | import xmltodict | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.indexer.codemeta import CODEMETA_TERMS | from swh.indexer.codemeta import CODEMETA_TERMS, CROSSWALK_TABLE | ||||
from swh.indexer.metadata_dictionary import ( | from swh.indexer.metadata_dictionary import MAPPINGS | ||||
CROSSWALK_TABLE, MAPPINGS, merge_values) | from swh.indexer.metadata_dictionary.base import merge_values | ||||
from swh.indexer.metadata_detector import ( | from swh.indexer.metadata_detector import ( | ||||
detect_metadata, extract_minimal_metadata_dict | detect_metadata, extract_minimal_metadata_dict | ||||
) | ) | ||||
from swh.indexer.metadata import ( | from swh.indexer.metadata import ( | ||||
ContentMetadataIndexer, RevisionMetadataIndexer | ContentMetadataIndexer, RevisionMetadataIndexer | ||||
) | ) | ||||
from .utils import ( | from .utils import ( | ||||
▲ Show 20 Lines • Show All 603 Lines • ▼ Show 20 Lines | def test_compute_metadata_maven_almost_empty(self): | ||||
result = self.maven_mapping.translate(raw_content) | result = self.maven_mapping.translate(raw_content) | ||||
self.assertEqual(result, { | self.assertEqual(result, { | ||||
'@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | '@context': 'https://doi.org/10.5063/schema/codemeta-2.0', | ||||
'type': 'SoftwareSourceCode', | 'type': 'SoftwareSourceCode', | ||||
}) | }) | ||||
def test_compute_metadata_maven_invalid_xml(self): | def test_compute_metadata_maven_invalid_xml(self): | ||||
expected_warning = ( | expected_warning = ( | ||||
'WARNING:swh.indexer.metadata_dictionary.MavenMapping:' | 'WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:' | ||||
'Error parsing XML from foo') | 'Error parsing XML from foo') | ||||
raw_content = b""" | raw_content = b""" | ||||
<project>""" | <project>""" | ||||
with self.assertLogs('swh.indexer.metadata_dictionary', | with self.assertLogs('swh.indexer.metadata_dictionary', | ||||
level='WARNING') as cm: | level='WARNING') as cm: | ||||
result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | self.assertEqual(cm.output, [expected_warning]) | ||||
self.assertEqual(result, None) | self.assertEqual(result, None) | ||||
raw_content = b""" | raw_content = b""" | ||||
""" | """ | ||||
with self.assertLogs('swh.indexer.metadata_dictionary', | with self.assertLogs('swh.indexer.metadata_dictionary', | ||||
level='WARNING') as cm: | level='WARNING') as cm: | ||||
result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | self.assertEqual(cm.output, [expected_warning]) | ||||
self.assertEqual(result, None) | self.assertEqual(result, None) | ||||
def test_compute_metadata_maven_unknown_encoding(self): | def test_compute_metadata_maven_unknown_encoding(self): | ||||
expected_warning = ( | expected_warning = ( | ||||
'WARNING:swh.indexer.metadata_dictionary.MavenMapping:' | 'WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:' | ||||
'Error detecting XML encoding from foo') | 'Error detecting XML encoding from foo') | ||||
raw_content = b"""<?xml version="1.0" encoding="foo"?> | raw_content = b"""<?xml version="1.0" encoding="foo"?> | ||||
<project> | <project> | ||||
</project>""" | </project>""" | ||||
with self.assertLogs('swh.indexer.metadata_dictionary', | with self.assertLogs('swh.indexer.metadata_dictionary', | ||||
level='WARNING') as cm: | level='WARNING') as cm: | ||||
result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | self.assertEqual(cm.output, [expected_warning]) | ||||
self.assertEqual(result, None) | self.assertEqual(result, None) | ||||
raw_content = b"""<?xml version="1.0" encoding="UTF-7"?> | raw_content = b"""<?xml version="1.0" encoding="UTF-7"?> | ||||
<project> | <project> | ||||
</project>""" | </project>""" | ||||
with self.assertLogs('swh.indexer.metadata_dictionary', | with self.assertLogs('swh.indexer.metadata_dictionary', | ||||
level='WARNING') as cm: | level='WARNING') as cm: | ||||
result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | ||||
self.assertEqual(cm.output, [expected_warning]) | self.assertEqual(cm.output, [expected_warning]) | ||||
self.assertEqual(result, None) | self.assertEqual(result, None) | ||||
def test_compute_metadata_maven_invalid_encoding(self): | def test_compute_metadata_maven_invalid_encoding(self): | ||||
expected_warning = ( | expected_warning = ( | ||||
'WARNING:swh.indexer.metadata_dictionary.MavenMapping:' | 'WARNING:swh.indexer.metadata_dictionary.maven.MavenMapping:' | ||||
'Error unidecoding XML from foo') | 'Error unidecoding XML from foo') | ||||
raw_content = b"""<?xml version="1.0" encoding="UTF-8"?> | raw_content = b"""<?xml version="1.0" encoding="UTF-8"?> | ||||
<foo\xe5ct> | <foo\xe5ct> | ||||
</foo>""" | </foo>""" | ||||
with self.assertLogs('swh.indexer.metadata_dictionary', | with self.assertLogs('swh.indexer.metadata_dictionary', | ||||
level='WARNING') as cm: | level='WARNING') as cm: | ||||
result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | result = MAPPINGS["MavenMapping"]('foo').translate(raw_content) | ||||
▲ Show 20 Lines • Show All 521 Lines • Show Last 20 Lines |