Page MenuHomeSoftware Heritage

D971.id3201.diff
No OneTemporary

D971.id3201.diff

diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -10,6 +10,7 @@
import logging
import email.parser
import xml.parsers.expat
+import email.policy
import xmltodict
@@ -425,6 +426,14 @@
_normalize_pkginfo_key = str.lower
+class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy):
+ def header_fetch_parse(self, name, value):
+ if hasattr(value, 'name'):
+ return value
+ value = value.replace('\n ', '\n')
+ return self.header_factory(name, value)
+
+
@register_mapping
class PythonPkginfoMapping(DictMapping, SingleFileMapping):
"""Dedicated class for Python's PKG-INFO mapping and translation.
@@ -434,7 +443,8 @@
mapping = {_normalize_pkginfo_key(k): v
for (k, v) in CROSSWALK_TABLE['Python PKG-INFO'].items()}
- _parser = email.parser.BytesHeaderParser()
+ _parser = email.parser.BytesHeaderParser(
+ policy=LinebreakPreservingEmailPolicy())
def translate(self, content):
msg = self._parser.parsebytes(content)
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -690,14 +690,14 @@
self.assertCountEqual(result['description'], [
'Software Heritage core utilities', # note the comma here
'swh-core\n'
- ' ========\n'
- ' \n'
- " core library for swh's modules:\n"
- ' - config parser\n'
- ' - hash computations\n'
- ' - serialization\n'
- ' - logging mechanism\n'
- ' '],
+ '========\n'
+ '\n'
+ "core library for swh's modules:\n"
+ '- config parser\n'
+ '- hash computations\n'
+ '- serialization\n'
+ '- logging mechanism\n'
+ ''],
result)
del result['description']
self.assertEqual(result, {
@@ -713,6 +713,22 @@
'version': '0.0.49',
})
+ def test_compute_metadata_pkginfo_utf8(self):
+ raw_content = (b'''\
+Metadata-Version: 1.1
+Name: snowpyt
+Description-Content-Type: UNKNOWN
+Description: foo
+ Hydrology N\xc2\xb083
+''') # noqa
+ result = MAPPINGS["PythonPkginfoMapping"].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'name': 'snowpyt',
+ 'description': 'foo\nHydrology N°83',
+ })
+
def test_compute_metadata_pkginfo_license(self):
raw_content = (b"""\
Metadata-Version: 2.1

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 10:18 AM (19 h, 8 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225474

Event Timeline