Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163563
D971.id3201.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Subscribers
None
D971.id3201.diff
View Options
diff --git a/swh/indexer/metadata_dictionary.py b/swh/indexer/metadata_dictionary.py
--- a/swh/indexer/metadata_dictionary.py
+++ b/swh/indexer/metadata_dictionary.py
@@ -10,6 +10,7 @@
import logging
import email.parser
import xml.parsers.expat
+import email.policy
import xmltodict
@@ -425,6 +426,14 @@
_normalize_pkginfo_key = str.lower
+class LinebreakPreservingEmailPolicy(email.policy.EmailPolicy):
+ def header_fetch_parse(self, name, value):
+ if hasattr(value, 'name'):
+ return value
+ value = value.replace('\n ', '\n')
+ return self.header_factory(name, value)
+
+
@register_mapping
class PythonPkginfoMapping(DictMapping, SingleFileMapping):
"""Dedicated class for Python's PKG-INFO mapping and translation.
@@ -434,7 +443,8 @@
mapping = {_normalize_pkginfo_key(k): v
for (k, v) in CROSSWALK_TABLE['Python PKG-INFO'].items()}
- _parser = email.parser.BytesHeaderParser()
+ _parser = email.parser.BytesHeaderParser(
+ policy=LinebreakPreservingEmailPolicy())
def translate(self, content):
msg = self._parser.parsebytes(content)
diff --git a/swh/indexer/tests/test_metadata.py b/swh/indexer/tests/test_metadata.py
--- a/swh/indexer/tests/test_metadata.py
+++ b/swh/indexer/tests/test_metadata.py
@@ -690,14 +690,14 @@
self.assertCountEqual(result['description'], [
'Software Heritage core utilities', # note the comma here
'swh-core\n'
- ' ========\n'
- ' \n'
- " core library for swh's modules:\n"
- ' - config parser\n'
- ' - hash computations\n'
- ' - serialization\n'
- ' - logging mechanism\n'
- ' '],
+ '========\n'
+ '\n'
+ "core library for swh's modules:\n"
+ '- config parser\n'
+ '- hash computations\n'
+ '- serialization\n'
+ '- logging mechanism\n'
+ ''],
result)
del result['description']
self.assertEqual(result, {
@@ -713,6 +713,22 @@
'version': '0.0.49',
})
+ def test_compute_metadata_pkginfo_utf8(self):
+ raw_content = (b'''\
+Metadata-Version: 1.1
+Name: snowpyt
+Description-Content-Type: UNKNOWN
+Description: foo
+ Hydrology N\xc2\xb083
+''') # noqa
+ result = MAPPINGS["PythonPkginfoMapping"].translate(raw_content)
+ self.assertEqual(result, {
+ '@context': 'https://doi.org/10.5063/schema/codemeta-2.0',
+ 'type': 'SoftwareSourceCode',
+ 'name': 'snowpyt',
+ 'description': 'foo\nHydrology N°83',
+ })
+
def test_compute_metadata_pkginfo_license(self):
raw_content = (b"""\
Metadata-Version: 2.1
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 10:18 AM (19 h, 8 m ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3225474
Attached To
D971: Fix parsing of the Description field in PKG-INFO.
Event Timeline
Log In to Comment