Page MenuHomeSoftware Heritage

D896.diff
No OneTemporary

D896.diff

diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
vcversioner
click
-file-magic
+python-magic >= 0.4.13
pyld
xmltodict
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -3,13 +3,24 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import magic
+import pkgutil
-from swh.model import hashutil
+import pkg_resources
from .indexer import ContentIndexer, ContentRangeIndexer
+def _import_python_magic():
+ """Import and return the 'magic' module provided by the python-magic
+ distribution (NOT the one from file-magic; both are made available with
+ 'import magic', so a plain import cannot choose between them).
+ The distribution is resolved through pkg_resources and the module is
+ loaded via an importer built for that distribution's module_path."""
+ magic_dist = pkg_resources.get_distribution('python-magic')
+ magic_importer = pkgutil.get_importer(magic_dist.module_path)
+ return magic_importer.find_module('magic').load_module()
+
+
+magic = _import_python_magic()
+
+
def compute_mimetype_encoding(raw_content):
"""Determine mimetype and encoding from the raw content.
@@ -21,10 +32,12 @@
(as bytes).
"""
- r = magic.detect_from_content(raw_content)
+ m = magic.Magic(mime=True, mime_encoding=True)
+ res = m.from_buffer(raw_content)
+ (mimetype, encoding) = res.split('; charset=')
return {
- 'mimetype': r.mime_type,
- 'encoding': r.encoding,
+ 'mimetype': mimetype,
+ 'encoding': encoding,
}
@@ -63,17 +76,11 @@
- **encoding** (bytes): encoding in bytes
"""
- try:
- properties = compute_mimetype_encoding(data)
- properties.update({
- 'id': id,
- 'indexer_configuration_id': self.tool['id'],
- })
- except TypeError:
- self.log.error('Detecting mimetype error for id %s' % (
- hashutil.hash_to_hex(id), ))
- return None
-
+ properties = compute_mimetype_encoding(data)
+ properties.update({
+ 'id': id,
+ 'indexer_configuration_id': self.tool['id'],
+ })
return properties
def persist_index_computations(self, results, policy_update):

File Metadata

Mime Type
text/plain
Expires
Tue, Dec 17, 3:45 AM (2 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223021

Event Timeline