Switch the mimetype detection dependency from file_magic to python-magic.

The detection code in swh/indexer/mimetype.py keeps a fallback branch for the
file-magic binding, selected by probing for `magic.Magic.from_buffer`.
python-magic reports "<mimetype>; charset=<encoding>" as one string; it is
split with str.partition so that output without the separator yields an empty
encoding instead of raising ValueError (the caller no longer catches errors).

diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,6 @@
 pygments
 click
 chardet
-file_magic
+python-magic
 pyld
 xmltodict
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -5,8 +5,6 @@
 
 import magic
 
-from swh.model import hashutil
-
 from .indexer import ContentIndexer, ContentRangeIndexer
 
 
@@ -21,10 +19,19 @@
         (as bytes).
 
     """
-    r = magic.detect_from_content(raw_content)
+    if hasattr(magic.Magic, 'from_buffer'):
+        # https://pypi.org/project/python-magic/
+        m = magic.Magic(mime=True, mime_encoding=True)
+        res = m.from_buffer(raw_content)
+        # partition() never raises: output lacking '; charset=' yields ''
+        (mimetype, _, encoding) = res.partition('; charset=')
+    else:
+        # https://pypi.org/project/file-magic/
+        r = magic.detect_from_content(raw_content)
+        (mimetype, encoding) = (r.mime_type, r.encoding)
     return {
-        'mimetype': r.mime_type,
-        'encoding': r.encoding,
+        'mimetype': mimetype,
+        'encoding': encoding,
     }
 
 
@@ -67,17 +74,11 @@
             - **encoding** (bytes): encoding in bytes
 
         """
-        try:
-            properties = compute_mimetype_encoding(data)
-            properties.update({
-                'id': id,
-                'indexer_configuration_id': self.tool['id'],
-            })
-        except TypeError:
-            self.log.error('Detecting mimetype error for id %s' % (
-                hashutil.hash_to_hex(id), ))
-            return None
-
+        properties = compute_mimetype_encoding(data)
+        properties.update({
+            'id': id,
+            'indexer_configuration_id': self.tool['id'],
+        })
         return properties
 
     def persist_index_computations(self, results, policy_update):