Page Menu | Home | Software Heritage

D896.id2877.diff
No One | Temporary

D896.id2877.diff

diff --git a/debian/control b/debian/control
--- a/debian/control
+++ b/debian/control
@@ -10,7 +10,7 @@
python3-hypothesis (>= 3.11.0~),
python3-pytest,
python3-pygments,
- python3-magic,
+ python3-magic (>= 2:0.4.13),
python3-pyld,
python3-setuptools,
python3-swh.core (>= 0.0.44~),
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,6 @@
pygments
click
chardet
-file_magic
+python-magic >= 0.4.13
pyld
xmltodict
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -5,10 +5,13 @@
import magic
-from swh.model import hashutil
-
from .indexer import ContentIndexer, ContentRangeIndexer
+if not hasattr(magic.Magic, 'from_buffer'):
+ raise ImportError(
+ 'Expected "import magic" to import python-magic, but file_magic '
+ 'was imported instead.')
+
def compute_mimetype_encoding(raw_content):
"""Determine mimetype and encoding from the raw content.
@@ -21,10 +24,12 @@
(as bytes).
"""
- r = magic.detect_from_content(raw_content)
+ m = magic.Magic(mime=True, mime_encoding=True)
+ res = m.from_buffer(raw_content)
+ (mimetype, encoding) = res.split('; charset=')
return {
- 'mimetype': r.mime_type,
- 'encoding': r.encoding,
+ 'mimetype': mimetype,
+ 'encoding': encoding,
}
@@ -67,17 +72,11 @@
- **encoding** (bytes): encoding in bytes
"""
- try:
- properties = compute_mimetype_encoding(data)
- properties.update({
- 'id': id,
- 'indexer_configuration_id': self.tool['id'],
- })
- except TypeError:
- self.log.error('Detecting mimetype error for id %s' % (
- hashutil.hash_to_hex(id), ))
- return None
-
+ properties = compute_mimetype_encoding(data)
+ properties.update({
+ 'id': id,
+ 'indexer_configuration_id': self.tool['id'],
+ })
return properties
def persist_index_computations(self, results, policy_update):
diff --git a/swh/indexer/origin_head.py b/swh/indexer/origin_head.py
--- a/swh/indexer/origin_head.py
+++ b/swh/indexer/origin_head.py
@@ -29,8 +29,8 @@
'configuration': {},
}),
'tasks': ('dict', {
- 'revision_metadata': 'revision_metadata',
- 'origin_intrinsic_metadata': 'origin_metadata',
+ 'revision_metadata': 'indexer_revision_metadata',
+ 'origin_intrinsic_metadata': 'indexer_origin_metadata',
})
}

File Metadata

Mime Type
text/plain
Expires
Wed, Jul 2, 10:30 AM (2 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229805

Event Timeline