diff --git a/PKG-INFO b/PKG-INFO
index 9732f47..43d1939 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,71 +1,71 @@
 Metadata-Version: 2.1
 Name: swh.indexer
-Version: 0.6.0
+Version: 0.6.1
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/
 Description: swh-indexer
         ============
         
         Tools to compute multiple indexes on SWH's raw contents:
         - content:
           - mimetype
           - ctags
           - language
           - fossology-license
           - metadata
         - revision:
           - metadata
         
         An indexer is in charge of:
         - looking up objects
         - extracting information from those objects
         - store those information in the swh-indexer db
         
         There are multiple indexers working on different object types:
           - content indexer: works with content sha1 hashes
           - revision indexer: works with revision sha1 hashes
           - origin indexer: works with origin identifiers
         
         Indexation procedure:
         - receive batch of ids
         - retrieve the associated data depending on object type
         - compute for that object some index
         - store the result to swh's storage
         
         Current content indexers:
         
         - mimetype (queue swh_indexer_content_mimetype): detect the encoding
           and mimetype
         
         - language (queue swh_indexer_content_language): detect the
           programming language
         
         - ctags (queue swh_indexer_content_ctags): compute tags information
         
         - fossology-license (queue swh_indexer_fossology_license): compute the
           license
         
         - metadata: translate file into translated_metadata dict
         
         Current revision indexers:
         
         - metadata: detects files containing metadata and retrieves translated_metadata
           in content_metadata table in storage or run content indexer to translate
           files.
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
diff --git a/swh.indexer.egg-info/PKG-INFO b/swh.indexer.egg-info/PKG-INFO
index 9732f47..43d1939 100644
--- a/swh.indexer.egg-info/PKG-INFO
+++ b/swh.indexer.egg-info/PKG-INFO
@@ -1,71 +1,71 @@
 Metadata-Version: 2.1
 Name: swh.indexer
-Version: 0.6.0
+Version: 0.6.1
 Summary: Software Heritage Content Indexer
 Home-page: https://forge.softwareheritage.org/diffusion/78/
 Author: Software Heritage developers
 Author-email: swh-devel@inria.fr
 License: UNKNOWN
 Project-URL: Bug Reports, https://forge.softwareheritage.org/maniphest
 Project-URL: Funding, https://www.softwareheritage.org/donate
 Project-URL: Source, https://forge.softwareheritage.org/source/swh-indexer
 Project-URL: Documentation, https://docs.softwareheritage.org/devel/swh-indexer/
 Description: swh-indexer
         ============
         
         Tools to compute multiple indexes on SWH's raw contents:
         - content:
           - mimetype
           - ctags
           - language
           - fossology-license
           - metadata
         - revision:
           - metadata
         
         An indexer is in charge of:
         - looking up objects
         - extracting information from those objects
         - store those information in the swh-indexer db
         
         There are multiple indexers working on different object types:
           - content indexer: works with content sha1 hashes
           - revision indexer: works with revision sha1 hashes
           - origin indexer: works with origin identifiers
         
         Indexation procedure:
         - receive batch of ids
         - retrieve the associated data depending on object type
         - compute for that object some index
         - store the result to swh's storage
         
         Current content indexers:
         
         - mimetype (queue swh_indexer_content_mimetype): detect the encoding
           and mimetype
         
         - language (queue swh_indexer_content_language): detect the
           programming language
         
         - ctags (queue swh_indexer_content_ctags): compute tags information
         
         - fossology-license (queue swh_indexer_fossology_license): compute the
           license
         
         - metadata: translate file into translated_metadata dict
         
         Current revision indexers:
         
         - metadata: detects files containing metadata and retrieves translated_metadata
           in content_metadata table in storage or run content indexer to translate
           files.
         
 Platform: UNKNOWN
 Classifier: Programming Language :: Python :: 3
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Operating System :: OS Independent
 Classifier: Development Status :: 5 - Production/Stable
 Requires-Python: >=3.7
 Description-Content-Type: text/markdown
 Provides-Extra: testing
diff --git a/swh/indexer/tests/test_journal_client.py b/swh/indexer/tests/test_journal_client.py
index 38e4386..21e5e0b 100644
--- a/swh/indexer/tests/test_journal_client.py
+++ b/swh/indexer/tests/test_journal_client.py
@@ -1,153 +1,132 @@
 # Copyright (C) 2019-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-import unittest
+
 from unittest.mock import Mock, patch
 
 from swh.indexer.journal_client import process_journal_objects
 
 
-class JournalClientTest(unittest.TestCase):
-    def test_one_origin_visit_status(self):
-        mock_scheduler = Mock()
-        messages = {
-            "origin_visit_status": [{"status": "full", "origin": "file:///dev/zero",},]
-        }
-        process_journal_objects(
-            messages,
-            scheduler=mock_scheduler,
-            task_names={"origin_metadata": "task-name"},
-        )
-        self.assertTrue(mock_scheduler.create_tasks.called)
-        call_args = mock_scheduler.create_tasks.call_args
-        (args, kwargs) = call_args
-        self.assertEqual(kwargs, {})
-        del args[0][0]["next_run"]
-        self.assertEqual(
-            args,
-            (
-                [
-                    {
-                        "arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),},
-                        "policy": "oneshot",
-                        "type": "task-name",
-                        "retries_left": 1,
-                    },
-                ],
-            ),
-        )
+def test_one_origin_visit_status():
+    mock_scheduler = Mock()
+    messages = {
+        "origin_visit_status": [{"status": "full", "origin": "file:///dev/zero",},]
+    }
+    process_journal_objects(
+        messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"},
+    )
+    assert mock_scheduler.create_tasks.called is True
+    call_args = mock_scheduler.create_tasks.call_args
+    (args, kwargs) = call_args
+    assert kwargs == {}
+    del args[0][0]["next_run"]
+    assert args == (
+        [
+            {
+                "arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),},
+                "policy": "oneshot",
+                "type": "task-name",
+                "retries_left": 1,
+            },
+        ],
+    )
+
+
+def test_origin_visit_legacy():
+    mock_scheduler = Mock()
+    messages = {
+        "origin_visit_status": [
+            {"status": "full", "origin": {"url": "file:///dev/zero",}},
+        ]
+    }
+    process_journal_objects(
+        messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"},
+    )
+    assert mock_scheduler.create_tasks.called is True
+    call_args = mock_scheduler.create_tasks.call_args
+    (args, kwargs) = call_args
+    assert kwargs == {}
+    del args[0][0]["next_run"]
+    assert args == (
+        [
+            {
+                "arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),},
+                "policy": "oneshot",
+                "type": "task-name",
+                "retries_left": 1,
+            },
+        ],
+    )
+
 
-    def test_origin_visit_legacy(self):
-        mock_scheduler = Mock()
-        messages = {
-            "origin_visit_status": [
-                {"status": "full", "origin": {"url": "file:///dev/zero",}},
-            ]
-        }
-        process_journal_objects(
-            messages,
-            scheduler=mock_scheduler,
-            task_names={"origin_metadata": "task-name"},
-        )
-        self.assertTrue(mock_scheduler.create_tasks.called)
-        call_args = mock_scheduler.create_tasks.call_args
-        (args, kwargs) = call_args
-        self.assertEqual(kwargs, {})
-        del args[0][0]["next_run"]
-        self.assertEqual(
-            args,
-            (
-                [
-                    {
-                        "arguments": {"kwargs": {}, "args": (["file:///dev/zero"],),},
-                        "policy": "oneshot",
-                        "type": "task-name",
-                        "retries_left": 1,
-                    },
-                ],
-            ),
-        )
+def test_one_origin_visit_batch():
+    mock_scheduler = Mock()
+    messages = {
+        "origin_visit_status": [
+            {"status": "full", "origin": "file:///dev/zero",},
+            {"status": "full", "origin": "file:///tmp/foobar",},
+        ]
+    }
+    process_journal_objects(
+        messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"},
+    )
+    assert mock_scheduler.create_tasks.called is True
+    call_args = mock_scheduler.create_tasks.call_args
+    (args, kwargs) = call_args
+    assert kwargs == {}
+    del args[0][0]["next_run"]
+    assert args == (
+        [
+            {
+                "arguments": {
+                    "kwargs": {},
+                    "args": (["file:///dev/zero", "file:///tmp/foobar"],),
+                },
+                "policy": "oneshot",
+                "type": "task-name",
+                "retries_left": 1,
+            },
+        ],
+    )
 
-    def test_one_origin_visit_batch(self):
-        mock_scheduler = Mock()
-        messages = {
-            "origin_visit_status": [
-                {"status": "full", "origin": "file:///dev/zero",},
-                {"status": "full", "origin": "file:///tmp/foobar",},
-            ]
-        }
-        process_journal_objects(
-            messages,
-            scheduler=mock_scheduler,
-            task_names={"origin_metadata": "task-name"},
-        )
-        self.assertTrue(mock_scheduler.create_tasks.called)
-        call_args = mock_scheduler.create_tasks.call_args
-        (args, kwargs) = call_args
-        self.assertEqual(kwargs, {})
-        del args[0][0]["next_run"]
-        self.assertEqual(
-            args,
-            (
-                [
-                    {
-                        "arguments": {
-                            "kwargs": {},
-                            "args": (["file:///dev/zero", "file:///tmp/foobar"],),
-                        },
-                        "policy": "oneshot",
-                        "type": "task-name",
-                        "retries_left": 1,
-                    },
-                ],
-            ),
-        )
 
-    @patch("swh.indexer.journal_client.MAX_ORIGINS_PER_TASK", 2)
-    def test_origin_visit_batches(self):
-        mock_scheduler = Mock()
-        messages = {
-            "origin_visit_status": [
-                {"status": "full", "origin": "file:///dev/zero",},
-                {"status": "full", "origin": "file:///tmp/foobar",},
-                {"status": "full", "origin": "file:///tmp/spamegg",},
-            ]
-        }
-        process_journal_objects(
-            messages,
-            scheduler=mock_scheduler,
-            task_names={"origin_metadata": "task-name"},
-        )
-        self.assertTrue(mock_scheduler.create_tasks.called)
-        call_args = mock_scheduler.create_tasks.call_args
-        (args, kwargs) = call_args
-        self.assertEqual(kwargs, {})
-        del args[0][0]["next_run"]
-        del args[0][1]["next_run"]
-        self.assertEqual(
-            args,
-            (
-                [
-                    {
-                        "arguments": {
-                            "kwargs": {},
-                            "args": (["file:///dev/zero", "file:///tmp/foobar"],),
-                        },
-                        "policy": "oneshot",
-                        "type": "task-name",
-                        "retries_left": 1,
-                    },
-                    {
-                        "arguments": {
-                            "kwargs": {},
-                            "args": (["file:///tmp/spamegg"],),
-                        },
-                        "policy": "oneshot",
-                        "type": "task-name",
-                        "retries_left": 1,
-                    },
-                ],
-            ),
-        )
+@patch("swh.indexer.journal_client.MAX_ORIGINS_PER_TASK", 2)
+def test_origin_visit_batches():
+    mock_scheduler = Mock()
+    messages = {
+        "origin_visit_status": [
+            {"status": "full", "origin": "file:///dev/zero",},
+            {"status": "full", "origin": "file:///tmp/foobar",},
+            {"status": "full", "origin": "file:///tmp/spamegg",},
+        ]
+    }
+    process_journal_objects(
+        messages, scheduler=mock_scheduler, task_names={"origin_metadata": "task-name"},
+    )
+    assert mock_scheduler.create_tasks.called is True
+    call_args = mock_scheduler.create_tasks.call_args
+    (args, kwargs) = call_args
+    assert kwargs == {}
+    del args[0][0]["next_run"]
+    del args[0][1]["next_run"]
+    assert args == (
+        [
+            {
+                "arguments": {
+                    "kwargs": {},
+                    "args": (["file:///dev/zero", "file:///tmp/foobar"],),
+                },
+                "policy": "oneshot",
+                "type": "task-name",
+                "retries_left": 1,
+            },
+            {
+                "arguments": {"kwargs": {}, "args": (["file:///tmp/spamegg"],),},
+                "policy": "oneshot",
+                "type": "task-name",
+                "retries_left": 1,
+            },
+        ],
+    )
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
index d3aef57..73d8d41 100644
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -1,125 +1,140 @@
 # Copyright (C) 2017-2020  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from typing import Any, Dict
 import unittest
 
 import pytest
 
 from swh.indexer.mimetype import (
     MimetypeIndexer,
     MimetypePartitionIndexer,
     compute_mimetype_encoding,
 )
 from swh.indexer.storage.model import ContentMimetypeRow
 from swh.indexer.tests.utils import (
     BASE_TEST_CONFIG,
     CommonContentIndexerPartitionTest,
     CommonContentIndexerTest,
     fill_obj_storage,
     fill_storage,
     filter_dict,
 )
 from swh.model.hashutil import hash_to_bytes
 
 
-def test_compute_mimetype_encoding():
-    """Compute mimetype encoding should return results"""
-    for _input, _mimetype, _encoding in [
+@pytest.mark.parametrize(
+    "raw_text,mimetype,encoding",
+    [
         ("du français".encode(), "text/plain", "utf-8"),
-        (b"def __init__(self):", "text/x-python", "us-ascii"),
+        (b"def __init__(self):", ("text/x-python", "text/x-script.python"), "us-ascii"),
         (b"\xff\xfe\x00\x00\x00\x00\xff\xfe\xff\xff", "application/octet-stream", ""),
-    ]:
-        actual_result = compute_mimetype_encoding(_input)
-        assert actual_result == {"mimetype": _mimetype, "encoding": _encoding}
+    ],
+)
+def test_compute_mimetype_encoding(raw_text, mimetype, encoding):
+    """Compute mimetype encoding should return results"""
+    actual_result = compute_mimetype_encoding(raw_text)
+    if isinstance(mimetype, tuple):
+        # New magic version can return different results, this deals with such a case
+        expected_result = {"mimetype": mimetype[0], "encoding": encoding}
+        # as a fallback
+        fallback_expected_result = {"mimetype": mimetype[1], "encoding": encoding}
+    else:
+        expected_result = {"mimetype": mimetype, "encoding": encoding}
+        fallback_expected_result = expected_result
+
+    try:
+        assert actual_result == expected_result
+    except AssertionError:
+        assert actual_result == fallback_expected_result
 
 
 CONFIG = {
     **BASE_TEST_CONFIG,
     "tools": {
         "name": "file",
         "version": "1:5.30-1+deb9u1",
         "configuration": {"type": "library", "debian-package": "python3-magic"},
     },
 }  # type: Dict[str, Any]
 
 
 class TestMimetypeIndexer(CommonContentIndexerTest, unittest.TestCase):
     """Mimetype indexer test scenarios:
 
     - Known sha1s in the input list have their data indexed
     - Unknown sha1 in the input list are not indexed
 
     """
 
     def get_indexer_results(self, ids):
         yield from self.idx_storage.content_mimetype_get(ids)
 
     def setUp(self):
         self.indexer = MimetypeIndexer(config=CONFIG)
         self.indexer.catch_exceptions = False
         self.idx_storage = self.indexer.idx_storage
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
         self.id0 = "01c9379dfc33803963d07c1ccc748d3fe4c96bb5"
         self.id1 = "688a5ef812c53907562fe379d4b3851e69c7cb15"
         self.id2 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"
 
         tool = {k.replace("tool_", ""): v for (k, v) in self.indexer.tool.items()}
 
         self.expected_results = [
             ContentMimetypeRow(
                 id=hash_to_bytes(self.id0),
                 tool=tool,
                 mimetype="text/plain",
                 encoding="us-ascii",
             ),
             ContentMimetypeRow(
                 id=hash_to_bytes(self.id1),
                 tool=tool,
                 mimetype="text/plain",
                 encoding="us-ascii",
             ),
             ContentMimetypeRow(
                 id=hash_to_bytes(self.id2),
                 tool=tool,
                 mimetype="application/x-empty",
                 encoding="binary",
             ),
         ]
 
 
 RANGE_CONFIG = dict(list(CONFIG.items()) + [("write_batch_size", 100)])
 
 
 class TestMimetypePartitionIndexer(
     CommonContentIndexerPartitionTest, unittest.TestCase
 ):
     """Range Mimetype Indexer tests.
 
     - new data within range are indexed
     - no data outside a range are indexed
     - with filtering existing indexed data prior to compute new index
     - without filtering existing indexed data prior to compute new index
 
     """
 
     def setUp(self):
         super().setUp()
         self.indexer = MimetypePartitionIndexer(config=RANGE_CONFIG)
         self.indexer.catch_exceptions = False
         fill_storage(self.indexer.storage)
         fill_obj_storage(self.indexer.objstorage)
 
 
 def test_mimetype_w_no_tool():
     with pytest.raises(ValueError):
         MimetypeIndexer(config=filter_dict(CONFIG, "tools"))
 
 
 def test_mimetype_range_w_no_tool():
     with pytest.raises(ValueError):
         MimetypePartitionIndexer(config=filter_dict(CONFIG, "tools"))