diff --git a/swh/indexer/fossology_license.py b/swh/indexer/fossology_license.py
--- a/swh/indexer/fossology_license.py
+++ b/swh/indexer/fossology_license.py
@@ -173,14 +173,12 @@
             **start** (bytes): Starting bound from range identifier
             **end** (bytes): End range identifier
 
-        Yields:
-            Content identifier (bytes) present in the range [start, end]
+        Returns:
+            a dict with keys:
+            - **ids** [bytes]: iterable of content ids within the range.
+            - **next** (Optional[bytes]): The next range of sha1 starts at
+                                          this sha1 if any
 
         """
-        while start:
-            result = self.idx_storage.content_fossology_license_get_range(
+        return self.idx_storage.content_fossology_license_get_range(
                 start, end, self.tool['id'])
-            contents = result['ids']
-            for _id in contents:
-                yield _id
-            start = result['next']
diff --git a/swh/indexer/indexer.py b/swh/indexer/indexer.py
--- a/swh/indexer/indexer.py
+++ b/swh/indexer/indexer.py
@@ -439,6 +439,26 @@
             if res:
                 yield res
 
+    def _index_with_skipping_already_done(self, start, end):
+        """Index the contents of range [start, end] not indexed yet.
+
+        Args:
+            **start** (Union[bytes, str]): Starting range identifier
+            **end** (Union[bytes, str]): Ending range identifier
+
+        Yields:
+            Whatever `_index_contents` yields, one indexed page at a time.
+
+        """
+        while start:
+            page = self.indexed_contents_in_range(start, end)
+            done, next_start = page['ids'], page['next']
+            # Stop at the page's last id only if another page follows;
+            # the last page must reach `end` or its tail is never indexed.
+            stop = done[-1] if done and next_start else end
+            yield from self._index_contents(start, stop, done)
+            start = next_start
+
     def run(self, start, end, skip_existing=True, **kwargs):
         """Given a range of content ids, compute the indexing computations on
            the contents within. Either the indexer is incremental
@@ -464,20 +484,20 @@
                 end = hashutil.hash_to_bytes(end)
 
             if skip_existing:
-                indexed = set(self.indexed_contents_in_range(start, end))
+                gen = self._index_with_skipping_already_done(start, end)
             else:
-                indexed = set()
+                gen = self._index_contents(start, end, indexed=[])
 
-            index_computations = self._index_contents(start, end, indexed)
-            for results in utils.grouper(index_computations,
+            for results in utils.grouper(gen,
                                          n=self.config['write_batch_size']):
                 self.persist_index_computations(
                     results, policy_update='update-dups')
                 with_indexed_data = True
-            return with_indexed_data
         except Exception:
             self.log.exception(
                 'Problem when computing metadata.')
+        finally:
+            return with_indexed_data
 
 
 class OriginIndexer(BaseIndexer):
diff --git a/swh/indexer/mimetype.py b/swh/indexer/mimetype.py
--- a/swh/indexer/mimetype.py
+++ b/swh/indexer/mimetype.py
@@ -142,14 +142,12 @@
             **start** (bytes): Starting bound from range identifier
             **end** (bytes): End range identifier
 
-        Yields:
-            Content identifier (bytes) present in the range [start, end]
+        Returns:
+            a dict with keys:
+            - **ids** [bytes]: iterable of content ids within the range.
+            - **next** (Optional[bytes]): The next range of sha1 starts at
+                                          this sha1 if any
 
         """
-        while start:
-            result = self.idx_storage.content_mimetype_get_range(
-                start, end, self.tool['id'])
-            contents = result['ids']
-            for _id in contents:
-                yield _id
-            start = result['next']
+        return self.idx_storage.content_mimetype_get_range(
+            start, end, self.tool['id'])
diff --git a/swh/indexer/tests/test_mimetype.py b/swh/indexer/tests/test_mimetype.py
--- a/swh/indexer/tests/test_mimetype.py
+++ b/swh/indexer/tests/test_mimetype.py
@@ -173,12 +173,12 @@
 
 class MimetypeIndexerUnknownToolTestStorage(
         CommonIndexerNoTool, MimetypeTestIndexer):
-    """Fossology license indexer with wrong configuration"""
+    """Mimetype indexer with wrong configuration (CommonIndexerNoTool)"""
 
 
 class MimetypeRangeIndexerUnknownToolTestStorage(
         CommonIndexerNoTool, MimetypeRangeIndexerTest):
-    """Fossology license range indexer with wrong configuration"""
+    """Mimetype range indexer with wrong configuration (CommonIndexerNoTool)"""
 
 
 class TestMimetypeIndexersErrors(
diff --git a/swh/indexer/tests/test_utils.py b/swh/indexer/tests/test_utils.py
--- a/swh/indexer/tests/test_utils.py
+++ b/swh/indexer/tests/test_utils.py
@@ -685,6 +685,7 @@
 
         """
         start, end = [self.contents[0], self.contents[2]]  # output hex ids
+
         # given
         actual_results = self.indexer.run(start, end)
 
@@ -702,7 +703,8 @@
 
         # given
         actual_results = self.indexer.run(  # checks the bytes input this time
-            start, end, skip_existing=False)  # no data so same result
+            start, end, skip_existing=False)
+        # no already indexed data so same result as prior test
 
         # then
         self.assertTrue(actual_results)