diff --git a/swh/storage/archiver/director.py b/swh/storage/archiver/director.py
--- a/swh/storage/archiver/director.py
+++ b/swh/storage/archiver/director.py
@@ -76,7 +76,7 @@
         copies.
         """
         contents = []
-        for content in self._get_unarchived_content():
+        for content in self._get_unarchived_content_id():
             contents.append(content)
             if len(contents) > self.config['batch_max_size']:
                 yield contents
@@ -84,21 +84,11 @@
         if len(contents) > 0:
             yield contents
 
-    def _get_unarchived_content(self):
-        """ Get all the content ids in the db that needs more copies
-
-        Yields:
-            sha1 of contents that needs to be archived.
-        """
-        for content_id, present, _ongoing in self._get_all_contents():
-            if len(present) < self.config['retention_policy']:
-                yield content_id
-            else:
-                continue
-
-    def _get_all_contents(self):
+    def _get_unarchived_content_id(self):
         """ Get batchs from the archiver db and yield it as continous stream
 
+        Contents returned are those that need more copies.
+
         Yields:
             Datas about a content as a tuple
             (content_id, present_copies, ongoing_copies) where ongoing_copies
@@ -107,13 +97,16 @@
         last_object = b''
         while True:
             archiver_contents = list(
-                self.archiver_storage.content_archive_get_copies(last_object)
+                self.archiver_storage.content_archive_get_unarchived_copies(
+                    last_content=last_object,
+                    retention_policy=self.config['retention_policy']
+                )
             )
             if not archiver_contents:
                 return
-            for content in archiver_contents:
-                last_object = content[0]
-                yield content
+            for content_id, _present, _ongoing in archiver_contents:
+                last_object = content_id
+                yield content_id
 
 
 def launch():
diff --git a/swh/storage/archiver/storage.py b/swh/storage/archiver/storage.py
--- a/swh/storage/archiver/storage.py
+++ b/swh/storage/archiver/storage.py
@@ -53,13 +53,13 @@
         return self.db.content_archive_get(content_id, cur)
 
     @db_transaction_generator
-    def content_archive_get_copies(self, previous_content=None, limit=1000,
+    def content_archive_get_copies(self, last_content=None, limit=1000,
                                    cur=None):
-        """Get the list of copies for `limit` contents starting after
-           `previous_content`.
+        """ Get the list of copies for `limit` contents starting after
+            `last_content`.
 
         Args:
-            previous_content: sha1 of the last content retrieved. May be None
+            last_content: sha1 of the last content retrieved. May be None
                               to start at the beginning.
             limit: number of contents to retrieve. Can be None to retrieve all
                    objects (will be slow).
@@ -69,9 +69,32 @@
             ongoing_copies is a dict mapping copy to mtime.
 
         """
-        yield from self.db.content_archive_get_copies(previous_content, limit,
+        yield from self.db.content_archive_get_copies(last_content, limit,
                                                       cur)
 
+    @db_transaction_generator
+    def content_archive_get_unarchived_copies(
+            self, retention_policy, last_content=None,
+            limit=1000, cur=None):
+        """ Get the list of copies for `limit` contents starting after
+            `last_content`. Yields only contents with fewer present
+            copies than `retention_policy`.
+
+        Args:
+            last_content: sha1 of the last content retrieved. May be None
+                          to start at the beginning.
+            retention_policy: number of present copies required.
+            limit: number of contents to retrieve. Can be None to retrieve all
+                   objects (will be slow).
+
+        Yields:
+            A tuple (content_id, present_copies, ongoing_copies), where
+            ongoing_copies is a dict mapping copy to mtime.
+
+        """
+        yield from self.db.content_archive_get_unarchived_copies(
+            retention_policy, last_content, limit, cur)
+
     @db_transaction
     def content_archive_update(self, content_id, archive_id,
                                new_status=None, cur=None):
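
Note: the two pieces above rely on a keyset-pagination contract: each batch
is requested starting strictly after the last content_id seen. Below is a
minimal standalone sketch of that loop, not part of this diff; `storage` is
any object exposing the content_archive_get_unarchived_copies method added
here, and the helper name iter_unarchived is hypothetical.

    def iter_unarchived(storage, retention_policy, batch_size=1000):
        # Walk the archiver db in batches, resuming each query just after
        # the last content_id seen, so no OFFSET scan is ever needed.
        last_content = b''
        while True:
            batch = list(storage.content_archive_get_unarchived_copies(
                retention_policy=retention_policy,
                last_content=last_content,
                limit=batch_size))
            if not batch:
                return
            for content_id, present, ongoing in batch:
                last_content = content_id  # cursor for the next round trip
                yield content_id, present, ongoing
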
+ + """ + yield from self.db.content_archive_get_unarchived_copies( + retention_policy, last_content, limit, cur) + @db_transaction def content_archive_update(self, content_id, archive_id, new_status=None, cur=None): diff --git a/swh/storage/db.py b/swh/storage/db.py --- a/swh/storage/db.py +++ b/swh/storage/db.py @@ -702,13 +702,13 @@ content_id, present, ongoing, mtimes = cur.fetchone() return (content_id, present, dict(zip(ongoing, mtimes))) - def content_archive_get_copies(self, previous_content=None, limit=1000, + def content_archive_get_copies(self, last_content=None, limit=1000, cur=None): """Get the list of copies for `limit` contents starting after - `previous_content`. + `last_content`. Args: - previous_content: sha1 of the last content retrieved. May be None + last_content: sha1 of the last content retrieved. May be None to start at the beginning. limit: number of contents to retrieve. Can be None to retrieve all objects (will be slow). @@ -744,11 +744,64 @@ LIMIT %s """ - if previous_content is None: - previous_content = b'' + if last_content is None: + last_content = b'' cur = self._cursor(cur) - cur.execute(query, (previous_content, limit)) + cur.execute(query, (last_content, limit)) + for content_id, present, ongoing, mtimes in cursor_to_bytes(cur): + yield (content_id, present, dict(zip(ongoing, mtimes))) + + def content_archive_get_unarchived_copies( + self, retention_policy, last_content=None, + limit=1000, cur=None): + """ Get the list of copies for `limit` contents starting after + `last_content`. Yields only copies with number of present + smaller than `retention policy`. + + Args: + last_content: sha1 of the last content retrieved. May be None + to start at the beginning. + retention_policy: number of presentcopies required. + limit: number of contents to retrieve. Can be None to retrieve all + objects (will be slow). + + Yields: + A tuple (content_id, present_copies, ongoing_copies), where + ongoing_copies is a dict mapping copy to mtime. + + """ + + query = """SELECT content_id, + array( + SELECT key + FROM jsonb_each(copies) + WHERE value->>'status' = 'present' + ORDER BY key + ) AS present, + array( + SELECT key + FROM jsonb_each(copies) + WHERE value->>'status' = 'ongoing' + ORDER BY key + ) AS ongoing, + array( + SELECT value->'mtime' + FROM jsonb_each(copies) + WHERE value->>'status' = 'ongoing' + ORDER BY key + ) AS ongoing_mtime + FROM content_archive + WHERE content_id > %s AND num_present < %s + ORDER BY content_id + LIMIT %s + """ + + if last_content is None: + last_content = b'' + + cur = self._cursor(cur) + cur.execute(query, (last_content, retention_policy, limit)) for content_id, present, ongoing, mtimes in cursor_to_bytes(cur): yield (content_id, present, dict(zip(ongoing, mtimes)))