diff --git a/sql/updates/17.sql b/sql/updates/17.sql new file mode 100644 index 0000000..695fb31 --- /dev/null +++ b/sql/updates/17.sql @@ -0,0 +1,4 @@ +insert into dbversion (version, release, description) + values (17, now(), 'Work In Progress'); + +create index concurrently on listed_origins(url); diff --git a/swh/scheduler/backend.py b/swh/scheduler/backend.py index cfbdff2..84cda7c 100644 --- a/swh/scheduler/backend.py +++ b/swh/scheduler/backend.py @@ -1,674 +1,734 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import logging +from uuid import UUID from arrow import Arrow, utcnow import attr import psycopg2.pool import psycopg2.extras -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union from psycopg2.extensions import AsIs from swh.core.db import BaseDb from swh.core.db.common import db_transaction from .exc import StaleData -from .model import Lister, ListedOrigin +from .model import ( + Lister, + ListedOrigin, + ListedOriginPageToken, + PaginatedListedOriginList, +) logger = logging.getLogger(__name__) def adapt_arrow(arrow): return AsIs("'%s'::timestamptz" % arrow.isoformat()) psycopg2.extensions.register_adapter(dict, psycopg2.extras.Json) psycopg2.extensions.register_adapter(Arrow, adapt_arrow) psycopg2.extras.register_uuid() def format_query(query, keys): """Format a query with the given keys""" query_keys = ", ".join(keys) placeholders = ", ".join(["%s"] * len(keys)) return query.format(keys=query_keys, placeholders=placeholders) class SchedulerBackend: """Backend for the Software Heritage scheduling database. """ def __init__(self, db, min_pool_conns=1, max_pool_conns=10): """ Args: db_conn: either a libpq connection string, or a psycopg2 connection """ if isinstance(db, psycopg2.extensions.connection): self._pool = None self._db = BaseDb(db) else: self._pool = psycopg2.pool.ThreadedConnectionPool( min_pool_conns, max_pool_conns, db, cursor_factory=psycopg2.extras.RealDictCursor, ) self._db = None def get_db(self): if self._db: return self._db return BaseDb.from_pool(self._pool) def put_db(self, db): if db is not self._db: db.put_conn() task_type_keys = [ "type", "description", "backend_name", "default_interval", "min_interval", "max_interval", "backoff_factor", "max_queue_length", "num_retries", "retry_delay", ] @db_transaction() def create_task_type(self, task_type, db=None, cur=None): """Create a new task type ready for scheduling. 
Args: task_type (dict): a dictionary with the following keys: - type (str): an identifier for the task type - description (str): a human-readable description of what the task does - backend_name (str): the name of the task in the job-scheduling backend - default_interval (datetime.timedelta): the default interval between two task runs - min_interval (datetime.timedelta): the minimum interval between two task runs - max_interval (datetime.timedelta): the maximum interval between two task runs - backoff_factor (float): the factor by which the interval changes at each run - max_queue_length (int): the maximum length of the task queue for this task type """ keys = [key for key in self.task_type_keys if key in task_type] query = format_query( """insert into task_type ({keys}) values ({placeholders}) on conflict do nothing""", keys, ) cur.execute(query, [task_type[key] for key in keys]) @db_transaction() def get_task_type(self, task_type_name, db=None, cur=None): """Retrieve the task type with id task_type_name""" query = format_query( "select {keys} from task_type where type=%s", self.task_type_keys, ) cur.execute(query, (task_type_name,)) return cur.fetchone() @db_transaction() def get_task_types(self, db=None, cur=None): """Retrieve all registered task types""" query = format_query("select {keys} from task_type", self.task_type_keys,) cur.execute(query) return cur.fetchall() @db_transaction() def get_or_create_lister( self, name: str, instance_name: Optional[str] = None, db=None, cur=None ) -> Lister: """Retrieve information about the given instance of the lister from the database, or create the entry if it did not exist. """ if instance_name is None: instance_name = "" select_cols = ", ".join(Lister.select_columns()) insert_cols, insert_meta = ( ", ".join(tup) for tup in Lister.insert_columns_and_metavars() ) query = f""" with added as ( insert into listers ({insert_cols}) values ({insert_meta}) on conflict do nothing returning {select_cols} ) select {select_cols} from added union all select {select_cols} from listers where (name, instance_name) = (%(name)s, %(instance_name)s); """ cur.execute(query, attr.asdict(Lister(name=name, instance_name=instance_name))) return Lister(**cur.fetchone()) @db_transaction() def update_lister(self, lister: Lister, db=None, cur=None) -> Lister: """Update the state for the given lister instance in the database. Returns: a new Lister object, with all fields updated from the database Raises: StaleData if the `updated` timestamp for the lister instance in database doesn't match the one passed by the user. """ select_cols = ", ".join(Lister.select_columns()) set_vars = ", ".join( f"{col} = {meta}" for col, meta in zip(*Lister.insert_columns_and_metavars()) ) query = f"""update listers set {set_vars} where id=%(id)s and updated=%(updated)s returning {select_cols}""" cur.execute(query, attr.asdict(lister)) updated = cur.fetchone() if not updated: raise StaleData("Stale data; Lister state not updated") return Lister(**updated) @db_transaction() def record_listed_origins( self, listed_origins: Iterable[ListedOrigin], db=None, cur=None ) -> List[ListedOrigin]: """Record a set of origins that a lister has listed. This performs an "upsert": origins with the same (lister_id, url, visit_type) values are updated with new values for extra_loader_arguments, last_update and last_seen. 
""" pk_cols = ListedOrigin.primary_key_columns() select_cols = ListedOrigin.select_columns() insert_cols, insert_meta = ListedOrigin.insert_columns_and_metavars() upsert_cols = [col for col in insert_cols if col not in pk_cols] upsert_set = ", ".join(f"{col} = EXCLUDED.{col}" for col in upsert_cols) query = f"""INSERT into listed_origins ({", ".join(insert_cols)}) VALUES %s ON CONFLICT ({", ".join(pk_cols)}) DO UPDATE SET {upsert_set} RETURNING {", ".join(select_cols)} """ ret = psycopg2.extras.execute_values( cur=cur, sql=query, argslist=(attr.asdict(origin) for origin in listed_origins), template=f"({', '.join(insert_meta)})", page_size=1000, fetch=True, ) return [ListedOrigin(**d) for d in ret] + @db_transaction() + def get_listed_origins( + self, + lister_id: Optional[UUID] = None, + url: Optional[str] = None, + limit: int = 1000, + page_token: Optional[ListedOriginPageToken] = None, + db=None, + cur=None, + ) -> PaginatedListedOriginList: + """Get information on the listed origins matching either the `url` or + `lister_id`, or both arguments. + """ + + query_filters: List[str] = [] + query_params: List[Union[int, str, UUID, Tuple[UUID, str]]] = [] + + if lister_id: + query_filters.append("lister_id = %s") + query_params.append(lister_id) + + if url is not None: + query_filters.append("url = %s") + query_params.append(url) + + if page_token is not None: + query_filters.append("(lister_id, url) > %s") + # the typeshed annotation for tuple() is too strict. + query_params.append(tuple(page_token)) # type: ignore + + query_params.append(limit) + + select_cols = ", ".join(ListedOrigin.select_columns()) + if query_filters: + where_clause = "where %s" % (" and ".join(query_filters)) + else: + where_clause = "" + + query = f"""SELECT {select_cols} + from listed_origins + {where_clause} + ORDER BY lister_id, url + LIMIT %s""" + + cur.execute(query, tuple(query_params)) + origins = [ListedOrigin(**d) for d in cur] + + if len(origins) == limit: + page_token = (origins[-1].lister_id, origins[-1].url) + else: + page_token = None + + return PaginatedListedOriginList(origins, page_token) + task_create_keys = [ "type", "arguments", "next_run", "policy", "status", "retries_left", "priority", ] task_keys = task_create_keys + ["id", "current_interval"] @db_transaction() def create_tasks(self, tasks, policy="recurring", db=None, cur=None): """Create new tasks. Args: tasks (list): each task is a dictionary with the following keys: - type (str): the task type - arguments (dict): the arguments for the task runner, keys: - args (list of str): arguments - kwargs (dict str -> str): keyword arguments - next_run (datetime.datetime): the next scheduled run for the task Returns: a list of created tasks. """ cur.execute("select swh_scheduler_mktemp_task()") db.copy_to( tasks, "tmp_task", self.task_create_keys, default_values={"policy": policy, "status": "next_run_not_scheduled"}, cur=cur, ) query = format_query( "select {keys} from swh_scheduler_create_tasks_from_temp()", self.task_keys, ) cur.execute(query) return cur.fetchall() @db_transaction() def set_status_tasks( self, task_ids, status="disabled", next_run=None, db=None, cur=None ): """Set the tasks' status whose ids are listed. If given, also set the next_run date. 
""" if not task_ids: return query = ["UPDATE task SET status = %s"] args = [status] if next_run: query.append(", next_run = %s") args.append(next_run) query.append(" WHERE id IN %s") args.append(tuple(task_ids)) cur.execute("".join(query), args) @db_transaction() def disable_tasks(self, task_ids, db=None, cur=None): """Disable the tasks whose ids are listed.""" return self.set_status_tasks(task_ids, db=db, cur=cur) @db_transaction() def search_tasks( self, task_id=None, task_type=None, status=None, priority=None, policy=None, before=None, after=None, limit=None, db=None, cur=None, ): """Search tasks from selected criterions""" where = [] args = [] if task_id: if isinstance(task_id, (str, int)): where.append("id = %s") else: where.append("id in %s") task_id = tuple(task_id) args.append(task_id) if task_type: if isinstance(task_type, str): where.append("type = %s") else: where.append("type in %s") task_type = tuple(task_type) args.append(task_type) if status: if isinstance(status, str): where.append("status = %s") else: where.append("status in %s") status = tuple(status) args.append(status) if priority: if isinstance(priority, str): where.append("priority = %s") else: priority = tuple(priority) where.append("priority in %s") args.append(priority) if policy: where.append("policy = %s") args.append(policy) if before: where.append("next_run <= %s") args.append(before) if after: where.append("next_run >= %s") args.append(after) query = "select * from task" if where: query += " where " + " and ".join(where) if limit: query += " limit %s :: bigint" args.append(limit) cur.execute(query, args) return cur.fetchall() @db_transaction() def get_tasks(self, task_ids, db=None, cur=None): """Retrieve the info of tasks whose ids are listed.""" query = format_query("select {keys} from task where id in %s", self.task_keys) cur.execute(query, (tuple(task_ids),)) return cur.fetchall() @db_transaction() def peek_ready_tasks( self, task_type, timestamp=None, num_tasks=None, num_tasks_priority=None, db=None, cur=None, ): """Fetch the list of ready tasks Args: task_type (str): filtering task per their type timestamp (datetime.datetime): peek tasks that need to be executed before that timestamp num_tasks (int): only peek at num_tasks tasks (with no priority) num_tasks_priority (int): only peek at num_tasks_priority tasks (with priority) Returns: a list of tasks """ if timestamp is None: timestamp = utcnow() cur.execute( """select * from swh_scheduler_peek_ready_tasks( %s, %s, %s :: bigint, %s :: bigint)""", (task_type, timestamp, num_tasks, num_tasks_priority), ) logger.debug("PEEK %s => %s" % (task_type, cur.rowcount)) return cur.fetchall() @db_transaction() def grab_ready_tasks( self, task_type, timestamp=None, num_tasks=None, num_tasks_priority=None, db=None, cur=None, ): """Fetch the list of ready tasks, and mark them as scheduled Args: task_type (str): filtering task per their type timestamp (datetime.datetime): grab tasks that need to be executed before that timestamp num_tasks (int): only grab num_tasks tasks (with no priority) num_tasks_priority (int): only grab oneshot num_tasks tasks (with priorities) Returns: a list of tasks """ if timestamp is None: timestamp = utcnow() cur.execute( """select * from swh_scheduler_grab_ready_tasks( %s, %s, %s :: bigint, %s :: bigint)""", (task_type, timestamp, num_tasks, num_tasks_priority), ) logger.debug("GRAB %s => %s" % (task_type, cur.rowcount)) return cur.fetchall() task_run_create_keys = ["task", "backend_id", "scheduled", "metadata"] @db_transaction() def 
schedule_task_run( self, task_id, backend_id, metadata=None, timestamp=None, db=None, cur=None ): """Mark a given task as scheduled, adding a task_run entry in the database. Args: task_id (int): the identifier for the task being scheduled backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: a fresh task_run entry """ if metadata is None: metadata = {} if timestamp is None: timestamp = utcnow() cur.execute( "select * from swh_scheduler_schedule_task_run(%s, %s, %s, %s)", (task_id, backend_id, metadata, timestamp), ) return cur.fetchone() @db_transaction() def mass_schedule_task_runs(self, task_runs, db=None, cur=None): """Schedule a bunch of task runs. Args: task_runs (list): a list of dicts with keys: - task (int): the identifier for the task being scheduled - backend_id (str): the identifier of the job in the backend - metadata (dict): metadata to add to the task_run entry - scheduled (datetime.datetime): the instant the event occurred Returns: None """ cur.execute("select swh_scheduler_mktemp_task_run()") db.copy_to(task_runs, "tmp_task_run", self.task_run_create_keys, cur=cur) cur.execute("select swh_scheduler_schedule_task_run_from_temp()") @db_transaction() def start_task_run( self, backend_id, metadata=None, timestamp=None, db=None, cur=None ): """Mark a given task as started, updating the corresponding task_run entry in the database. Args: backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: the updated task_run entry """ if metadata is None: metadata = {} if timestamp is None: timestamp = utcnow() cur.execute( "select * from swh_scheduler_start_task_run(%s, %s, %s)", (backend_id, metadata, timestamp), ) return cur.fetchone() @db_transaction() def end_task_run( self, backend_id, status, metadata=None, timestamp=None, result=None, db=None, cur=None, ): """Mark a given task as ended, updating the corresponding task_run entry in the database. Args: backend_id (str): the identifier of the job in the backend status (str): how the task ended; one of: 'eventful', 'uneventful', 'failed' metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: the updated task_run entry """ if metadata is None: metadata = {} if timestamp is None: timestamp = utcnow() cur.execute( "select * from swh_scheduler_end_task_run(%s, %s, %s, %s)", (backend_id, status, metadata, timestamp), ) return cur.fetchone() @db_transaction() def filter_task_to_archive( self, after_ts: str, before_ts: str, limit: int = 10, page_token: Optional[str] = None, db=None, cur=None, ) -> Dict[str, Any]: """Compute the tasks to archive within the datetime interval [after_ts, before_ts[. The method returns a paginated result. Returns: dict with the following keys: - **next_page_token**: opaque token to be used as `page_token` to retrieve the next page of result. If absent, there is no more pages to gather. - **tasks**: list of task dictionaries with the following keys: **id** (str): origin task id **started** (Optional[datetime]): started date **scheduled** (datetime): scheduled date **arguments** (json dict): task's arguments ... 
""" assert not page_token or isinstance(page_token, str) last_id = -1 if page_token is None else int(page_token) tasks = [] cur.execute( "select * from swh_scheduler_task_to_archive(%s, %s, %s, %s)", (after_ts, before_ts, last_id, limit + 1), ) for row in cur: task = dict(row) # nested type index does not accept bare values # transform it as a dict to comply with this task["arguments"]["args"] = { i: v for i, v in enumerate(task["arguments"]["args"]) } kwargs = task["arguments"]["kwargs"] task["arguments"]["kwargs"] = json.dumps(kwargs) tasks.append(task) if len(tasks) >= limit + 1: # remains data, add pagination information result = { "tasks": tasks[:limit], "next_page_token": str(tasks[-1]["task_id"]), } else: result = {"tasks": tasks} return result @db_transaction() def delete_archived_tasks(self, task_ids, db=None, cur=None): """Delete archived tasks as much as possible. Only the task_ids whose complete associated task_run have been cleaned up will be. """ _task_ids = _task_run_ids = [] for task_id in task_ids: _task_ids.append(task_id["task_id"]) _task_run_ids.append(task_id["task_run_id"]) cur.execute( "select * from swh_scheduler_delete_archived_tasks(%s, %s)", (_task_ids, _task_run_ids), ) task_run_keys = [ "id", "task", "backend_id", "scheduled", "started", "ended", "metadata", "status", ] @db_transaction() def get_task_runs(self, task_ids, limit=None, db=None, cur=None): """Search task run for a task id""" where = [] args = [] if task_ids: if isinstance(task_ids, (str, int)): where.append("task = %s") else: where.append("task in %s") task_ids = tuple(task_ids) args.append(task_ids) else: return () query = "select * from task_run where " + " and ".join(where) if limit: query += " limit %s :: bigint" args.append(limit) cur.execute(query, args) return cur.fetchall() @db_transaction() def get_priority_ratios(self, db=None, cur=None): cur.execute("select id, ratio from priority_ratio") return {row["id"]: row["ratio"] for row in cur.fetchall()} diff --git a/swh/scheduler/interface.py b/swh/scheduler/interface.py index 0ff5311..69787ea 100644 --- a/swh/scheduler/interface.py +++ b/swh/scheduler/interface.py @@ -1,290 +1,312 @@ # Copyright (C) 2015-2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Any, Dict, Iterable, List, Optional +from uuid import UUID from swh.core.api import remote_api_endpoint -from swh.scheduler.model import ListedOrigin, Lister +from swh.scheduler.model import ( + ListedOrigin, + ListedOriginPageToken, + Lister, + PaginatedListedOriginList, +) class SchedulerInterface: @remote_api_endpoint("task_type/create") def create_task_type(self, task_type): """Create a new task type ready for scheduling. Args: task_type (dict): a dictionary with the following keys: - type (str): an identifier for the task type - description (str): a human-readable description of what the task does - backend_name (str): the name of the task in the job-scheduling backend - default_interval (datetime.timedelta): the default interval between two task runs - min_interval (datetime.timedelta): the minimum interval between two task runs - max_interval (datetime.timedelta): the maximum interval between two task runs - backoff_factor (float): the factor by which the interval changes at each run - max_queue_length (int): the maximum length of the task queue for this task type """ ... 
@remote_api_endpoint("task_type/get") def get_task_type(self, task_type_name): """Retrieve the task type with id task_type_name""" ... @remote_api_endpoint("task_type/get_all") def get_task_types(self): """Retrieve all registered task types""" ... @remote_api_endpoint("task/create") def create_tasks(self, tasks, policy="recurring"): """Create new tasks. Args: tasks (list): each task is a dictionary with the following keys: - type (str): the task type - arguments (dict): the arguments for the task runner, keys: - args (list of str): arguments - kwargs (dict str -> str): keyword arguments - next_run (datetime.datetime): the next scheduled run for the task Returns: a list of created tasks. """ ... @remote_api_endpoint("task/set_status") def set_status_tasks(self, task_ids, status="disabled", next_run=None): """Set the tasks' status whose ids are listed. If given, also set the next_run date. """ ... @remote_api_endpoint("task/disable") def disable_tasks(self, task_ids): """Disable the tasks whose ids are listed.""" ... @remote_api_endpoint("task/search") def search_tasks( self, task_id=None, task_type=None, status=None, priority=None, policy=None, before=None, after=None, limit=None, ): """Search tasks from selected criterions""" ... @remote_api_endpoint("task/get") def get_tasks(self, task_ids): """Retrieve the info of tasks whose ids are listed.""" ... @remote_api_endpoint("task/peek_ready") def peek_ready_tasks( self, task_type, timestamp=None, num_tasks=None, num_tasks_priority=None, ): """Fetch the list of ready tasks Args: task_type (str): filtering task per their type timestamp (datetime.datetime): peek tasks that need to be executed before that timestamp num_tasks (int): only peek at num_tasks tasks (with no priority) num_tasks_priority (int): only peek at num_tasks_priority tasks (with priority) Returns: a list of tasks """ ... @remote_api_endpoint("task/grab_ready") def grab_ready_tasks( self, task_type, timestamp=None, num_tasks=None, num_tasks_priority=None, ): """Fetch the list of ready tasks, and mark them as scheduled Args: task_type (str): filtering task per their type timestamp (datetime.datetime): grab tasks that need to be executed before that timestamp num_tasks (int): only grab num_tasks tasks (with no priority) num_tasks_priority (int): only grab oneshot num_tasks tasks (with priorities) Returns: a list of tasks """ ... @remote_api_endpoint("task_run/schedule_one") def schedule_task_run(self, task_id, backend_id, metadata=None, timestamp=None): """Mark a given task as scheduled, adding a task_run entry in the database. Args: task_id (int): the identifier for the task being scheduled backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: a fresh task_run entry """ ... @remote_api_endpoint("task_run/schedule") def mass_schedule_task_runs(self, task_runs): """Schedule a bunch of task runs. Args: task_runs (list): a list of dicts with keys: - task (int): the identifier for the task being scheduled - backend_id (str): the identifier of the job in the backend - metadata (dict): metadata to add to the task_run entry - scheduled (datetime.datetime): the instant the event occurred Returns: None """ ... @remote_api_endpoint("task_run/start") def start_task_run(self, backend_id, metadata=None, timestamp=None): """Mark a given task as started, updating the corresponding task_run entry in the database. 
Args: backend_id (str): the identifier of the job in the backend metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: the updated task_run entry """ ... @remote_api_endpoint("task_run/end") def end_task_run( self, backend_id, status, metadata=None, timestamp=None, result=None, ): """Mark a given task as ended, updating the corresponding task_run entry in the database. Args: backend_id (str): the identifier of the job in the backend status (str): how the task ended; one of: 'eventful', 'uneventful', 'failed' metadata (dict): metadata to add to the task_run entry timestamp (datetime.datetime): the instant the event occurred Returns: the updated task_run entry """ ... @remote_api_endpoint("task/filter_for_archive") def filter_task_to_archive( self, after_ts: str, before_ts: str, limit: int = 10, page_token: Optional[str] = None, ) -> Dict[str, Any]: """Compute the tasks to archive within the datetime interval [after_ts, before_ts[. The method returns a paginated result. Returns: dict with the following keys: - **next_page_token**: opaque token to be used as `page_token` to retrieve the next page of result. If absent, there is no more pages to gather. - **tasks**: list of task dictionaries with the following keys: **id** (str): origin task id **started** (Optional[datetime]): started date **scheduled** (datetime): scheduled date **arguments** (json dict): task's arguments ... """ ... @remote_api_endpoint("task/delete_archived") def delete_archived_tasks(self, task_ids): """Delete archived tasks as much as possible. Only the task_ids whose complete associated task_run have been cleaned up will be. """ ... @remote_api_endpoint("task_run/get") def get_task_runs(self, task_ids, limit=None): """Search task run for a task id""" ... @remote_api_endpoint("lister/get_or_create") def get_or_create_lister( self, name: str, instance_name: Optional[str] = None ) -> Lister: """Retrieve information about the given instance of the lister from the database, or create the entry if it did not exist. """ ... @remote_api_endpoint("lister/update") def update_lister(self, lister: Lister) -> Lister: """Update the state for the given lister instance in the database. Returns: a new Lister object, with all fields updated from the database Raises: StaleData if the `updated` timestamp for the lister instance in database doesn't match the one passed by the user. """ ... @remote_api_endpoint("origins/record") def record_listed_origins( self, listed_origins: Iterable[ListedOrigin] ) -> List[ListedOrigin]: """Record a set of origins that a lister has listed. This performs an "upsert": origins with the same (lister_id, url, visit_type) values are updated with new values for extra_loader_arguments, last_update and last_seen. """ ... + @remote_api_endpoint("origins/get") + def get_listed_origins( + self, + lister_id: Optional[UUID] = None, + url: Optional[str] = None, + limit: int = 1000, + page_token: Optional[ListedOriginPageToken] = None, + ) -> PaginatedListedOriginList: + """Get information on the listed origins matching either the `url` or + `lister_id`, or both arguments. + + Use the `limit` and `page_token` arguments for continuation. The next + page token, if any, is returned in the PaginatedListedOriginList object. + """ + ... + @remote_api_endpoint("priority_ratios/get") def get_priority_ratios(self): ... 
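# --- Illustrative sketch (editor's note, not part of this diff) ---
# The new `origins/get` endpoint pages on the (lister_id, url) key: each call
# returns a PaginatedListedOriginList holding one page of origins plus a
# `next_page_token` (a (lister_id, url) tuple) to resume from. A client-side
# iterator could look like the following; `iter_listed_origins` is a
# hypothetical helper, everything it calls is part of the interface above.

from typing import Iterator
from uuid import UUID

from swh.scheduler.interface import SchedulerInterface
from swh.scheduler.model import ListedOrigin


def iter_listed_origins(
    scheduler: SchedulerInterface, lister_id: UUID, page_size: int = 1000
) -> Iterator[ListedOrigin]:
    """Yield every origin recorded for `lister_id`, one page at a time."""
    page_token = None
    while True:
        page = scheduler.get_listed_origins(
            lister_id=lister_id, limit=page_size, page_token=page_token
        )
        yield from page.origins
        page_token = page.next_page_token
        if page_token is None:  # fewer than `page_size` rows came back: last page
            break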
diff --git a/swh/scheduler/model.py b/swh/scheduler/model.py index 211e769..904aaf9 100644 --- a/swh/scheduler/model.py +++ b/swh/scheduler/model.py @@ -1,162 +1,193 @@ # Copyright (C) 2020 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import datetime from uuid import UUID -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union import attr import attr.converters from attrs_strict import type_validator @attr.s class BaseSchedulerModel: """Base class for database-backed objects. These database-backed objects are defined through attrs-based attributes that match the columns of the database 1:1. This is a (very) lightweight ORM. These attrs-based attributes have metadata specific to the functionality expected from these fields in the database: - `primary_key`: the column is a primary key; it should be filtered out when doing an `update` of the object - `auto_primary_key`: the column is a primary key, which is automatically handled by the database. It will not be inserted to. This must be matched with a database-side default value. - `auto_now_add`: the column is a timestamp that is set to the current time when the object is inserted, and never updated afterwards. This must be matched with a database-side default value. - `auto_now`: the column is a timestamp that is set to the current time when the object is inserted or updated. """ _pk_cols: Optional[Tuple[str, ...]] = None _select_cols: Optional[Tuple[str, ...]] = None _insert_cols_and_metavars: Optional[Tuple[Tuple[str, ...], Tuple[str, ...]]] = None @classmethod def primary_key_columns(cls) -> Tuple[str, ...]: """Get the primary key columns for this object type""" if cls._pk_cols is None: columns: List[str] = [] for field in attr.fields(cls): if any( field.metadata.get(flag) for flag in ("auto_primary_key", "primary_key") ): columns.append(field.name) cls._pk_cols = tuple(sorted(columns)) return cls._pk_cols @classmethod def select_columns(cls) -> Tuple[str, ...]: """Get all the database columns needed for a `select` on this object type""" if cls._select_cols is None: columns: List[str] = [] for field in attr.fields(cls): columns.append(field.name) cls._select_cols = tuple(sorted(columns)) return cls._select_cols @classmethod def insert_columns_and_metavars(cls) -> Tuple[Tuple[str, ...], Tuple[str, ...]]: """Get the database columns and metavars needed for an `insert` or `update` on this object type. This implements support for the `auto_*` field metadata attributes. 
""" if cls._insert_cols_and_metavars is None: zipped_cols_and_metavars: List[Tuple[str, str]] = [] for field in attr.fields(cls): if any( field.metadata.get(flag) for flag in ("auto_now_add", "auto_primary_key") ): continue elif field.metadata.get("auto_now"): zipped_cols_and_metavars.append((field.name, "now()")) else: zipped_cols_and_metavars.append((field.name, f"%({field.name})s")) zipped_cols_and_metavars.sort() cols, metavars = zip(*zipped_cols_and_metavars) cls._insert_cols_and_metavars = cols, metavars return cls._insert_cols_and_metavars @attr.s class Lister(BaseSchedulerModel): name = attr.ib(type=str, validator=[type_validator()]) instance_name = attr.ib(type=str, validator=[type_validator()]) # Populated by database id = attr.ib( type=Optional[UUID], validator=type_validator(), default=None, metadata={"auto_primary_key": True}, ) current_state = attr.ib( type=Dict[str, Any], validator=[type_validator()], factory=dict ) created = attr.ib( type=Optional[datetime.datetime], validator=[type_validator()], default=None, metadata={"auto_now_add": True}, ) updated = attr.ib( type=Optional[datetime.datetime], validator=[type_validator()], default=None, metadata={"auto_now": True}, ) @attr.s class ListedOrigin(BaseSchedulerModel): """Basic information about a listed origin, output by a lister""" lister_id = attr.ib( type=UUID, validator=[type_validator()], metadata={"primary_key": True} ) url = attr.ib( type=str, validator=[type_validator()], metadata={"primary_key": True} ) visit_type = attr.ib( type=str, validator=[type_validator()], metadata={"primary_key": True} ) extra_loader_arguments = attr.ib( type=Dict[str, str], validator=[type_validator()], factory=dict ) last_update = attr.ib( type=Optional[datetime.datetime], validator=[type_validator()], default=None, ) enabled = attr.ib(type=bool, validator=[type_validator()], default=True) first_seen = attr.ib( type=Optional[datetime.datetime], validator=[type_validator()], default=None, metadata={"auto_now_add": True}, ) last_seen = attr.ib( type=Optional[datetime.datetime], validator=[type_validator()], default=None, metadata={"auto_now": True}, ) + + +ListedOriginPageToken = Tuple[UUID, str] + + +def convert_listed_origin_page_token( + input: Union[None, ListedOriginPageToken, List[Union[UUID, str]]] +) -> Optional[ListedOriginPageToken]: + if input is None: + return None + + if isinstance(input, tuple): + return input + + x, y = input + assert isinstance(x, UUID) + assert isinstance(y, str) + return (x, y) + + +@attr.s +class PaginatedListedOriginList(BaseSchedulerModel): + """A list of listed origins, with a continuation token""" + + origins = attr.ib(type=List[ListedOrigin], validator=[type_validator()]) + next_page_token = attr.ib( + type=Optional[ListedOriginPageToken], + validator=[type_validator()], + converter=convert_listed_origin_page_token, + default=None, + ) diff --git a/swh/scheduler/sql/30-swh-schema.sql b/swh/scheduler/sql/30-swh-schema.sql index 0dfed9e..118744c 100644 --- a/swh/scheduler/sql/30-swh-schema.sql +++ b/swh/scheduler/sql/30-swh-schema.sql @@ -1,161 +1,161 @@ create table dbversion ( version int primary key, release timestamptz not null, description text not null ); comment on table dbversion is 'Schema update tracking'; comment on column dbversion.version is 'SQL schema version'; comment on column dbversion.release is 'Version deployment timestamp'; comment on column dbversion.description is 'Version description'; insert into dbversion (version, release, description) - values (16, now(), 'Work In 
Progress'); + values (17, now(), 'Work In Progress'); create table task_type ( type text primary key, description text not null, backend_name text not null, default_interval interval, min_interval interval, max_interval interval, backoff_factor float, max_queue_length bigint, num_retries bigint, retry_delay interval ); comment on table task_type is 'Types of schedulable tasks'; comment on column task_type.type is 'Short identifier for the task type'; comment on column task_type.description is 'Human-readable task description'; comment on column task_type.backend_name is 'Name of the task in the job-running backend'; comment on column task_type.default_interval is 'Default interval for newly scheduled tasks'; comment on column task_type.min_interval is 'Minimum interval between two runs of a task'; comment on column task_type.max_interval is 'Maximum interval between two runs of a task'; comment on column task_type.backoff_factor is 'Adjustment factor for the backoff between two task runs'; comment on column task_type.max_queue_length is 'Maximum length of the queue for this type of tasks'; comment on column task_type.num_retries is 'Default number of retries on transient failures'; comment on column task_type.retry_delay is 'Retry delay for the task'; create type task_status as enum ('next_run_not_scheduled', 'next_run_scheduled', 'completed', 'disabled'); comment on type task_status is 'Status of a given task'; create type task_policy as enum ('recurring', 'oneshot'); comment on type task_policy is 'Recurrence policy of the given task'; create type task_priority as enum('high', 'normal', 'low'); comment on type task_priority is 'Priority of the given task'; create table priority_ratio( id task_priority primary key, ratio float not null ); comment on table priority_ratio is 'Oneshot task''s reading ratio per priority'; comment on column priority_ratio.id is 'Task priority id'; comment on column priority_ratio.ratio is 'Percentage of tasks to read per priority'; insert into priority_ratio (id, ratio) values ('high', 0.5); insert into priority_ratio (id, ratio) values ('normal', 0.3); insert into priority_ratio (id, ratio) values ('low', 0.2); create table task ( id bigserial primary key, type text not null references task_type(type), arguments jsonb not null, next_run timestamptz not null, current_interval interval, status task_status not null, policy task_policy not null default 'recurring', retries_left bigint not null default 0, priority task_priority references priority_ratio(id), check (policy <> 'recurring' or current_interval is not null) ); comment on table task is 'Schedule of recurring tasks'; comment on column task.arguments is 'Arguments passed to the underlying job scheduler. 
' 'Contains two keys, ''args'' (list) and ''kwargs'' (object).'; comment on column task.next_run is 'The next run of this task should be run on or after that time'; comment on column task.current_interval is 'The interval between two runs of this task, ' 'taking into account the backoff factor'; comment on column task.policy is 'Whether the task is one-shot or recurring'; comment on column task.retries_left is 'The number of "short delay" retries of the task in case of ' 'transient failure'; comment on column task.priority is 'Policy of the given task'; comment on column task.id is 'Task Identifier'; comment on column task.type is 'References task_type table'; comment on column task.status is 'Task status (''next_run_not_scheduled'', ''next_run_scheduled'', ''completed'', ''disabled'')'; create type task_run_status as enum ('scheduled', 'started', 'eventful', 'uneventful', 'failed', 'permfailed', 'lost'); comment on type task_run_status is 'Status of a given task run'; create table task_run ( id bigserial primary key, task bigint not null references task(id), backend_id text, scheduled timestamptz, started timestamptz, ended timestamptz, metadata jsonb, status task_run_status not null default 'scheduled' ); comment on table task_run is 'History of task runs sent to the job-running backend'; comment on column task_run.backend_id is 'id of the task run in the job-running backend'; comment on column task_run.metadata is 'Useful metadata for the given task run. ' 'For instance, the worker that took on the job, ' 'or the logs for the run.'; comment on column task_run.id is 'Task run identifier'; comment on column task_run.task is 'References task table'; comment on column task_run.scheduled is 'Scheduled run time for task'; comment on column task_run.started is 'Task starting time'; comment on column task_run.ended is 'Task ending time'; create table if not exists listers ( id uuid primary key default uuid_generate_v4(), name text not null, instance_name text not null, created timestamptz not null default now(), -- auto_now_add in the model current_state jsonb not null, updated timestamptz not null ); comment on table listers is 'Lister instances known to the origin visit scheduler'; comment on column listers.name is 'Name of the lister (e.g. github, gitlab, debian, ...)'; comment on column listers.instance_name is 'Name of the current instance of this lister (e.g. 
framagit, bitbucket, ...)'; comment on column listers.created is 'Timestamp at which the lister was originally created'; comment on column listers.current_state is 'Known current state of this lister'; comment on column listers.updated is 'Timestamp at which the lister state was last updated'; create table if not exists listed_origins ( -- Basic information lister_id uuid not null references listers(id), url text not null, visit_type text not null, extra_loader_arguments jsonb not null, -- Whether this origin still exists or not enabled boolean not null, -- time-based information first_seen timestamptz not null default now(), last_seen timestamptz not null, -- potentially provided by the lister last_update timestamptz, primary key (lister_id, url, visit_type) ); comment on table listed_origins is 'Origins known to the origin visit scheduler'; comment on column listed_origins.lister_id is 'Lister instance which owns this origin'; comment on column listed_origins.url is 'URL of the origin listed'; comment on column listed_origins.visit_type is 'Type of the visit which should be scheduled for the given url'; comment on column listed_origins.extra_loader_arguments is 'Extra arguments that should be passed to the loader for this origin'; comment on column listed_origins.enabled is 'Whether this origin has been seen during the last listing, and visits should be scheduled.'; comment on column listed_origins.first_seen is 'Time at which the origin was first seen by a lister'; comment on column listed_origins.last_seen is 'Time at which the origin was last seen by the lister'; comment on column listed_origins.last_update is 'Time of the last update to the origin recorded by the remote'; diff --git a/swh/scheduler/sql/60-swh-indexes.sql b/swh/scheduler/sql/60-swh-indexes.sql index 690541c..1812c3e 100644 --- a/swh/scheduler/sql/60-swh-indexes.sql +++ b/swh/scheduler/sql/60-swh-indexes.sql @@ -1,16 +1,19 @@ create index on task(type); create index on task(next_run); -- used for quick equality checking create index on task using btree(type, md5(arguments::text)); create index on task(priority); create index on task_run(task); create index on task_run(backend_id); create index task_run_id_asc_idx on task_run(task asc, started asc); -- lister schema create unique index on listers (name, instance_name); + +-- listed origins +create index on listed_origins (url); diff --git a/swh/scheduler/tests/test_api_client.py b/swh/scheduler/tests/test_api_client.py index 6589ab0..d931ac0 100644 --- a/swh/scheduler/tests/test_api_client.py +++ b/swh/scheduler/tests/test_api_client.py @@ -1,74 +1,75 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from flask import url_for import swh.scheduler.api.server as server from swh.scheduler.api.client import RemoteScheduler from swh.scheduler.tests.test_scheduler import TestScheduler # noqa # tests are executed using imported class (TestScheduler) using overloaded # swh_scheduler fixture below # the Flask app used as server in these tests @pytest.fixture def app(swh_db_scheduler): assert hasattr(server, "scheduler") server.scheduler = swh_db_scheduler yield server.app # the RPCClient class used as client used in these tests @pytest.fixture def swh_rpc_client_class(): return RemoteScheduler @pytest.fixture def swh_scheduler(swh_rpc_client, app): yield swh_rpc_client 
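# --- Illustrative sketch (editor's note, not part of this diff) ---
# A test along these lines could exercise the new `origins/get` endpoint end
# to end: record a batch of origins for a lister, then walk the pages and
# check that keyset pagination (ordered by lister_id, url) yields each origin
# exactly once. The test name, URLs and origin count are made up; the
# `swh_scheduler` fixture and the scheduler methods are the ones defined and
# used elsewhere in this changeset.

from swh.scheduler.model import ListedOrigin


def test_paginated_listed_origins(swh_scheduler):
    lister = swh_scheduler.get_or_create_lister(name="example-lister")
    origins = [
        ListedOrigin(
            lister_id=lister.id,
            url=f"https://example.org/repo-{i:03d}",
            visit_type="git",
        )
        for i in range(25)
    ]
    swh_scheduler.record_listed_origins(origins)

    collected = []
    page_token = None
    while True:
        page = swh_scheduler.get_listed_origins(
            lister_id=lister.id, limit=10, page_token=page_token
        )
        collected.extend(page.origins)
        page_token = page.next_page_token
        if page_token is None:
            break

    assert sorted(o.url for o in collected) == sorted(o.url for o in origins)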
def test_site_map(flask_app_client): sitemap = flask_app_client.get(url_for("site_map")) assert sitemap.headers["Content-Type"] == "application/json" rules = set(x["rule"] for x in sitemap.json) # we expect at least these rules expected_rules = set( "/" + rule for rule in ( "lister/get_or_create", "lister/update", + "origins/get", "origins/record", "priority_ratios/get", "task/create", "task/delete_archived", "task/disable", "task/filter_for_archive", "task/get", "task/grab_ready", "task/peek_ready", "task/search", "task/set_status", "task_run/end", "task_run/get", "task_run/schedule", "task_run/schedule_one", "task_run/start", "task_type/create", "task_type/get", "task_type/get_all", ) ) assert rules == expected_rules def test_root(flask_app_client): root = flask_app_client.get("/") assert root.status_code == 200 assert b"Software Heritage scheduler RPC server" in root.data diff --git a/swh/scheduler/tests/test_scheduler.py b/swh/scheduler/tests/test_scheduler.py index 6d58e8c..8ffa85e 100644 --- a/swh/scheduler/tests/test_scheduler.py +++ b/swh/scheduler/tests/test_scheduler.py @@ -1,678 +1,730 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import copy import datetime import random import uuid from collections import defaultdict import inspect -from typing import Any, Dict +from typing import Any, Dict, List, Optional from arrow import utcnow import attr import pytest from swh.scheduler.exc import StaleData from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model import ListedOrigin, ListedOriginPageToken from .common import tasks_from_template, TEMPLATES, TASK_TYPES, LISTERS def subdict(d, keys=None, excl=()): if keys is None: keys = [k for k in d.keys()] return {k: d[k] for k in keys if k not in excl} class TestScheduler: def test_interface(self, swh_scheduler): """Checks all methods of SchedulerInterface are implemented by this backend, and that they have the same signature.""" # Create an instance of the protocol (which cannot be instantiated # directly, so this creates a subclass, then instantiates it) interface = type("_", (SchedulerInterface,), {})() assert "create_task_type" in dir(interface) missing_methods = [] for meth_name in dir(interface): if meth_name.startswith("_"): continue interface_meth = getattr(interface, meth_name) try: concrete_meth = getattr(swh_scheduler, meth_name) except AttributeError: if not getattr(interface_meth, "deprecated_endpoint", False): # The backend is missing a (non-deprecated) endpoint missing_methods.append(meth_name) continue expected_signature = inspect.signature(interface_meth) actual_signature = inspect.signature(concrete_meth) assert expected_signature == actual_signature, meth_name assert missing_methods == [] def test_get_priority_ratios(self, swh_scheduler): assert swh_scheduler.get_priority_ratios() == { "high": 0.5, "normal": 0.3, "low": 0.2, } def test_add_task_type(self, swh_scheduler): tt = TASK_TYPES["git"] swh_scheduler.create_task_type(tt) assert tt == swh_scheduler.get_task_type(tt["type"]) tt2 = TASK_TYPES["hg"] swh_scheduler.create_task_type(tt2) assert tt == swh_scheduler.get_task_type(tt["type"]) assert tt2 == swh_scheduler.get_task_type(tt2["type"]) def test_create_task_type_idempotence(self, swh_scheduler): tt = TASK_TYPES["git"] swh_scheduler.create_task_type(tt) 
swh_scheduler.create_task_type(tt) assert tt == swh_scheduler.get_task_type(tt["type"]) def test_get_task_types(self, swh_scheduler): tt, tt2 = TASK_TYPES["git"], TASK_TYPES["hg"] swh_scheduler.create_task_type(tt) swh_scheduler.create_task_type(tt2) actual_task_types = swh_scheduler.get_task_types() assert tt in actual_task_types assert tt2 in actual_task_types def test_create_tasks(self, swh_scheduler): priority_ratio = self._priority_ratio(swh_scheduler) self._create_task_types(swh_scheduler) num_tasks_priority = 100 tasks_1 = tasks_from_template(TEMPLATES["git"], utcnow(), 100) tasks_2 = tasks_from_template( TEMPLATES["hg"], utcnow(), 100, num_tasks_priority, priorities=priority_ratio, ) tasks = tasks_1 + tasks_2 # tasks are returned only once with their ids ret1 = swh_scheduler.create_tasks(tasks + tasks_1 + tasks_2) set_ret1 = set([t["id"] for t in ret1]) # creating the same set result in the same ids ret = swh_scheduler.create_tasks(tasks) set_ret = set([t["id"] for t in ret]) # Idempotence results assert set_ret == set_ret1 assert len(ret) == len(ret1) ids = set() actual_priorities = defaultdict(int) for task, orig_task in zip(ret, tasks): task = copy.deepcopy(task) task_type = TASK_TYPES[orig_task["type"].split("-")[-1]] assert task["id"] not in ids assert task["status"] == "next_run_not_scheduled" assert task["current_interval"] == task_type["default_interval"] assert task["policy"] == orig_task.get("policy", "recurring") priority = task.get("priority") if priority: actual_priorities[priority] += 1 assert task["retries_left"] == (task_type["num_retries"] or 0) ids.add(task["id"]) del task["id"] del task["status"] del task["current_interval"] del task["retries_left"] if "policy" not in orig_task: del task["policy"] if "priority" not in orig_task: del task["priority"] assert task == orig_task assert dict(actual_priorities) == { priority: int(ratio * num_tasks_priority) for priority, ratio in priority_ratio.items() } def test_peek_ready_tasks_no_priority(self, swh_scheduler): self._create_task_types(swh_scheduler) t = utcnow() task_type = TEMPLATES["git"]["type"] tasks = tasks_from_template(TEMPLATES["git"], t, 100) random.shuffle(tasks) swh_scheduler.create_tasks(tasks) ready_tasks = swh_scheduler.peek_ready_tasks(task_type) assert len(ready_tasks) == len(tasks) for i in range(len(ready_tasks) - 1): assert ready_tasks[i]["next_run"] <= ready_tasks[i + 1]["next_run"] # Only get the first few ready tasks limit = random.randrange(5, 5 + len(tasks) // 2) ready_tasks_limited = swh_scheduler.peek_ready_tasks(task_type, num_tasks=limit) assert len(ready_tasks_limited) == limit assert ready_tasks_limited == ready_tasks[:limit] # Limit by timestamp max_ts = tasks[limit - 1]["next_run"] ready_tasks_timestamped = swh_scheduler.peek_ready_tasks( task_type, timestamp=max_ts ) for ready_task in ready_tasks_timestamped: assert ready_task["next_run"] <= max_ts # Make sure we get proper behavior for the first ready tasks assert ready_tasks[: len(ready_tasks_timestamped)] == ready_tasks_timestamped # Limit by both ready_tasks_both = swh_scheduler.peek_ready_tasks( task_type, timestamp=max_ts, num_tasks=limit // 3 ) assert len(ready_tasks_both) <= limit // 3 for ready_task in ready_tasks_both: assert ready_task["next_run"] <= max_ts assert ready_task in ready_tasks[: limit // 3] def _priority_ratio(self, swh_scheduler): return swh_scheduler.get_priority_ratios() def test_peek_ready_tasks_mixed_priorities(self, swh_scheduler): priority_ratio = self._priority_ratio(swh_scheduler) 
self._create_task_types(swh_scheduler) t = utcnow() task_type = TEMPLATES["git"]["type"] num_tasks_priority = 100 num_tasks_no_priority = 100 # Create tasks with and without priorities tasks = tasks_from_template( TEMPLATES["git"], t, num=num_tasks_no_priority, num_priority=num_tasks_priority, priorities=priority_ratio, ) random.shuffle(tasks) swh_scheduler.create_tasks(tasks) # take all available tasks ready_tasks = swh_scheduler.peek_ready_tasks(task_type) assert len(ready_tasks) == len(tasks) assert num_tasks_priority + num_tasks_no_priority == len(ready_tasks) count_tasks_per_priority = defaultdict(int) for task in ready_tasks: priority = task.get("priority") if priority: count_tasks_per_priority[priority] += 1 assert dict(count_tasks_per_priority) == { priority: int(ratio * num_tasks_priority) for priority, ratio in priority_ratio.items() } # Only get some ready tasks num_tasks = random.randrange(5, 5 + num_tasks_no_priority // 2) num_tasks_priority = random.randrange(5, num_tasks_priority // 2) ready_tasks_limited = swh_scheduler.peek_ready_tasks( task_type, num_tasks=num_tasks, num_tasks_priority=num_tasks_priority ) count_tasks_per_priority = defaultdict(int) for task in ready_tasks_limited: priority = task.get("priority") count_tasks_per_priority[priority] += 1 import math for priority, ratio in priority_ratio.items(): expected_count = math.ceil(ratio * num_tasks_priority) actual_prio = count_tasks_per_priority[priority] assert actual_prio == expected_count or actual_prio == expected_count + 1 assert count_tasks_per_priority[None] == num_tasks def test_grab_ready_tasks(self, swh_scheduler): priority_ratio = self._priority_ratio(swh_scheduler) self._create_task_types(swh_scheduler) t = utcnow() task_type = TEMPLATES["git"]["type"] num_tasks_priority = 100 num_tasks_no_priority = 100 # Create tasks with and without priorities tasks = tasks_from_template( TEMPLATES["git"], t, num=num_tasks_no_priority, num_priority=num_tasks_priority, priorities=priority_ratio, ) random.shuffle(tasks) swh_scheduler.create_tasks(tasks) first_ready_tasks = swh_scheduler.peek_ready_tasks( task_type, num_tasks=10, num_tasks_priority=10 ) grabbed_tasks = swh_scheduler.grab_ready_tasks( task_type, num_tasks=10, num_tasks_priority=10 ) for peeked, grabbed in zip(first_ready_tasks, grabbed_tasks): assert peeked["status"] == "next_run_not_scheduled" del peeked["status"] assert grabbed["status"] == "next_run_scheduled" del grabbed["status"] assert peeked == grabbed assert peeked["priority"] == grabbed["priority"] def test_get_tasks(self, swh_scheduler): self._create_task_types(swh_scheduler) t = utcnow() tasks = tasks_from_template(TEMPLATES["git"], t, 100) tasks = swh_scheduler.create_tasks(tasks) random.shuffle(tasks) while len(tasks) > 1: length = random.randrange(1, len(tasks)) cur_tasks = sorted(tasks[:length], key=lambda x: x["id"]) tasks[:length] = [] ret = swh_scheduler.get_tasks(task["id"] for task in cur_tasks) # result is not guaranteed to be sorted ret.sort(key=lambda x: x["id"]) assert ret == cur_tasks def test_search_tasks(self, swh_scheduler): def make_real_dicts(lst): """RealDictRow is not a real dict.""" return [dict(d.items()) for d in lst] self._create_task_types(swh_scheduler) t = utcnow() tasks = tasks_from_template(TEMPLATES["git"], t, 100) tasks = swh_scheduler.create_tasks(tasks) assert make_real_dicts(swh_scheduler.search_tasks()) == make_real_dicts(tasks) def assert_filtered_task_ok( self, task: Dict[str, Any], after: datetime.datetime, before: datetime.datetime ) -> None: """Ensure 
filtered tasks have the right expected properties (within the range, recurring disabled, etc..) """ started = task["started"] date = started if started is not None else task["scheduled"] assert after <= date and date <= before if task["task_policy"] == "oneshot": assert task["task_status"] in ["completed", "disabled"] if task["task_policy"] == "recurring": assert task["task_status"] in ["disabled"] def test_filter_task_to_archive(self, swh_scheduler): """Filtering only list disabled recurring or completed oneshot tasks """ self._create_task_types(swh_scheduler) _time = utcnow() recurring = tasks_from_template(TEMPLATES["git"], _time, 12) oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12) total_tasks = len(recurring) + len(oneshots) # simulate scheduling tasks pending_tasks = swh_scheduler.create_tasks(recurring + oneshots) backend_tasks = [ { "task": task["id"], "backend_id": str(uuid.uuid4()), "scheduled": utcnow(), } for task in pending_tasks ] swh_scheduler.mass_schedule_task_runs(backend_tasks) # we simulate the task are being done _tasks = [] for task in backend_tasks: t = swh_scheduler.end_task_run(task["backend_id"], status="eventful") _tasks.append(t) # Randomly update task's status per policy status_per_policy = {"recurring": 0, "oneshot": 0} status_choice = { # policy: [tuple (1-for-filtering, 'associated-status')] "recurring": [ (1, "disabled"), (0, "completed"), (0, "next_run_not_scheduled"), ], "oneshot": [ (0, "next_run_not_scheduled"), (1, "disabled"), (1, "completed"), ], } tasks_to_update = defaultdict(list) _task_ids = defaultdict(list) # randomize 'disabling' recurring task or 'complete' oneshot task for task in pending_tasks: policy = task["policy"] _task_ids[policy].append(task["id"]) status = random.choice(status_choice[policy]) if status[0] != 1: continue # elected for filtering status_per_policy[policy] += status[0] tasks_to_update[policy].append(task["id"]) swh_scheduler.disable_tasks(tasks_to_update["recurring"]) # hack: change the status to something else than completed/disabled swh_scheduler.set_status_tasks( _task_ids["oneshot"], status="next_run_not_scheduled" ) # complete the tasks to update swh_scheduler.set_status_tasks(tasks_to_update["oneshot"], status="completed") total_tasks_filtered = ( status_per_policy["recurring"] + status_per_policy["oneshot"] ) # no pagination scenario # retrieve tasks to archive after = _time.shift(days=-1) after_ts = after.format("YYYY-MM-DD") before = utcnow().shift(days=1) before_ts = before.format("YYYY-MM-DD") tasks_result = swh_scheduler.filter_task_to_archive( after_ts=after_ts, before_ts=before_ts, limit=total_tasks ) tasks_to_archive = tasks_result["tasks"] assert len(tasks_to_archive) == total_tasks_filtered assert tasks_result.get("next_page_token") is None actual_filtered_per_status = {"recurring": 0, "oneshot": 0} for task in tasks_to_archive: self.assert_filtered_task_ok(task, after, before) actual_filtered_per_status[task["task_policy"]] += 1 assert actual_filtered_per_status == status_per_policy # pagination scenario nb_tasks = 3 tasks_result = swh_scheduler.filter_task_to_archive( after_ts=after_ts, before_ts=before_ts, limit=nb_tasks ) tasks_to_archive2 = tasks_result["tasks"] assert len(tasks_to_archive2) == nb_tasks next_page_token = tasks_result["next_page_token"] assert next_page_token is not None all_tasks = tasks_to_archive2 while next_page_token is not None: # Retrieve paginated results tasks_result = swh_scheduler.filter_task_to_archive( after_ts=after_ts, before_ts=before_ts, limit=nb_tasks, 
                page_token=next_page_token,
            )

            tasks_to_archive2 = tasks_result["tasks"]

            assert len(tasks_to_archive2) <= nb_tasks
            all_tasks.extend(tasks_to_archive2)
            next_page_token = tasks_result.get("next_page_token")

        actual_filtered_per_status = {"recurring": 0, "oneshot": 0}
        for task in all_tasks:
            self.assert_filtered_task_ok(task, after, before)
            actual_filtered_per_status[task["task_policy"]] += 1

        assert actual_filtered_per_status == status_per_policy

    def test_delete_archived_tasks(self, swh_scheduler):
        self._create_task_types(swh_scheduler)
        _time = utcnow()
        recurring = tasks_from_template(TEMPLATES["git"], _time, 12)
        oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12)
        total_tasks = len(recurring) + len(oneshots)
        pending_tasks = swh_scheduler.create_tasks(recurring + oneshots)
        backend_tasks = [
            {
                "task": task["id"],
                "backend_id": str(uuid.uuid4()),
                "scheduled": utcnow(),
            }
            for task in pending_tasks
        ]
        swh_scheduler.mass_schedule_task_runs(backend_tasks)

        _tasks = []
        percent = random.randint(0, 100)  # random selection threshold for removal
        for task in backend_tasks:
            t = swh_scheduler.end_task_run(task["backend_id"], status="eventful")
            c = random.randint(0, 100)
            if c <= percent:
                _tasks.append({"task_id": t["task"], "task_run_id": t["id"]})

        swh_scheduler.delete_archived_tasks(_tasks)

        all_tasks = [task["id"] for task in swh_scheduler.search_tasks()]
        tasks_count = len(all_tasks)
        tasks_run_count = len(swh_scheduler.get_task_runs(all_tasks))

        assert tasks_count == total_tasks - len(_tasks)
        assert tasks_run_count == total_tasks - len(_tasks)

    def test_get_task_runs_no_task(self, swh_scheduler):
        """No task exists in the scheduler's db, get_task_runs() should
        always return an empty list.

        """
        assert not swh_scheduler.get_task_runs(task_ids=())
        assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3))
        assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3), limit=10)

    def test_get_task_runs_no_task_executed(self, swh_scheduler):
        """No task has been executed yet, get_task_runs() should always
        return an empty list.

        """
        self._create_task_types(swh_scheduler)
        _time = utcnow()
        recurring = tasks_from_template(TEMPLATES["git"], _time, 12)
        oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12)
        swh_scheduler.create_tasks(recurring + oneshots)

        assert not swh_scheduler.get_task_runs(task_ids=())
        assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3))
        assert not swh_scheduler.get_task_runs(task_ids=(1, 2, 3), limit=10)

    def test_get_task_runs_with_scheduled(self, swh_scheduler):
        """Some tasks have been scheduled but not executed yet;
        get_task_runs() should not return an empty list, and limit should
        behave as expected.

        """
        self._create_task_types(swh_scheduler)
        _time = utcnow()
        recurring = tasks_from_template(TEMPLATES["git"], _time, 12)
        oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12)
        total_tasks = len(recurring) + len(oneshots)
        pending_tasks = swh_scheduler.create_tasks(recurring + oneshots)
        backend_tasks = [
            {
                "task": task["id"],
                "backend_id": str(uuid.uuid4()),
                "scheduled": utcnow(),
            }
            for task in pending_tasks
        ]
        swh_scheduler.mass_schedule_task_runs(backend_tasks)

        assert not swh_scheduler.get_task_runs(task_ids=[total_tasks + 1])

        btask = backend_tasks[0]
        runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]])
        assert len(runs) == 1
        run = runs[0]
        assert subdict(run, excl=("id",)) == {
            "task": btask["task"],
            "backend_id": btask["backend_id"],
            "scheduled": btask["scheduled"],
            "started": None,
            "ended": None,
            "metadata": None,
            "status": "scheduled",
        }

        runs = swh_scheduler.get_task_runs(
            task_ids=[bt["task"] for bt in backend_tasks], limit=2
        )
        assert len(runs) == 2

        runs = swh_scheduler.get_task_runs(
            task_ids=[bt["task"] for bt in backend_tasks]
        )
        assert len(runs) == total_tasks

        keys = ("task", "backend_id", "scheduled")
        assert (
            sorted([subdict(x, keys) for x in runs], key=lambda x: x["task"])
            == backend_tasks
        )

    def test_get_task_runs_with_executed(self, swh_scheduler):
        """Some tasks have been executed; get_task_runs() should not
        return an empty list, and limit should behave as expected.

        """
        self._create_task_types(swh_scheduler)
        _time = utcnow()
        recurring = tasks_from_template(TEMPLATES["git"], _time, 12)
        oneshots = tasks_from_template(TEMPLATES["hg"], _time, 12)
        pending_tasks = swh_scheduler.create_tasks(recurring + oneshots)
        backend_tasks = [
            {
                "task": task["id"],
                "backend_id": str(uuid.uuid4()),
                "scheduled": utcnow(),
            }
            for task in pending_tasks
        ]
        swh_scheduler.mass_schedule_task_runs(backend_tasks)

        btask = backend_tasks[0]
        ts = utcnow()
        swh_scheduler.start_task_run(
            btask["backend_id"], metadata={"something": "stupid"}, timestamp=ts
        )
        runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]])
        assert len(runs) == 1
        assert subdict(runs[0], excl=("id",)) == {
            "task": btask["task"],
            "backend_id": btask["backend_id"],
            "scheduled": btask["scheduled"],
            "started": ts,
            "ended": None,
            "metadata": {"something": "stupid"},
            "status": "started",
        }

        ts2 = utcnow()
        swh_scheduler.end_task_run(
            btask["backend_id"],
            metadata={"other": "stuff"},
            timestamp=ts2,
            status="eventful",
        )
        runs = swh_scheduler.get_task_runs(task_ids=[btask["task"]])
        assert len(runs) == 1
        assert subdict(runs[0], excl=("id",)) == {
            "task": btask["task"],
            "backend_id": btask["backend_id"],
            "scheduled": btask["scheduled"],
            "started": ts,
            "ended": ts2,
            "metadata": {"something": "stupid", "other": "stuff"},
            "status": "eventful",
        }

    def test_get_or_create_lister(self, swh_scheduler):
        db_listers = []
        for lister_args in LISTERS:
            db_listers.append(swh_scheduler.get_or_create_lister(**lister_args))

        for lister, lister_args in zip(db_listers, LISTERS):
            assert lister.name == lister_args["name"]
            assert lister.instance_name == lister_args.get("instance_name", "")

            lister_get_again = swh_scheduler.get_or_create_lister(
                lister.name, lister.instance_name
            )

            assert lister == lister_get_again

    def test_update_lister(self, swh_scheduler, stored_lister):
        lister = attr.evolve(stored_lister, current_state={"updated": "now"})

        updated_lister = swh_scheduler.update_lister(lister)

        assert updated_lister.updated > lister.updated
        assert updated_lister == attr.evolve(lister, updated=updated_lister.updated)

    def test_update_lister_stale(self, swh_scheduler, stored_lister):
        swh_scheduler.update_lister(stored_lister)

        with pytest.raises(StaleData) as exc:
            swh_scheduler.update_lister(stored_lister)

        assert "state not updated" in exc.value.args[0]

    def test_record_listed_origins(self, swh_scheduler, listed_origins):
        ret = swh_scheduler.record_listed_origins(listed_origins)

        assert set(returned.url for returned in ret) == set(
            origin.url for origin in listed_origins
        )

        assert all(origin.first_seen == origin.last_seen for origin in ret)

    def test_record_listed_origins_upsert(self, swh_scheduler, listed_origins):
        # First, insert `cutoff` origins
        cutoff = 100
        assert cutoff < len(listed_origins)

        ret = swh_scheduler.record_listed_origins(listed_origins[:cutoff])
        assert len(ret) == cutoff

        # Then, insert all origins, including the `cutoff` first.
        ret = swh_scheduler.record_listed_origins(listed_origins)
        assert len(ret) == len(listed_origins)

        # Two different "first seen" values
        assert len(set(origin.first_seen for origin in ret)) == 2

        # But a single "last seen" value
        assert len(set(origin.last_seen for origin in ret)) == 1

+    def test_get_listed_origins_exact(self, swh_scheduler, listed_origins):
+        swh_scheduler.record_listed_origins(listed_origins)
+
+        for i, origin in enumerate(listed_origins):
+            ret = swh_scheduler.get_listed_origins(
+                lister_id=origin.lister_id, url=origin.url
+            )
+
+            assert ret.next_page_token is None
+            assert len(ret.origins) == 1
+            assert ret.origins[0].lister_id == origin.lister_id
+            assert ret.origins[0].url == origin.url
+
+    @pytest.mark.parametrize("num_origins,limit", [(20, 6), (5, 42), (20, 20)])
+    def test_get_listed_origins_limit(
+        self, swh_scheduler, listed_origins, num_origins, limit
+    ) -> None:
+        added_origins = sorted(
+            listed_origins[:num_origins], key=lambda o: (o.lister_id, o.url)
+        )
+        swh_scheduler.record_listed_origins(added_origins)
+
+        returned_origins: List[ListedOrigin] = []
+        call_count = 0
+        next_page_token: Optional[ListedOriginPageToken] = None
+        while True:
+            call_count += 1
+            ret = swh_scheduler.get_listed_origins(
+                lister_id=listed_origins[0].lister_id,
+                limit=limit,
+                page_token=next_page_token,
+            )
+            returned_origins.extend(ret.origins)
+            next_page_token = ret.next_page_token
+            if next_page_token is None:
+                break
+
+        assert call_count == (num_origins // limit) + 1
+
+        assert len(returned_origins) == num_origins
+        assert [(origin.lister_id, origin.url) for origin in returned_origins] == [
+            (origin.lister_id, origin.url) for origin in added_origins
+        ]
+
+    def test_get_listed_origins_all(self, swh_scheduler, listed_origins) -> None:
+        swh_scheduler.record_listed_origins(listed_origins)
+
+        ret = swh_scheduler.get_listed_origins(limit=len(listed_origins) + 1)
+        assert ret.next_page_token is None
+        assert len(ret.origins) == len(listed_origins)
+
    def _create_task_types(self, scheduler):
        for tt in TASK_TYPES.values():
            scheduler.create_task_type(tt)
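Note for reviewers: the pagination contract exercised by test_get_listed_origins_limit above (call get_listed_origins, collect ret.origins, feed ret.next_page_token back until it is None) is the pattern callers are expected to follow. The sketch below is illustrative only and not part of this diff: the helper name iter_listed_origins and the default page size are made up, and the scheduler argument is simply assumed to expose get_listed_origins with the keywords used in the tests.

from typing import Iterator, Optional
from uuid import UUID

from swh.scheduler.model import ListedOrigin, ListedOriginPageToken


def iter_listed_origins(
    scheduler, lister_id: UUID, page_size: int = 1000
) -> Iterator[ListedOrigin]:
    """Yield every ListedOrigin recorded for lister_id, one page at a time.

    `scheduler` is any object exposing get_listed_origins() with the
    lister_id / limit / page_token keywords used by the tests above.
    """
    page_token: Optional[ListedOriginPageToken] = None
    while True:
        ret = scheduler.get_listed_origins(
            lister_id=lister_id, limit=page_size, page_token=page_token
        )
        yield from ret.origins
        page_token = ret.next_page_token
        if page_token is None:
            return

Run against the backend under test, such a loop yields the same (lister_id, url) sequence that test_get_listed_origins_limit asserts on.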