diff --git a/swh/lister/sourceforge/lister.py b/swh/lister/sourceforge/lister.py
index ec1db9f..909f0b5 100644
--- a/swh/lister/sourceforge/lister.py
+++ b/swh/lister/sourceforge/lister.py
@@ -1,224 +1,349 @@
 # Copyright (C) 2021  The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 import datetime
 from enum import Enum
 import logging
 import re
-from typing import Iterator, List, Set
+from typing import Any, Dict, Iterator, List, Optional, Set, Tuple
 from xml.etree import ElementTree
 
 import iso8601
 import requests
 from tenacity.before_sleep import before_sleep_log
 
+from swh.core.api.classes import stream_results
 from swh.lister.utils import throttling_retry
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
 from .. import USER_AGENT
-from ..pattern import StatelessLister
+from ..pattern import Lister
 
 logger = logging.getLogger(__name__)
 
 
 class VcsNames(Enum):
     """Used to filter SourceForge tool names for valid VCS types"""
 
     # CVS projects are read-only
     CVS = "cvs"
     GIT = "git"
     SUBVERSION = "svn"
     MERCURIAL = "hg"
     BAZAAR = "bzr"
 
 
 VCS_NAMES = set(v.value for v in VcsNames.__members__.values())
 
 
 @dataclass
 class SourceForgeListerEntry:
     vcs: VcsNames
     url: str
     last_modified: datetime.date
 
 
+SubSitemapNameT = str
+ProjectNameT = str
+# SourceForge only offers day-level granularity, which is good enough for our purposes
+LastModifiedT = datetime.date
+
+
+@dataclass
+class SourceForgeListerState:
+    """Current state of the SourceForge lister in incremental runs
+    """
+
+    # Maps each subsitemap URL to its "last modified" date. If the subsitemap
+    # is not recorded, we assume a full run of it is needed. If the date is the
+    # same, we skip it; otherwise we request it and compare every project's
+    # "last modified" date against the `ListedOrigin`s from the database.
+    subsitemap_last_modified: Dict[SubSitemapNameT, LastModifiedT] = field(
+        default_factory=dict
+    )
+    # Some projects (not the majority, but still meaningful) have no VCS for us
+    # to archive. We remember a mapping of their API URL to their "last modified"
+    # date so we don't keep querying them needlessly every time.
+    empty_projects: Dict[str, LastModifiedT] = field(default_factory=dict)
+
+
 SourceForgeListerPage = List[SourceForgeListerEntry]
 
 MAIN_SITEMAP_URL = "https://sourceforge.net/allura_sitemap/sitemap.xml"
 SITEMAP_XML_NAMESPACE = "{http://www.sitemaps.org/schemas/sitemap/0.9}"
 
 # API resource endpoint for information about the given project.
 #
 # `namespace`: Project namespace. Very often `p`, but can be something else like
 #              `adobe`
 # `project`: Project name, e.g. `seedai`. Can be a subproject, e.g `backapps/website`.
 PROJECT_API_URL_FORMAT = "https://sourceforge.net/rest/{namespace}/{project}"
 
 # Predictable URL for cloning (in the broad sense) a VCS registered for the project.
 #
 # `vcs`: VCS type, one of `VCS_NAMES`
 # `namespace`: Project namespace. Very often `p`, but can be something else like
 #              `adobe`.
 # `project`: Project name, e.g. `seedai`. Can be a subproject, e.g `backapps/website`.
 # `mount_point`: url path used by the repo. For example, the Code::Blocks project uses
 #                `git` (https://git.code.sf.net/p/codeblocks/git).
 CLONE_URL_FORMAT = "{vcs}.code.sf.net/{namespace}/{project}/{mount_point}"
 
 PROJ_URL_RE = re.compile(
     r"^https://sourceforge.net/(?P<namespace>[^/]+)/(?P<project>[^/]+)/(?P<rest>.*)?"
 )
 
+# Mapping of `(namespace, project name)` to `last modified` date.
+ProjectsLastModifiedCache = Dict[Tuple[str, str], LastModifiedT]
+
 
-class SourceForgeLister(StatelessLister[SourceForgeListerPage]):
+class SourceForgeLister(Lister[SourceForgeListerState, SourceForgeListerPage]):
     """List origins from the "SourceForge" forge.
 
     """
 
     # Part of the lister API, that identifies this lister
     LISTER_NAME = "sourceforge"
 
-    def __init__(self, scheduler: SchedulerInterface):
+    def __init__(self, scheduler: SchedulerInterface, incremental: bool = False):
         super().__init__(
             scheduler=scheduler, url="https://sourceforge.net", instance="main"
         )
 
+        # Will hold the currently saved "last modified" dates to compare against our
+        # requests.
+        self._project_last_modified: Optional[ProjectsLastModifiedCache] = None
         self.session = requests.Session()
         # Declare the USER_AGENT is more sysadm-friendly for the forge we list
         self.session.headers.update(
             {"Accept": "application/json", "User-Agent": USER_AGENT}
         )
+        self.incremental = incremental
+
+    def state_from_dict(self, d: Dict[str, Dict[str, Any]]) -> SourceForgeListerState:
+        """Deserialize the state: every stored ISO date string becomes a date"""
+
+        def to_dates(key: str) -> Dict[str, datetime.date]:
+            # A key that is absent from `d` yields an empty mapping
+            raw = d.get(key, {})
+            return {name: datetime.date.fromisoformat(v) for name, v in raw.items()}
+
+        return SourceForgeListerState(
+            subsitemap_last_modified=to_dates("subsitemap_last_modified"),
+            empty_projects=to_dates("empty_projects"),
+        )
+
+    def state_to_dict(self, state: SourceForgeListerState) -> Dict[str, Any]:
+        """Serialize the state, rendering each date as an ISO 8601 string"""
+        serialized: Dict[str, Any] = {}
+        # The dataclass fields and the serialized keys share the same names,
+        # so a single loop handles both mappings.
+        for key in ("subsitemap_last_modified", "empty_projects"):
+            dates = getattr(state, key)
+            serialized[key] = {name: day.isoformat() for name, day in dates.items()}
+        return serialized
+
+    def projects_last_modified(self) -> ProjectsLastModifiedCache:
+        """Map each already-listed `(namespace, project)` to its recorded date.
+
+        Empty for full runs; otherwise loaded from the scheduler once, then cached.
+        """
+        if not self.incremental:
+            return {}
+        cached = self._project_last_modified
+        if cached is not None:
+            return cached
+        # The project part may contain slashes (subprojects); the mount point,
+        # which is the last path component, cannot.
+        clone_url_re = re.compile(
+            r".*\.code\.sf\.net/(?P<namespace>[^/]+)/(?P<project>.+)/.*"
+        )
+        known: ProjectsLastModifiedCache = {}
+        # We know there will be at least that many origins
+        for origin in stream_results(
+            self.scheduler.get_listed_origins, self.lister_obj.id, limit=300_000
+        ):
+            parsed = clone_url_re.match(origin.url)
+            assert parsed is not None
+            # Every VCS of a (sub)project carries the same date, so when a
+            # project has several origins the last one seen simply wins.
+            recorded = origin.last_update
+            assert recorded is not None
+            key = (parsed.group("namespace"), parsed.group("project"))
+            known[key] = recorded.date()
+
+        self._project_last_modified = known
+        return known
 
     @throttling_retry(before_sleep=before_sleep_log(logger, logging.WARNING))
     def page_request(self, url, params) -> requests.Response:
         # Log listed URL to ease debugging
         logger.debug("Fetching URL %s with params %s", url, params)
         response = self.session.get(url, params=params)
 
         if response.status_code != 200:
             # Log response content to ease debugging
             logger.warning(
                 "Unexpected HTTP status code %s on %s: %s",
                 response.status_code,
                 response.url,
                 response.content,
             )
         # The lister must fail on blocking errors
         response.raise_for_status()
 
         return response
 
     def get_pages(self) -> Iterator[SourceForgeListerPage]:
         """
         SourceForge has a main XML sitemap that lists its sharded sitemaps for all
         projects.
         Each XML sub-sitemap lists project pages, which are not unique per project: a
         project can have a wiki, a home, a git, an svn, etc.
         For each unique project, we query an API endpoint that lists (among
         other things) the tools associated with said project, some of which are
         the VCS used. Subprojects are considered separate projects.
         Lastly we use the information of which VCS are used to build the predictable
         clone URL for any given VCS.
         """
         sitemap_contents = self.page_request(MAIN_SITEMAP_URL, {}).text
         tree = ElementTree.fromstring(sitemap_contents)
 
         for subsitemap in tree.iterfind(f"{SITEMAP_XML_NAMESPACE}sitemap"):
-            # TODO use when adding incremental support
-            # last_modified = sub_sitemap.find(f"{SITEMAP_XML_NAMESPACE}lastmod")
+            last_modified_el = subsitemap.find(f"{SITEMAP_XML_NAMESPACE}lastmod")
+            assert last_modified_el is not None and last_modified_el.text is not None
+            last_modified = datetime.date.fromisoformat(last_modified_el.text)
             location = subsitemap.find(f"{SITEMAP_XML_NAMESPACE}loc")
-            assert location is not None
+            assert location is not None and location.text is not None
             sub_url = location.text
+
+            if self.incremental:
+                recorded_last_mod = self.state.subsitemap_last_modified.get(sub_url)
+                if recorded_last_mod == last_modified:
+                    # The entire subsitemap hasn't changed, so none of its projects
+                    # have either, skip it.
+                    continue
+
+            self.state.subsitemap_last_modified[sub_url] = last_modified
             subsitemap_contents = self.page_request(sub_url, {}).text
             subtree = ElementTree.fromstring(subsitemap_contents)
 
             yield from self._get_pages_from_subsitemap(subtree)
 
     def get_origins_from_page(
         self, page: SourceForgeListerPage
     ) -> Iterator[ListedOrigin]:
         assert self.lister_obj.id is not None
         for hit in page:
             yield ListedOrigin(
                 lister_id=self.lister_obj.id,
                 visit_type=hit.vcs.value,
                 url=hit.url,
                 last_update=iso8601.parse_date(hit.last_modified),
             )
 
     def _get_pages_from_subsitemap(
         self, subtree: ElementTree.Element
     ) -> Iterator[SourceForgeListerPage]:
-        projects: Set[str] = set()
+        projects: Set[ProjectNameT] = set()
         for project_block in subtree.iterfind(f"{SITEMAP_XML_NAMESPACE}url"):
             last_modified_block = project_block.find(f"{SITEMAP_XML_NAMESPACE}lastmod")
             assert last_modified_block is not None
             last_modified = last_modified_block.text
             location = project_block.find(f"{SITEMAP_XML_NAMESPACE}loc")
             assert location is not None
             project_url = location.text
             assert project_url is not None
 
             match = PROJ_URL_RE.match(project_url)
             if match:
                 matches = match.groupdict()
                 namespace = matches["namespace"]
                 if namespace == "projects":
                     # These have a `p`-namespaced counterpart, use that instead
                     continue
 
                 project = matches["project"]
                 rest = matches["rest"]
                 if rest.count("/") > 1:
                     # This is a subproject. There exists no sub-subprojects.
                     subproject_name = rest.rsplit("/", 2)[0]
                     project = f"{project}/{subproject_name}"
 
                 prev_len = len(projects)
                 projects.add(project)
 
                 if prev_len == len(projects):
                     # Already seen
                     continue
 
                 pages = self._get_pages_for_project(namespace, project, last_modified)
                 if pages:
                     yield pages
                 else:
                     logger.debug("Project '%s' does not have any VCS", project)
             else:
                 # Should always match, let's log it
                 msg = "Project URL '%s' does not match expected pattern"
                 logger.warning(msg, project_url)
 
     def _get_pages_for_project(
         self, namespace, project, last_modified
     ) -> SourceForgeListerPage:
         endpoint = PROJECT_API_URL_FORMAT.format(namespace=namespace, project=project)
+        empty_project_last_modified = self.state.empty_projects.get(endpoint)
+        if empty_project_last_modified is not None:
+            if last_modified == empty_project_last_modified.isoformat():
+                # Project has not changed, so is still empty, meaning it has
+                # no VCS attached that we can archive.
+                logger.debug(f"Project {namespace}/{project} is still empty")
+                return []
+
+        if self.incremental:
+            expected = self.projects_last_modified().get((namespace, project))
+
+            if expected is not None:
+                if expected.isoformat() == last_modified:
+                    # Project has not changed
+                    logger.debug(f"Project {namespace}/{project} has not changed")
+                    return []
+                else:
+                    logger.debug(f"Project {namespace}/{project} was updated")
+            else:
+                msg = "New project during an incremental run: %s/%s"
+                logger.debug(msg, namespace, project)
+
         res = self.page_request(endpoint, {}).json()
 
         tools = res.get("tools")
         if tools is None:
             # This probably never happens
             logger.warning("Project '%s' does not have any tools", endpoint)
             return []
 
         hits = []
         for tool in tools:
             tool_name = tool["name"]
             if tool_name not in VCS_NAMES:
                 continue
             url = CLONE_URL_FORMAT.format(
                 vcs=tool_name,
                 namespace=namespace,
                 project=project,
                 mount_point=tool["mount_point"],
             )
             entry = SourceForgeListerEntry(
                 vcs=VcsNames(tool_name), url=url, last_modified=last_modified
             )
             hits.append(entry)
 
+        if not hits:
+            date = datetime.date.fromisoformat(last_modified)
+            self.state.empty_projects[endpoint] = date
+        else:
+            self.state.empty_projects.pop(endpoint, None)
+
         return hits
diff --git a/swh/lister/sourceforge/tests/data/subsitemap-0.xml b/swh/lister/sourceforge/tests/data/subsitemap-0.xml
index c737ee3..5f2cba8 100644
--- a/swh/lister/sourceforge/tests/data/subsitemap-0.xml
+++ b/swh/lister/sourceforge/tests/data/subsitemap-0.xml
@@ -1,58 +1,69 @@
 <?xml version="1.0" encoding="utf-8"?>
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
         <loc>https://sourceforge.net/projects/os3dmodels/files/</loc>
         <lastmod>2017-03-31</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/os3dmodels/home/</loc>
         <lastmod>2017-03-31</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/os3dmodels/tickets/</loc>
         <lastmod>2017-03-31</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/mramm/home/</loc>
         <lastmod>2019-04-04</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/mramm/todo/</loc>
         <lastmod>2019-04-04</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/mramm/notes/</loc>
         <lastmod>2019-04-04</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/mramm/reviews/</loc>
         <lastmod>2019-04-04</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/p/mramm/discussion/</loc>
         <lastmod>2019-04-04</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/adobe/adobexmp/home/</loc>
         <lastmod>2017-10-17</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/adobe/adobexmp/wiki/</loc>
         <lastmod>2017-10-17</lastmod>
         <changefreq>daily</changefreq>
     </url>
     <url>
         <loc>https://sourceforge.net/adobe/adobexmp/discussion/</loc>
         <lastmod>2017-10-17</lastmod>
         <changefreq>daily</changefreq>
     </url>
+    <!-- Copied from subsitemap-1 to test an update to an empty project -->
+    <url>
+        <loc>https://sourceforge.net/projects/backapps/files/</loc>
+        <lastmod>2021-02-11</lastmod>
+        <changefreq>daily</changefreq>
+    </url>
+    <url>
+        <loc>https://sourceforge.net/p/backapps/tickets/</loc>
+        <lastmod>2021-02-11</lastmod>
+        <changefreq>daily</changefreq>
+    </url>
 </urlset>
diff --git a/swh/lister/sourceforge/tests/test_lister.py b/swh/lister/sourceforge/tests/test_lister.py
index 0c1f226..5b4438b 100644
--- a/swh/lister/sourceforge/tests/test_lister.py
+++ b/swh/lister/sourceforge/tests/test_lister.py
@@ -1,180 +1,338 @@
 # Copyright (C) 2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
+import datetime
 import functools
 import json
 from pathlib import Path
 import re
 
+from iso8601 import iso8601
 import pytest
 from requests.exceptions import HTTPError
 
 from swh.lister import USER_AGENT
 from swh.lister.sourceforge.lister import (
     MAIN_SITEMAP_URL,
     PROJECT_API_URL_FORMAT,
     SourceForgeLister,
+    SourceForgeListerState,
 )
 
 # Mapping of project name to namespace
+from swh.scheduler.model import ListedOrigin
+
 TEST_PROJECTS = {
     "adobexmp": "adobe",
     "backapps": "p",
     "backapps/website": "p",
     "mojunk": "p",
     "mramm": "p",
     "os3dmodels": "p",
 }
 
 URLS_MATCHER = {
     PROJECT_API_URL_FORMAT.format(namespace=namespace, project=project): project
     for project, namespace in TEST_PROJECTS.items()
 }
 
 
 def get_main_sitemap(datadir):
     return Path(datadir, "main-sitemap.xml").read_text()
 
 
 def get_subsitemap_0(datadir):
     return Path(datadir, "subsitemap-0.xml").read_text()
 
 
 def get_subsitemap_1(datadir):
     return Path(datadir, "subsitemap-1.xml").read_text()
 
 
 def get_project_json(datadir, request, context):
     url = request.url
     project = URLS_MATCHER.get(url)
     assert project is not None, f"Url '{url}' could not be matched"
     project = project.replace("/", "-")
     return json.loads(Path(datadir, f"{project}.json").read_text())
 
 
 def _check_request_headers(request):
     return request.headers.get("User-Agent") == USER_AGENT
 
 
+def _check_listed_origins(lister, swh_scheduler):
+    expected = {  # clone URL -> (visit type, day of last update)
+        "svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"),
+        "git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"),
+        "svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"),
+        "git.code.sf.net/p/mramm/files": ("git", "2019-04-04"),
+        "git.code.sf.net/p/mramm/git": ("git", "2019-04-04"),
+        "svn.code.sf.net/p/mramm/svn": ("svn", "2019-04-04"),
+        "git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"),
+        "git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
+        "svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
+    }
+    seen = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
+    assert {o.url: (o.visit_type, str(o.last_update.date())) for o in seen} == expected
+
+
 def test_sourceforge_lister_full(swh_scheduler, requests_mock, datadir):
     """
     Simulate a full listing of an artificially restricted sourceforge.
     There are 5 different projects, spread over two sub-sitemaps, a few of which
     have multiple VCS listed, one has none, one is outside of the standard `/p/`
     namespace, some with custom mount points.
     All non-interesting but related entries have been kept.
     """
     lister = SourceForgeLister(scheduler=swh_scheduler)
 
     requests_mock.get(
         MAIN_SITEMAP_URL,
         text=get_main_sitemap(datadir),
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
         text=get_subsitemap_0(datadir),
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
         text=get_subsitemap_1(datadir),
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         re.compile("https://sourceforge.net/rest/.*"),
         json=functools.partial(get_project_json, datadir),
         additional_matcher=_check_request_headers,
     )
 
     stats = lister.run()
     # - os3dmodels (2 repos),
     # - mramm (3 repos),
     # - mojunk (3 repos),
     # - backapps/website (1 repo).
     # adobe and backapps itself have no repos.
     assert stats.pages == 4
     assert stats.origins == 9
+    expected_state = {
+        "subsitemap_last_modified": {
+            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
+            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18",
+        },
+        "empty_projects": {
+            "https://sourceforge.net/rest/p/backapps": "2021-02-11",
+            "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17",
+        },
+    }
+    assert lister.state_to_dict(lister.state) == expected_state
 
-    scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
-    res = {o.url: (o.visit_type, str(o.last_update.date())) for o in scheduler_origins}
-    assert res == {
-        "svn.code.sf.net/p/backapps/website/code": ("svn", "2021-02-11"),
-        "git.code.sf.net/p/os3dmodels/git": ("git", "2017-03-31"),
-        "svn.code.sf.net/p/os3dmodels/svn": ("svn", "2017-03-31"),
-        "git.code.sf.net/p/mramm/files": ("git", "2019-04-04"),
-        "git.code.sf.net/p/mramm/git": ("git", "2019-04-04"),
-        "svn.code.sf.net/p/mramm/svn": ("svn", "2019-04-04"),
-        "git.code.sf.net/p/mojunk/git": ("git", "2017-12-31"),
-        "git.code.sf.net/p/mojunk/git2": ("git", "2017-12-31"),
-        "svn.code.sf.net/p/mojunk/svn": ("svn", "2017-12-31"),
+    _check_listed_origins(lister, swh_scheduler)
+
+
+def test_sourceforge_lister_incremental(swh_scheduler, requests_mock, datadir, mocker):
+    """
+    Simulate an incremental listing of an artificially restricted sourceforge.
+    Same dataset as the full run, because it's enough to validate the different cases.
+    """
+    lister = SourceForgeLister(scheduler=swh_scheduler, incremental=True)
+
+    requests_mock.get(
+        MAIN_SITEMAP_URL,
+        text=get_main_sitemap(datadir),
+        additional_matcher=_check_request_headers,
+    )
+
+    def not_called(request, *args, **kwargs):
+        raise AssertionError(f"Should not have been called: '{request.url}'")
+
+    requests_mock.get(
+        "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
+        text=get_subsitemap_0(datadir),
+        additional_matcher=_check_request_headers,
+    )
+    requests_mock.get(
+        "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
+        text=not_called,
+        additional_matcher=_check_request_headers,
+    )
+
+    def filtered_get_project_json(request, context):
+        # Unchanged projects must not be requested again (values are project names)
+        assert URLS_MATCHER[request.url] not in {"adobexmp", "mojunk"}
+        return get_project_json(datadir, request, context)
+
+    requests_mock.get(
+        re.compile("https://sourceforge.net/rest/.*"),
+        json=filtered_get_project_json,
+        additional_matcher=_check_request_headers,
+    )
+
+    faked_listed_origins = [
+        # mramm: changed
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="git",
+            url="git.code.sf.net/p/mramm/files",
+            last_update=iso8601.parse_date("2019-01-01"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="git",
+            url="git.code.sf.net/p/mramm/git",
+            last_update=iso8601.parse_date("2019-01-01"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="svn",
+            url="svn.code.sf.net/p/mramm/svn",
+            last_update=iso8601.parse_date("2019-01-01"),
+        ),
+        # stayed the same, even though its subsitemap has changed
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="git",
+            url="git.code.sf.net/p/os3dmodels/git",
+            last_update=iso8601.parse_date("2017-03-31"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="svn",
+            url="svn.code.sf.net/p/os3dmodels/svn",
+            last_update=iso8601.parse_date("2017-03-31"),
+        ),
+        # others: stayed the same, should be skipped
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="git",
+            url="git.code.sf.net/p/mojunk/git",
+            last_update=iso8601.parse_date("2017-12-31"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="git",
+            url="git.code.sf.net/p/mojunk/git2",
+            last_update=iso8601.parse_date("2017-12-31"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="svn",
+            url="svn.code.sf.net/p/mojunk/svn",
+            last_update=iso8601.parse_date("2017-12-31"),
+        ),
+        ListedOrigin(
+            lister_id=lister.lister_obj.id,
+            visit_type="svn",
+            url="svn.code.sf.net/p/backapps/website/code",
+            last_update=iso8601.parse_date("2021-02-11"),
+        ),
+    ]
+    swh_scheduler.record_listed_origins(faked_listed_origins)
+
+    to_date = datetime.date.fromisoformat
+    faked_state = SourceForgeListerState(
+        subsitemap_last_modified={
+            # changed
+            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": to_date(
+                "2021-02-18"
+            ),
+            # stayed the same
+            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": to_date(
+                "2021-03-18"
+            ),
+        },
+        empty_projects={
+            "https://sourceforge.net/rest/p/backapps": to_date("2020-02-11"),
+            "https://sourceforge.net/rest/adobe/adobexmp": to_date("2017-10-17"),
+        },
+    )
+    lister.state = faked_state
+
+    stats = lister.run()
+    # - mramm (3 repos),  # changed
+    assert stats.pages == 1
+    assert stats.origins == 3
+    expected_state = {
+        "subsitemap_last_modified": {
+            "https://sourceforge.net/allura_sitemap/sitemap-0.xml": "2021-03-18",
+            "https://sourceforge.net/allura_sitemap/sitemap-1.xml": "2021-03-18",
+        },
+        "empty_projects": {
+            "https://sourceforge.net/rest/p/backapps": "2021-02-11",  # changed
+            "https://sourceforge.net/rest/adobe/adobexmp": "2017-10-17",
+        },
     }
+    assert lister.state_to_dict(lister.state) == expected_state
+
+    # origins have been updated
+    _check_listed_origins(lister, swh_scheduler)
 
 
 def test_sourceforge_lister_retry(swh_scheduler, requests_mock, mocker, datadir):
     # Exponential retries take a long time, so stub time.sleep
     mocked_sleep = mocker.patch("time.sleep", return_value=None)
 
     lister = SourceForgeLister(scheduler=swh_scheduler)
 
     requests_mock.get(
         MAIN_SITEMAP_URL,
         [
             {"status_code": 429},
             {"status_code": 429},
             {"text": get_main_sitemap(datadir)},
         ],
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         "https://sourceforge.net/allura_sitemap/sitemap-0.xml",
         [{"status_code": 429}, {"text": get_subsitemap_0(datadir), "status_code": 301}],
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         "https://sourceforge.net/allura_sitemap/sitemap-1.xml",
         [{"status_code": 429}, {"text": get_subsitemap_1(datadir)}],
         additional_matcher=_check_request_headers,
     )
     requests_mock.get(
         re.compile("https://sourceforge.net/rest/.*"),
         [{"status_code": 429}, {"json": functools.partial(get_project_json, datadir)}],
         additional_matcher=_check_request_headers,
     )
 
     stats = lister.run()
     # - os3dmodels (2 repos),
     # - mramm (3 repos),
     # - mojunk (3 repos),
     # - backapps/website (1 repo).
     # adobe and backapps itself have no repos.
     assert stats.pages == 4
     assert stats.origins == 9
 
     scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
     assert {o.url: o.visit_type for o in scheduler_origins} == {
         "svn.code.sf.net/p/backapps/website/code": "svn",
         "git.code.sf.net/p/os3dmodels/git": "git",
         "svn.code.sf.net/p/os3dmodels/svn": "svn",
         "git.code.sf.net/p/mramm/files": "git",
         "git.code.sf.net/p/mramm/git": "git",
         "svn.code.sf.net/p/mramm/svn": "svn",
         "git.code.sf.net/p/mojunk/git": "git",
         "git.code.sf.net/p/mojunk/git2": "git",
         "svn.code.sf.net/p/mojunk/svn": "svn",
     }
 
     # Test `time.sleep` is called with exponential retries
     calls = [1.0, 10.0, 1.0, 1.0]
     mocked_sleep.assert_has_calls([mocker.call(c) for c in calls])
 
 
 @pytest.mark.parametrize("status_code", [500, 503, 504, 403, 404])
 def test_sourceforge_lister_http_error(swh_scheduler, requests_mock, status_code):
     lister = SourceForgeLister(scheduler=swh_scheduler)
 
     requests_mock.get(MAIN_SITEMAP_URL, status_code=status_code)
 
     with pytest.raises(HTTPError):
         lister.run()