diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py
index d843c2f..635a7a6 100644
--- a/swh/lister/cran/lister.py
+++ b/swh/lister/cran/lister.py
@@ -1,135 +1,144 @@
 # Copyright (C) 2019-2021 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime, timezone
 import json
 import logging
 import subprocess
 from typing import Dict, Iterator, List, Optional, Tuple
 
 import pkg_resources
 
 from swh.lister.pattern import CredentialsType, StatelessLister
 from swh.scheduler.interface import SchedulerInterface
 from swh.scheduler.model import ListedOrigin
 
 logger = logging.getLogger(__name__)
 
 CRAN_MIRROR = "https://cran.r-project.org"
 
 PageType = List[Dict[str, str]]
 
 
 class CRANLister(StatelessLister[PageType]):
     """
     List all packages hosted on The Comprehensive R Archive Network.
     """
 
     LISTER_NAME = "CRAN"
 
     def __init__(
         self,
         scheduler: SchedulerInterface,
         credentials: Optional[CredentialsType] = None,
     ):
         super().__init__(
             scheduler, url=CRAN_MIRROR, instance="cran", credentials=credentials
         )
 
     def get_pages(self) -> Iterator[PageType]:
         """
         Yields a single page containing all CRAN packages info.
         """
         yield read_cran_data()
 
     def get_origins_from_page(self, page: PageType) -> Iterator[ListedOrigin]:
         assert self.lister_obj.id is not None
+
+        seen_urls = set()
         for package_info in page:
             origin_url, artifact_url = compute_origin_urls(package_info)
 
+            if origin_url in seen_urls:
+                # prevent multiple listing of an origin,
+                # most recent version will be listed first
+                continue
+
+            seen_urls.add(origin_url)
+
             yield ListedOrigin(
                 lister_id=self.lister_obj.id,
                 url=origin_url,
                 visit_type="tar",
                 last_update=parse_packaged_date(package_info),
                 extra_loader_arguments={
                     "artifacts": [
                         {"url": artifact_url, "version": package_info["Version"]}
                     ]
                 },
             )
 
 
 def read_cran_data() -> List[Dict[str, str]]:
     """
     Runs R script which uses inbuilt API to return a json response
             containing data about the R packages.
 
     Returns:
         List of Dict about R packages. For example::
 
             [
                 {
                     'Package': 'A3',
                     'Version': '1.0.0'
                 },
                 {
                     'Package': 'abbyyR',
                     'Version': '0.5.4'
                 },
                 ...
             ]
     """
     filepath = pkg_resources.resource_filename("swh.lister.cran", "list_all_packages.R")
     logger.debug("Executing R script %s", filepath)
-    response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False)
+    response = subprocess.run(filepath, stdout=subprocess.PIPE, shell=False, check=True)
     return json.loads(response.stdout.decode("utf-8"))
 
 
 def compute_origin_urls(package_info: Dict[str, str]) -> Tuple[str, str]:
-    """Compute the package url from the repo dict.
+    """Compute the origin and artifact urls from the package info dict.
 
     Args:
-        repo: dict with key 'Package', 'Version'
+        package_info: dict with key 'Package', 'Version'
 
     Returns:
         the tuple project url, artifact url
 
     """
     package = package_info["Package"]
     version = package_info["Version"]
     origin_url = f"{CRAN_MIRROR}/package={package}"
     artifact_url = f"{CRAN_MIRROR}/src/contrib/{package}_{version}.tar.gz"
     return origin_url, artifact_url
 
 
 def parse_packaged_date(package_info: Dict[str, str]) -> Optional[datetime]:
     packaged_at_str = package_info.get("Packaged", "")
     packaged_at = None
     if packaged_at_str:
         packaged_at_str = packaged_at_str.replace(" UTC", "")
         # Packaged field possible formats:
         #   - "%Y-%m-%d %H:%M:%S[.%f] UTC; <packager>",
         #   - "%a %b %d %H:%M:%S %Y; <packager>"
         for date_format in (
             "%Y-%m-%d %H:%M:%S",
             "%Y-%m-%d %H:%M:%S.%f",
             "%a %b %d %H:%M:%S %Y",
         ):
             try:
                 packaged_at = datetime.strptime(
                     packaged_at_str.split(";")[0], date_format,
                 ).replace(tzinfo=timezone.utc)
                 break
-            except Exception:
+            except ValueError:
                 continue
 
         if packaged_at is None:
             logger.debug(
                 "Could not parse %s package release date: %s",
                 package_info["Package"],
                 packaged_at_str,
             )
 
     return packaged_at
diff --git a/swh/lister/cran/tests/test_lister.py b/swh/lister/cran/tests/test_lister.py
index b8822ec..f8707d1 100644
--- a/swh/lister/cran/tests/test_lister.py
+++ b/swh/lister/cran/tests/test_lister.py
@@ -1,136 +1,152 @@
 # Copyright (C) 2019-2021 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 from datetime import datetime, timezone
 import json
 from os import path
 
 import pytest
 
 from swh.lister.cran.lister import (
     CRAN_MIRROR,
     CRANLister,
     compute_origin_urls,
     parse_packaged_date,
 )
 
 
 def test_cran_compute_origin_urls():
     pack = "something"
     vers = "0.0.1"
     origin_url, artifact_url = compute_origin_urls({"Package": pack, "Version": vers,})
 
     assert origin_url == f"{CRAN_MIRROR}/package={pack}"
     assert artifact_url == f"{CRAN_MIRROR}/src/contrib/{pack}_{vers}.tar.gz"
 
 
 def test_cran_compute_origin_urls_failure():
     for incomplete_repo in [{"Version": "0.0.1"}, {"Package": "package"}, {}]:
         with pytest.raises(KeyError):
             compute_origin_urls(incomplete_repo)
 
 
 def test_parse_packaged_date():
     common_date_format = {
         "Package": "test",
         "Packaged": "2017-04-26 11:36:15 UTC; Jonathan",
     }
     assert parse_packaged_date(common_date_format) == datetime(
         year=2017, month=4, day=26, hour=11, minute=36, second=15, tzinfo=timezone.utc
     )
     common_date_format = {
         "Package": "test",
         "Packaged": "2017-04-26 11:36:15.123456 UTC; Jonathan",
     }
     assert parse_packaged_date(common_date_format) == datetime(
         year=2017,
         month=4,
         day=26,
         hour=11,
         minute=36,
         second=15,
         microsecond=123456,
         tzinfo=timezone.utc,
     )
     old_date_format = {
         "Package": "test",
         "Packaged": "Thu Mar 30 10:48:35 2006; hornik",
     }
     assert parse_packaged_date(old_date_format) == datetime(
         year=2006, month=3, day=30, hour=10, minute=48, second=35, tzinfo=timezone.utc
     )
     invalid_date_format = {
         "Package": "test",
         "Packaged": "foo",
     }
     assert parse_packaged_date(invalid_date_format) is None
     missing_date = {
         "Package": "test",
     }
     assert parse_packaged_date(missing_date) is None
 
 
 def test_cran_lister_cran(datadir, swh_scheduler, mocker):
     with open(path.join(datadir, "list-r-packages.json")) as f:
         cran_data = json.loads(f.read())
 
     lister = CRANLister(swh_scheduler)
 
     mock_cran = mocker.patch("swh.lister.cran.lister.read_cran_data")
 
     mock_cran.return_value = cran_data
 
     stats = lister.run()
 
     assert stats.pages == 1
     assert stats.origins == len(cran_data)
 
     scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).results
 
     assert len(scheduler_origins) == len(cran_data)
 
     for package_info in cran_data:
         origin_url, artifact_url = compute_origin_urls(package_info)
 
         filtered_origins = [o for o in scheduler_origins if o.url == origin_url]
 
         assert len(filtered_origins) == 1
 
         assert filtered_origins[0].extra_loader_arguments == {
             "artifacts": [{"url": artifact_url, "version": package_info["Version"]}]
         }
 
-        filtered_origins[0].last_update == parse_packaged_date(package_info)
+        assert filtered_origins[0].last_update == parse_packaged_date(package_info)
 
 
+def test_cran_lister_duplicated_origins(datadir, swh_scheduler, mocker):
+    with open(path.join(datadir, "list-r-packages.json")) as f:
+        cran_data = json.loads(f.read())
+
+    lister = CRANLister(swh_scheduler)
+
+    mock_cran = mocker.patch("swh.lister.cran.lister.read_cran_data")
+
+    mock_cran.return_value = cran_data + cran_data
+
+    stats = lister.run()
+
+    assert stats.pages == 1
+    assert stats.origins == len(cran_data)
+
+
 @pytest.mark.parametrize(
     "credentials, expected_credentials",
     [
         (None, []),
         ({"key": "value"}, []),
         (
             {"CRAN": {"cran": [{"username": "user", "password": "pass"}]}},
             [{"username": "user", "password": "pass"}],
         ),
     ],
 )
 def test_lister_cran_instantiation_with_credentials(
     credentials, expected_credentials, swh_scheduler
 ):
     lister = CRANLister(swh_scheduler, credentials=credentials)
 
     # Credentials are allowed in constructor
     assert lister.credentials == expected_credentials
 
 
 def test_lister_cran_from_configfile(swh_scheduler_config, mocker):
     load_from_envvar = mocker.patch("swh.lister.pattern.load_from_envvar")
     load_from_envvar.return_value = {
         "scheduler": {"cls": "local", **swh_scheduler_config},
         "credentials": {},
     }
     lister = CRANLister.from_configfile()
     assert lister.scheduler is not None
     assert lister.credentials is not None