diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py
index 04a20aa..e674cc4 100644
--- a/swh/lister/bitbucket/lister.py
+++ b/swh/lister/bitbucket/lister.py
@@ -1,72 +1,71 @@
 # Copyright (C) 2017-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from urllib import parse
 import logging

 import iso8601

 from swh.lister.bitbucket.models import BitBucketModel
 from swh.lister.core.indexing_lister import SWHIndexingHttpLister

 logger = logging.getLogger(__name__)


 class BitBucketLister(SWHIndexingHttpLister):
     PATH_TEMPLATE = '/repositories?after=%s'
     MODEL = BitBucketModel
     LISTER_NAME = 'bitbucket'
     instance = 'bitbucket'

     def get_model_from_repo(self, repo):
         return {
             'uid': repo['uuid'],
             'indexable': repo['created_on'],
             'name': repo['name'],
             'full_name': repo['full_name'],
             'html_url': repo['links']['html']['href'],
             'origin_url': repo['links']['clone'][0]['href'],
             'origin_type': repo['scm'],
-            'description': repo['description']
         }

     def get_next_target_from_response(self, response):
         body = response.json()
         if 'next' in body:
             return parse.unquote(body['next'].split('after=')[1])
         else:
             return None

     def transport_response_simplified(self, response):
         repos = response.json()['values']
         return [self.get_model_from_repo(repo) for repo in repos]

     def request_uri(self, identifier):
         return super().request_uri(identifier or '1970-01-01')

     def is_within_bounds(self, inner, lower=None, upper=None):
         # values are expected to be str dates
         try:
             inner = iso8601.parse_date(inner)
             if lower:
                 lower = iso8601.parse_date(lower)
             if upper:
                 upper = iso8601.parse_date(upper)
             if lower is None and upper is None:
                 return True
             elif lower is None:
                 ret = inner <= upper
             elif upper is None:
                 ret = inner >= lower
             else:
                 ret = lower <= inner <= upper
         except Exception as e:
             logger.error(str(e) + ': %s, %s, %s' %
                          (('inner=%s%s' % (type(inner), inner)),
                           ('lower=%s%s' % (type(lower), lower)),
                           ('upper=%s%s' % (type(upper), upper)))
                          )
             raise
         return ret
diff --git a/swh/lister/core/models.py b/swh/lister/core/models.py
index 15a1441..d0c61c5 100644
--- a/swh/lister/core/models.py
+++ b/swh/lister/core/models.py
@@ -1,50 +1,48 @@
 # Copyright (C) 2015-2017 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import abc
 from datetime import datetime

 from sqlalchemy import Column, DateTime, Integer, String
 from sqlalchemy.ext.declarative import declarative_base, DeclarativeMeta

 from .abstractattribute import AbstractAttribute

 SQLBase = declarative_base()


 class ABCSQLMeta(abc.ABCMeta, DeclarativeMeta):
     pass


 class ModelBase(SQLBase, metaclass=ABCSQLMeta):
     """a common repository"""
     __abstract__ = True
     __tablename__ = AbstractAttribute

     uid = AbstractAttribute('Column(<uid_type>, primary_key=True)')

     name = Column(String, index=True)
     full_name = Column(String, index=True)
     html_url = Column(String)
     origin_url = Column(String)
     origin_type = Column(String)
-    description = Column(String)

     last_seen = Column(DateTime, nullable=False)

     task_id = Column(Integer)
-    origin_id = Column(Integer)

     def __init__(self, **kw):
         kw['last_seen'] = datetime.now()
         super().__init__(**kw)


 class IndexingModelBase(ModelBase, metaclass=ABCSQLMeta):
     __abstract__ = True
     __tablename__ = AbstractAttribute

     # The value used for sorting, segmenting, or api query paging,
     # because uids aren't always sequential.
     indexable = AbstractAttribute('Column(<indexable_type>, index=True)')
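For context on the BitBucket lister's `is_within_bounds` override above: the BitBucket API pages on `created_on` dates, so bounds are compared as parsed ISO 8601 timestamps. A minimal standalone sketch of that comparison, assuming only the `iso8601` package (the date values below are made up):

    import iso8601

    inner = iso8601.parse_date('2017-05-03T12:00:00+00:00')
    lower = iso8601.parse_date('2017-01-01T00:00:00+00:00')
    upper = iso8601.parse_date('2018-01-01T00:00:00+00:00')
    # Same check the lister performs once all three values are parsed
    assert lower <= inner <= upper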
diff --git a/swh/lister/core/tests/test_lister.py b/swh/lister/core/tests/test_lister.py
index 5b93b64..a63260f 100644
--- a/swh/lister/core/tests/test_lister.py
+++ b/swh/lister/core/tests/test_lister.py
@@ -1,234 +1,234 @@
 # Copyright (C) 2017-2018 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import abc
 import time
 from unittest import TestCase
 from unittest.mock import Mock, patch

 import requests_mock
 from sqlalchemy import create_engine
 from testing.postgresql import Postgresql

 from swh.lister.core.abstractattribute import AbstractAttribute


 def noop(*args, **kwargs):
     pass


 @requests_mock.Mocker()
 class HttpListerTesterBase(abc.ABC):
     """Base testing class for subclasses of

            swh.lister.core.indexing_lister.SWHIndexingHttpLister
            swh.lister.core.page_by_page_lister.PageByPageHttpLister

     See swh.lister.github.tests.test_gh_lister for an example of how
     to customize for a specific listing service.
     """
     Lister = AbstractAttribute('The lister class to test')
     test_re = AbstractAttribute('Compiled regex matching the server url. Must'
                                 ' capture the index value.')
     lister_subdir = AbstractAttribute('bitbucket, github, etc.')
     good_api_response_file = AbstractAttribute('Example good response body')
     bad_api_response_file = AbstractAttribute('Example bad response body')
     first_index = AbstractAttribute('First index in good_api_response')
     entries_per_page = AbstractAttribute('Number of results in good response')
     LISTER_NAME = 'fake-lister'

     # May need to override this if the headers are used for something
     def response_headers(self, request):
         return {}

     # May need to override this if the server uses non-standard rate limiting
     # method.
     # Please keep the requested retry delay reasonably low.
     def mock_rate_quota(self, n, request, context):
         self.rate_limit += 1
         context.status_code = 429
         context.headers['Retry-After'] = '1'
         return '{"error":"dummy"}'

     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.rate_limit = 1
         self.response = None
         self.fl = None
         self.helper = None
         if self.__class__ != HttpListerTesterBase:
             self.run = TestCase.run.__get__(self, self.__class__)
         else:
             self.run = noop

     def request_index(self, request):
         m = self.test_re.search(request.path_url)
         if m and (len(m.groups()) > 0):
             return m.group(1)
         else:
             return None

     def mock_response(self, request, context):
         self.fl.reset_backoff()
         self.rate_limit = 1
         context.status_code = 200
         custom_headers = self.response_headers(request)
         context.headers.update(custom_headers)
         if self.request_index(request) == str(self.first_index):
             with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                                   self.good_api_response_file),
                       'r', encoding='utf-8') as r:
                 return r.read()
         else:
             with open('swh/lister/%s/tests/%s' % (self.lister_subdir,
                                                   self.bad_api_response_file),
                       'r', encoding='utf-8') as r:
                 return r.read()

     def mock_limit_n_response(self, n, request, context):
         self.fl.reset_backoff()
         if self.rate_limit <= n:
             return self.mock_rate_quota(n, request, context)
         else:
             return self.mock_response(request, context)

     def mock_limit_once_response(self, request, context):
         return self.mock_limit_n_response(1, request, context)

     def mock_limit_twice_response(self, request, context):
         return self.mock_limit_n_response(2, request, context)

     def get_fl(self, override_config=None):
         """Retrieve an instance of fake lister (fl).
""" if override_config or self.fl is None: self.fl = self.Lister(api_baseurl='https://fakeurl', override_config=override_config) self.fl.INITIAL_BACKOFF = 1 self.fl.reset_backoff() return self.fl def get_api_response(self): fl = self.get_fl() if self.response is None: self.response = fl.safely_issue_request(self.first_index) return self.response def test_is_within_bounds(self, http_mocker): fl = self.get_fl() self.assertFalse(fl.is_within_bounds(1, 2, 3)) self.assertTrue(fl.is_within_bounds(2, 1, 3)) self.assertTrue(fl.is_within_bounds(1, 1, 1)) self.assertTrue(fl.is_within_bounds(1, None, None)) self.assertTrue(fl.is_within_bounds(1, None, 2)) self.assertTrue(fl.is_within_bounds(1, 0, None)) self.assertTrue(fl.is_within_bounds("b", "a", "c")) self.assertFalse(fl.is_within_bounds("a", "b", "c")) self.assertTrue(fl.is_within_bounds("a", None, "c")) self.assertTrue(fl.is_within_bounds("a", None, None)) self.assertTrue(fl.is_within_bounds("b", "a", None)) self.assertFalse(fl.is_within_bounds("a", "b", None)) self.assertTrue(fl.is_within_bounds("aa:02", "aa:01", "aa:03")) self.assertFalse(fl.is_within_bounds("aa:12", None, "aa:03")) with self.assertRaises(TypeError): fl.is_within_bounds(1.0, "b", None) with self.assertRaises(TypeError): fl.is_within_bounds("A:B", "A::B", None) def test_api_request(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_limit_twice_response) with patch.object(time, 'sleep', wraps=time.sleep) as sleepmock: self.get_api_response() self.assertEqual(sleepmock.call_count, 2) def test_repos_list(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) li = self.get_fl().transport_response_simplified( self.get_api_response() ) self.assertIsInstance(li, list) self.assertEqual(len(li), self.entries_per_page) def test_model_map(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() li = fl.transport_response_simplified(self.get_api_response()) di = li[0] self.assertIsInstance(di, dict) pubs = [k for k in vars(fl.MODEL).keys() if not k.startswith('_')] for k in pubs: - if k not in ['last_seen', 'task_id', 'origin_id', 'id']: + if k not in ['last_seen', 'task_id', 'id']: self.assertIn(k, di) def disable_scheduler(self, fl): fl.schedule_missing_tasks = Mock(return_value=None) def disable_db(self, fl): fl.winnow_models = Mock(return_value=[]) fl.db_inject_repo = Mock(return_value=fl.MODEL()) fl.disable_deleted_repo_tasks = Mock(return_value=None) def test_fetch_none_nodb(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=1, max_bound=1) # stores no results def test_fetch_one_nodb(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=self.first_index, max_bound=self.first_index) def test_fetch_multiple_pages_nodb(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) fl = self.get_fl() self.disable_scheduler(fl) self.disable_db(fl) fl.run(min_bound=self.first_index) def init_db(self, db, model): engine = create_engine(db.url()) model.metadata.create_all(engine) class HttpListerTester(HttpListerTesterBase, abc.ABC): last_index = AbstractAttribute('Last index in good_api_response') @requests_mock.Mocker() def test_fetch_multiple_pages_yesdb(self, http_mocker): http_mocker.get(self.test_re, text=self.mock_response) initdb_args = Postgresql.DEFAULT_SETTINGS['initdb_args'] initdb_args = ' 
diff --git a/swh/lister/cran/lister.py b/swh/lister/cran/lister.py
index 4277e14..73eeac9 100644
--- a/swh/lister/cran/lister.py
+++ b/swh/lister/cran/lister.py
@@ -1,119 +1,118 @@
 # Copyright (C) 2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import subprocess
 import json
 import logging
 import pkg_resources

 from swh.lister.cran.models import CRANModel

 from swh.scheduler.utils import create_task_dict
 from swh.core import utils
 from swh.lister.core.simple_lister import SimpleLister


 class CRANLister(SimpleLister):
     MODEL = CRANModel
     LISTER_NAME = 'cran'
     instance = 'cran'

     def task_dict(self, origin_type, origin_url, **kwargs):
         """Return task format dict

         This is overridden from the lister_base as more information is
         needed for the ingestion task creation.

         """
         return create_task_dict(
             'load-%s' % origin_type, 'recurring',
             kwargs.get('name'), origin_url, kwargs.get('version'),
             project_metadata=kwargs.get('description'))

     def r_script_request(self):
         """Runs the R script, which uses R's built-in API to return a json
         response containing data about all the R packages.

         Returns:
             List of dictionaries, for example:

             [
                 {'Package': 'A3',
                  'Version': '1.0.0',
                  'Title': 'Accurate, Adaptable, and Accessible Error Metrics
                            for Predictive\nModels',
                  'Description': 'Supplies tools for tabulating and analyzing
                                  the results of predictive models. The methods
                                  employed are ... '
                 },
                 {'Package': 'abbyyR',
                  'Version': '0.5.4',
                  'Title': 'Access to Abbyy Optical Character Recognition (OCR)
                            API',
                  'Description': 'Get text from images of text using Abbyy
                                  Cloud Optical Character\n ...'
                 },
                 ...
             ]
         """
         file_path = pkg_resources.resource_filename('swh.lister.cran',
                                                     'list_all_packages.R')
         response = subprocess.run(file_path, stdout=subprocess.PIPE,
                                   shell=False)
         return json.loads(response.stdout)

     def get_model_from_repo(self, repo):
         """Transform from repository representation to model

         """
         project_url = 'https://cran.r-project.org/src/contrib' \
             '/%(Package)s_%(Version)s.tar.gz' % repo
         return {
             'uid': repo["Package"],
             'name': repo["Package"],
             'full_name': repo["Title"],
             'version': repo["Version"],
             'html_url': project_url,
             'origin_url': project_url,
             'origin_type': 'cran',
-            'description': repo["Description"]
         }

     def transport_response_simplified(self, response):
         """Transform response to list for model manipulation

         """
         return [self.get_model_from_repo(repo) for repo in response]

     def ingest_data(self, identifier, checks=False):
         """Rework the base ingest_data.
            Request server endpoint which gives all in one go.
            Simplify and filter response list of repositories.
            Inject repo information into local db.
            Queue loader tasks for linked repositories.

         Args:
             identifier: Resource identifier (unused)
             checks (bool): Additional checks required (unused)

         """
         response = self.r_script_request()
         if not response:
             return response, []

         models_list = self.transport_response_simplified(response)
         models_list = self.filter_before_inject(models_list)
         all_injected = []
         for models in utils.grouper(models_list, n=10000):
             models = list(models)
             logging.debug('models: %s' % len(models))
             # inject into local db
             injected = self.inject_repo_data_into_db(models)
             # queue workers
             self.create_missing_origins_and_tasks(models, injected)
             all_injected.append(injected)
             # flush
             self.db_session.commit()
             self.db_session = self.mk_session()

         return response, all_injected
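The CRAN lister shells out to an R script rather than calling an HTTP endpoint. A standalone sketch of the same subprocess pattern, assuming a hypothetical path to an executable script that prints a JSON array of package dicts on stdout:

    import json
    import subprocess

    # Run the script directly (no shell) and capture its stdout
    result = subprocess.run('/path/to/list_all_packages.R',
                            stdout=subprocess.PIPE, shell=False)
    packages = json.loads(result.stdout)
    print(packages[0]['Package'], packages[0]['Version'])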
diff --git a/swh/lister/github/lister.py b/swh/lister/github/lister.py
index 245afde..866d0e5 100644
--- a/swh/lister/github/lister.py
+++ b/swh/lister/github/lister.py
@@ -1,53 +1,52 @@
 # Copyright (C) 2017-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import re
 import time

 from swh.lister.core.indexing_lister import SWHIndexingHttpLister
 from swh.lister.github.models import GitHubModel


 class GitHubLister(SWHIndexingHttpLister):
     PATH_TEMPLATE = '/repositories?since=%d'
     MODEL = GitHubModel
     API_URL_INDEX_RE = re.compile(r'^.*/repositories\?since=(\d+)')
     LISTER_NAME = 'github'
     instance = 'github'  # There is only 1 instance of such lister

     def get_model_from_repo(self, repo):
         return {
             'uid': repo['id'],
             'indexable': repo['id'],
             'name': repo['name'],
             'full_name': repo['full_name'],
             'html_url': repo['html_url'],
             'origin_url': repo['html_url'],
             'origin_type': 'git',
-            'description': repo['description'],
             'fork': repo['fork'],
         }

     def transport_quota_check(self, response):
         reqs_remaining = int(response.headers['X-RateLimit-Remaining'])
         if response.status_code == 403 and reqs_remaining == 0:
             reset_at = int(response.headers['X-RateLimit-Reset'])
             delay = min(reset_at - time.time(), 3600)
             return True, delay
         else:
             return False, 0

     def get_next_target_from_response(self, response):
         if 'next' in response.links:
             next_url = response.links['next']['url']
             return int(self.API_URL_INDEX_RE.match(next_url).group(1))
         else:
             return None

     def transport_response_simplified(self, response):
         repos = response.json()
         return [self.get_model_from_repo(repo) for repo in repos]

     def request_headers(self):
         return {'Accept': 'application/vnd.github.v3+json'}
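`transport_quota_check` above derives its back-off delay from GitHub's rate-limit headers. A worked sketch of the same arithmetic with a stubbed response object (the header values are fabricated for illustration):

    import time

    class StubResponse:
        status_code = 403
        headers = {'X-RateLimit-Remaining': '0',
                   'X-RateLimit-Reset': str(int(time.time()) + 120)}

    response = StubResponse()
    reqs_remaining = int(response.headers['X-RateLimit-Remaining'])
    if response.status_code == 403 and reqs_remaining == 0:
        # Wait until the quota resets, but never more than an hour
        delay = min(int(response.headers['X-RateLimit-Reset']) - time.time(),
                    3600)
        print('rate limited, backing off for %.0f seconds' % delay)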
""" return utils.create_task_dict( 'load-%s' % origin_type, 'recurring', kwargs.get('name'), origin_url, tarballs=self.tarballs[kwargs.get('name')]) def get_file(self): ''' Download and unzip tree.json.gz file and returns its content in JSON format Returns File content in dictionary format ''' response = requests.get(self.TREE_URL, allow_redirects=True) uncompressed_content = gzip.decompress(response.content) return json.loads(uncompressed_content.decode('utf-8')) def safely_issue_request(self, identifier): ''' Make network request to download the file which has file structure of the GNU website. Args: identifier: resource identifier Returns: Server response ''' return self.get_file() def list_packages(self, response): """ List the actual gnu origins with their names,url and the list of all the tarball for a package from the response. Args: response : File structure of the website in dictionary format Returns: A list of all the packages with their names, url of their root directory and the tarballs present for the particular package. [ {'name': '3dldf', 'url': 'https://ftp.gnu.org/gnu/3dldf/', 'tarballs': [ {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', 'date': '1071002600'}, {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', 'date': '1071078759'}} ] }, {'name': '8sync', 'url': 'https://ftp.gnu.org/gnu/8sync/', 'tarballs': [ {'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz', 'date': '1461357336'}, {'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz', 'date': '1480991830'} ] ] """ response = filter_directories(response) packages = [] for directory in response: content = directory['contents'] for repo in content: if repo['type'] == 'directory': package_url = '%s/%s/%s/' % (self.BASE_URL, directory['name'], repo['name']) package_tarballs = find_tarballs( repo['contents'], package_url) if package_tarballs != []: repo_details = { 'name': repo['name'], 'url': package_url, 'time_modified': repo['time'], } self.tarballs[repo['name']] = package_tarballs packages.append(repo_details) random.shuffle(packages) return packages def get_model_from_repo(self, repo): """Transform from repository representation to model """ return { 'uid': repo['name'], 'name': repo['name'], 'full_name': repo['name'], 'html_url': repo['url'], 'origin_url': repo['url'], 'time_last_updated': repo['time_modified'], 'origin_type': 'gnu', - 'description': None, } def transport_response_simplified(self, response): """Transform response to list for model manipulation """ return [self.get_model_from_repo(repo) for repo in response] def find_tarballs(package_file_structure, url): ''' Recursively lists all the tarball present in the folder and subfolders for a particular package url. Args package_file_structure : File structure of the package root directory url : URL of the corresponding package Returns List of all the tarball urls and the last their time of update example- For a package called 3dldf [ {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.3.tar.gz', 'date': '1071002600'} {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.4.tar.gz', 'date': '1071078759'} {'archive': 'https://ftp.gnu.org/gnu/3dldf/3DLDF-1.1.5.1.tar.gz', 'date': '1074278633'} ... 
diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py
index d686851..bd8c4a6 100644
--- a/swh/lister/npm/lister.py
+++ b/swh/lister/npm/lister.py
@@ -1,158 +1,157 @@
 # Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 from urllib.parse import quote

 from swh.lister.core.indexing_lister import SWHIndexingHttpLister
 from swh.lister.npm.models import NpmModel
 from swh.scheduler.utils import create_task_dict


 class NpmListerBase(SWHIndexingHttpLister):
     """List packages available in the npm registry in a paginated way

     """
     MODEL = NpmModel
     LISTER_NAME = 'npm'
     instance = 'npm'

     def __init__(self, api_baseurl='https://replicate.npmjs.com',
                  per_page=1000, override_config=None):
         super().__init__(api_baseurl=api_baseurl,
                          override_config=override_config)
         self.per_page = per_page + 1
         self.PATH_TEMPLATE += '&limit=%s' % self.per_page

     @property
     def ADDITIONAL_CONFIG(self):
         """(Override) Add extra configuration

         """
         default_config = super().ADDITIONAL_CONFIG
         default_config['loading_task_policy'] = ('str', 'recurring')
         return default_config

     def get_model_from_repo(self, repo_name):
         """(Override) Transform from npm package name to model

         """
         package_url, package_metadata_url = self._compute_urls(repo_name)
         return {
             'uid': repo_name,
             'indexable': repo_name,
             'name': repo_name,
             'full_name': repo_name,
             'html_url': package_metadata_url,
             'origin_url': package_url,
             'origin_type': 'npm',
-            'description': None
         }

     def task_dict(self, origin_type, origin_url, **kwargs):
         """(Override) Return task dict for loading a npm package into the
         archive.

         This is overridden from the lister_base as more information is
         needed for the ingestion task creation.

         """
         task_type = 'load-%s' % origin_type
         task_policy = self.config['loading_task_policy']
         package_name = kwargs.get('name')
         package_metadata_url = kwargs.get('html_url')
         return create_task_dict(task_type, task_policy,
                                 package_name, origin_url,
                                 package_metadata_url=package_metadata_url)

     def request_headers(self):
         """(Override) Set requests headers to send when querying the npm
         registry.

         """
         return {'User-Agent': 'Software Heritage npm lister',
                 'Accept': 'application/json'}

     def _compute_urls(self, repo_name):
         """Return a tuple (package_url, package_metadata_url)

         """
         return (
             'https://www.npmjs.com/package/%s' % repo_name,
             # package metadata url needs to be escaped otherwise some requests
             # may fail (for instance when a package name contains '/')
             '%s/%s' % (self.api_baseurl, quote(repo_name, safe=''))
         )

     def string_pattern_check(self, inner, lower, upper=None):
         """(Override) Inhibit the effect of that method as packages indices
         correspond to package names and thus do not respect any kind of
         fixed length string pattern.

         """
         pass


 class NpmLister(NpmListerBase):
     """List all packages available in the npm registry in a paginated way

     """
     PATH_TEMPLATE = '/_all_docs?startkey="%s"'

     def get_next_target_from_response(self, response):
         """(Override) Get next npm package name to continue the listing

         """
         repos = response.json()['rows']
         return repos[-1]['id'] if len(repos) == self.per_page else None

     def transport_response_simplified(self, response):
         """(Override) Transform npm registry response to list for model
         manipulation.

         """
         repos = response.json()['rows']
         if len(repos) == self.per_page:
             repos = repos[:-1]
         return [self.get_model_from_repo(repo['id']) for repo in repos]


 class NpmIncrementalLister(NpmListerBase):
     """List packages in the npm registry, updated since a specific
     update_seq value of the underlying CouchDB database, in a paginated
     way.

     """
     PATH_TEMPLATE = '/_changes?since=%s'

     @property
     def CONFIG_BASE_FILENAME(self):  # noqa: N802
         return 'lister_npm_incremental'

     def get_next_target_from_response(self, response):
         """(Override) Get next npm package name to continue the listing

         """
         repos = response.json()['results']
         return repos[-1]['seq'] if len(repos) == self.per_page else None

     def transport_response_simplified(self, response):
         """(Override) Transform npm registry response to list for model
         manipulation.

         """
         repos = response.json()['results']
         if len(repos) == self.per_page:
             repos = repos[:-1]
         return [self.get_model_from_repo(repo['id']) for repo in repos]

     def filter_before_inject(self, models_list):
         """(Override) Filter out documents in the CouchDB database not
         related to a npm package.

         """
         models_filtered = []
         for model in models_list:
             package_name = model['name']
             # document related to CouchDB internals
             if package_name.startswith('_design/'):
                 continue
             models_filtered.append(model)
         return models_filtered

     def disable_deleted_repo_tasks(self, start, end, keep_these):
         """(Override) Disable the processing performed by that method as it
         is not relevant in this incremental lister context and it raises an
         exception due to a different index type (int instead of str).

         """
         pass
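The npm listers request one row more than the user-facing page size (`self.per_page = per_page + 1`); a full result therefore means another page exists, and the surplus row's id doubles as the next start key. A small sketch of that logic in isolation, where `rows` stands in for `response.json()['rows']` and the package names are invented:

    def next_start_key(rows, requested_size):
        # requested_size is the user-facing page size; one extra row
        # was asked for, so a full result signals "there is a next page"
        return rows[-1]['id'] if len(rows) == requested_size + 1 else None

    rows = [{'id': 'left-pad'}, {'id': 'lodash'}, {'id': 'react'}]
    assert next_start_key(rows, requested_size=2) == 'react'  # more pages
    assert next_start_key(rows, requested_size=5) is None     # last page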
""" task_type = 'load-%s' % origin_type task_policy = self.config['loading_task_policy'] package_name = kwargs.get('name') package_metadata_url = kwargs.get('html_url') return create_task_dict(task_type, task_policy, package_name, origin_url, package_metadata_url=package_metadata_url) def request_headers(self): """(Override) Set requests headers to send when querying the npm registry """ return {'User-Agent': 'Software Heritage npm lister', 'Accept': 'application/json'} def _compute_urls(self, repo_name): """Return a tuple (package_url, package_metadata_url) """ return ( 'https://www.npmjs.com/package/%s' % repo_name, # package metadata url needs to be escaped otherwise some requests # may fail (for instance when a package name contains '/') '%s/%s' % (self.api_baseurl, quote(repo_name, safe='')) ) def string_pattern_check(self, inner, lower, upper=None): """ (Override) Inhibit the effect of that method as packages indices correspond to package names and thus do not respect any kind of fixed length string pattern """ pass class NpmLister(NpmListerBase): """List all packages available in the npm registry in a paginated way """ PATH_TEMPLATE = '/_all_docs?startkey="%s"' def get_next_target_from_response(self, response): """(Override) Get next npm package name to continue the listing """ repos = response.json()['rows'] return repos[-1]['id'] if len(repos) == self.per_page else None def transport_response_simplified(self, response): """(Override) Transform npm registry response to list for model manipulation """ repos = response.json()['rows'] if len(repos) == self.per_page: repos = repos[:-1] return [self.get_model_from_repo(repo['id']) for repo in repos] class NpmIncrementalLister(NpmListerBase): """List packages in the npm registry, updated since a specific update_seq value of the underlying CouchDB database, in a paginated way """ PATH_TEMPLATE = '/_changes?since=%s' @property def CONFIG_BASE_FILENAME(self): # noqa: N802 return 'lister_npm_incremental' def get_next_target_from_response(self, response): """(Override) Get next npm package name to continue the listing """ repos = response.json()['results'] return repos[-1]['seq'] if len(repos) == self.per_page else None def transport_response_simplified(self, response): """(Override) Transform npm registry response to list for model manipulation """ repos = response.json()['results'] if len(repos) == self.per_page: repos = repos[:-1] return [self.get_model_from_repo(repo['id']) for repo in repos] def filter_before_inject(self, models_list): """(Override) Filter out documents in the CouchDB database not related to a npm package """ models_filtered = [] for model in models_list: package_name = model['name'] # document related to CouchDB internals if package_name.startswith('_design/'): continue models_filtered.append(model) return models_filtered def disable_deleted_repo_tasks(self, start, end, keep_these): """(Override) Disable the processing performed by that method as it is not relevant in this incremental lister context and it raises and exception due to a different index type (int instead of str) """ pass diff --git a/swh/lister/phabricator/lister.py b/swh/lister/phabricator/lister.py index dd30308..e5efc4c 100644 --- a/swh/lister/phabricator/lister.py +++ b/swh/lister/phabricator/lister.py @@ -1,179 +1,178 @@ # Copyright (C) 2019 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import logging import urllib.parse from 
     def get_model_from_repo(self, repo):
         url = get_repo_url(repo['attachments']['uris']['uris'])
         if url is None:
             return None
         return {
             'uid': self.forge_url + str(repo['id']),
             'indexable': repo['id'],
             'name': repo['fields']['shortName'],
             'full_name': repo['fields']['name'],
             'html_url': url,
             'origin_url': url,
-            'description': None,
             'origin_type': repo['fields']['vcs'],
             'instance': self.instance,
         }

     def get_next_target_from_response(self, response):
         body = response.json()['result']['cursor']
         if body['after'] != 'null':
             return body['after']
         else:
             return None

     def transport_response_simplified(self, response):
         repos = response.json()
         if repos['result'] is None:
             raise ValueError(
                 'Problem during information fetch: %s' % repos['error_code'])
         repos = repos['result']['data']
         return [self.get_model_from_repo(repo) for repo in repos]

     def filter_before_inject(self, models_list):
         """
         (Overrides) SWHIndexingLister.filter_before_inject
         Bounds query results by this Lister's set max_index.
         """
         models_list = [m for m in models_list if m is not None]
         return super().filter_before_inject(models_list)

     def _bootstrap_repositories_listing(self):
         """
         Method called when no min_bound value has been provided to the
         lister. Its purpose is to:

             1. get the first repository data hosted on the Phabricator
                instance
             2. inject them into the lister database
             3. return the first repository index to start the listing
                after that value

         Returns:
             int: The first repository index
         """
         params = '&order=oldest&limit=1'
         response = self.safely_issue_request(params)
         models_list = self.transport_response_simplified(response)
         self.max_index = models_list[0]['indexable']
         models_list = self.filter_before_inject(models_list)
         injected = self.inject_repo_data_into_db(models_list)
         self.schedule_missing_tasks(models_list, injected)
         return self.max_index

     def run(self, min_bound=None, max_bound=None):
         """
         (Override) Run the lister on the specified Phabricator instance

         Args:
             min_bound (int): Optional repository index to start the listing
                 after it
             max_bound (int): Optional repository index to stop the listing
                 after it
         """
         # initial call to the lister, we need to bootstrap it in that case
         if min_bound is None:
             min_bound = self._bootstrap_repositories_listing()
         super().run(min_bound, max_bound)


 def get_repo_url(attachments):
     """
     Return url for a hosted repository from its uris attachments according
     to the following priority lists:
     * protocol: https > http
     * identifier: shortname > callsign > id
     """
     processed_urls = defaultdict(dict)
     for uri in attachments:
         protocol = uri['fields']['builtin']['protocol']
         url = uri['fields']['uri']['effective']
         identifier = uri['fields']['builtin']['identifier']
         if protocol in ('http', 'https'):
             processed_urls[protocol][identifier] = url
         elif protocol is None:
             for protocol in ('https', 'http'):
                 if url.startswith(protocol):
                     processed_urls[protocol]['undefined'] = url
                     break
     for protocol in ['https', 'http']:
         for identifier in ['shortname', 'callsign', 'id', 'undefined']:
             if (protocol in processed_urls and
                     identifier in processed_urls[protocol]):
                 return processed_urls[protocol][identifier]
     return None
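A worked example of `get_repo_url`'s priority rules (https beats http, shortname beats callsign beats id), with made-up uri attachments shaped like the Phabricator API payload:

    from swh.lister.phabricator.lister import get_repo_url

    uris = [
        {'fields': {'builtin': {'protocol': 'http',
                                'identifier': 'callsign'},
                    'uri': {'effective': 'http://forge.example/source/PROJ/'}}},
        {'fields': {'builtin': {'protocol': 'https',
                                'identifier': 'shortname'},
                    'uri': {'effective': 'https://forge.example/source/proj.git'}}},
    ]
    # https + shortname wins over http + callsign
    assert get_repo_url(uris) == 'https://forge.example/source/proj.git'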
diff --git a/swh/lister/pypi/lister.py b/swh/lister/pypi/lister.py
index fedffd0..f45a3a6 100644
--- a/swh/lister/pypi/lister.py
+++ b/swh/lister/pypi/lister.py
@@ -1,77 +1,76 @@
 # Copyright (C) 2018-2019 the Software Heritage developers
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information

 import random
 import xmltodict

 from .models import PyPIModel

 from swh.scheduler import utils
 from swh.lister.core.simple_lister import SimpleLister
 from swh.lister.core.lister_transports import ListerOnePageApiTransport


 class PyPILister(ListerOnePageApiTransport, SimpleLister):
     MODEL = PyPIModel
     LISTER_NAME = 'pypi'
     PAGE = 'https://pypi.org/simple/'
     instance = 'pypi'  # As of today only the main pypi.org is used

     def __init__(self, override_config=None):
         ListerOnePageApiTransport.__init__(self)
         SimpleLister.__init__(self, override_config=override_config)

     def task_dict(self, origin_type, origin_url, **kwargs):
         """(Override) Return task format dict

         This is overridden from the lister_base as more information is
         needed for the ingestion task creation.

         """
         _type = 'load-%s' % origin_type
         _policy = 'recurring'
         project_name = kwargs.get('name')
         project_metadata_url = kwargs.get('html_url')
         return utils.create_task_dict(
             _type, _policy,
             project_name, origin_url,
             project_metadata_url=project_metadata_url)

     def list_packages(self, response):
         """(Override) List the actual pypi origins from the response.
""" result = xmltodict.parse(response.content) _packages = [p['#text'] for p in result['html']['body']['a']] random.shuffle(_packages) return _packages def _compute_urls(self, repo_name): """Returns a tuple (project_url, project_metadata_url) """ return ( 'https://pypi.org/project/%s/' % repo_name, 'https://pypi.org/pypi/%s/json' % repo_name ) def get_model_from_repo(self, repo_name): """(Override) Transform from repository representation to model """ project_url, project_url_meta = self._compute_urls(repo_name) return { 'uid': repo_name, 'name': repo_name, 'full_name': repo_name, 'html_url': project_url_meta, 'origin_url': project_url, 'origin_type': 'pypi', - 'description': None, } def transport_response_simplified(self, response): """(Override) Transform response to list for model manipulation """ return [self.get_model_from_repo(repo_name) for repo_name in response]