diff --git a/swh/lister/npm/lister.py b/swh/lister/npm/lister.py --- a/swh/lister/npm/lister.py +++ b/swh/lister/npm/lister.py @@ -11,6 +11,7 @@ class NpmListerBase(IndexingHttpLister): """List packages available in the npm registry in a paginated way + """ MODEL = NpmModel LISTER_NAME = 'npm' @@ -47,7 +48,8 @@ } def task_dict(self, origin_type, origin_url, **kwargs): - """(Override) Return task dict for loading a npm package into the archive + """(Override) Return task dict for loading a npm package into the + archive. This is overridden from the lister_base as more information is needed for the ingestion task creation. @@ -62,7 +64,8 @@ package_metadata_url=package_metadata_url) def request_headers(self): - """(Override) Set requests headers to send when querying the npm registry + """(Override) Set requests headers to send when querying the npm + registry. """ return {'User-Agent': 'Software Heritage npm lister', @@ -82,12 +85,14 @@ """ (Override) Inhibit the effect of that method as packages indices correspond to package names and thus do not respect any kind of fixed length string pattern + """ pass class NpmLister(NpmListerBase): """List all packages available in the npm registry in a paginated way + """ PATH_TEMPLATE = '/_all_docs?startkey="%s"' @@ -110,7 +115,8 @@ class NpmIncrementalLister(NpmListerBase): """List packages in the npm registry, updated since a specific - update_seq value of the underlying CouchDB database, in a paginated way + update_seq value of the underlying CouchDB database, in a paginated way. + """ PATH_TEMPLATE = '/_changes?since=%s' @@ -119,14 +125,15 @@ return 'lister_npm_incremental' def get_next_target_from_response(self, response): - """(Override) Get next npm package name to continue the listing + """(Override) Get next npm package name to continue the listing. """ repos = response.json()['results'] return repos[-1]['seq'] if len(repos) == self.per_page else None def transport_response_simplified(self, response): - """(Override) Transform npm registry response to list for model manipulation + """(Override) Transform npm registry response to list for model + manipulation. """ repos = response.json()['results'] @@ -136,7 +143,8 @@ def filter_before_inject(self, models_list): """(Override) Filter out documents in the CouchDB database - not related to a npm package + not related to a npm package. + """ models_filtered = [] for model in models_list: @@ -148,9 +156,9 @@ return models_filtered def disable_deleted_repo_tasks(self, start, end, keep_these): - """(Override) Disable the processing performed by that method - as it is not relevant in this incremental lister context - and it raises and exception due to a different index type - (int instead of str) + """(Override) Disable the processing performed by that method as it is + not relevant in this incremental lister context. It also raises an + exception due to a different index type (int instead of str). + """ pass diff --git a/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22%22,limit=11 b/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22%22,limit=11 new file mode 100644 --- /dev/null +++ b/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22%22,limit=11 @@ -0,0 +1,83 @@ +{ + "total_rows": 839080, + "offset": 422482, + "rows": [ + { + "id": "jquery", + "key": "jquery", + "value": { + "rev": "212-2eac7c93af4c8bccdf7317739f0319b6" + } + }, + { + "id": "jquery-1.8", + "key": "jquery-1.8", + "value": { + "rev": "1-711ded49a7453adce85ce7a51c2157de" + } + }, + { + "id": "jquery-1x", + "key": "jquery-1x", + "value": { + "rev": "1-c53fa04d9c8fb231336704508732c287" + } + }, + { + "id": "jquery-2-typescript-async-await-adapter", + "key": "jquery-2-typescript-async-await-adapter", + "value": { + "rev": "8-5cfb484e9afaa6e326a97240fccd8f93" + } + }, + { + "id": "jquery-accessible-accordion-aria", + "key": "jquery-accessible-accordion-aria", + "value": { + "rev": "15-9fc0df7cb2f1cd1001e2da302443b56e" + } + }, + { + "id": "jquery-accessible-autocomplete-list-aria", + "key": "jquery-accessible-autocomplete-list-aria", + "value": { + "rev": "8-961b382442c1a5bafe58f0e05424701d" + } + }, + { + "id": "jquery-accessible-carrousel-aria", + "key": "jquery-accessible-carrousel-aria", + "value": { + "rev": "9-f33f59d7f601bafe023bd711b551282b" + } + }, + { + "id": "jquery-accessible-dialog-tooltip-aria", + "key": "jquery-accessible-dialog-tooltip-aria", + "value": { + "rev": "12-0a7b5ba6f7717c2c6603cabdb29de9ba" + } + }, + { + "id": "jquery-accessible-hide-show-aria", + "key": "jquery-accessible-hide-show-aria", + "value": { + "rev": "10-5a03c47a8995b08246e4bc103782dafa" + } + }, + { + "id": "jquery-accessible-modal-window-aria", + "key": "jquery-accessible-modal-window-aria", + "value": { + "rev": "18-50266e260f6b807019cfcfcd3a3685ab" + } + }, + { + "id": "jquery-accessible-simple-tooltip-aria", + "key": "jquery-accessible-simple-tooltip-aria", + "value": { + "rev": "6-ea71aa37760790dc603b56117f054e1b" + } + } + ] +} diff --git a/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22jquery-accessible-simple-tooltip-aria%22,limit=11 b/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22jquery-accessible-simple-tooltip-aria%22,limit=11 new file mode 100644 --- /dev/null +++ b/swh/lister/npm/tests/data/https_replicate.npmjs.com/_all_docs,startkey=%22jquery-accessible-simple-tooltip-aria%22,limit=11 @@ -0,0 +1,83 @@ +{ + "total_rows": 839080, + "offset": 422482, + "rows": [ + { + "id": "jquery-accessible-simple-tooltip-aria", + "key": "jquery-accessible-simple-tooltip-aria", + "value": { + "rev": "6-ea71aa37760790dc603b56117f054e1b" + } + }, + { + "id": "jquery-accessible-subnav-dropdown", + "key": "jquery-accessible-subnav-dropdown", + "value": { + "rev": "2-496f017a9ac243655225e43b5697b09b" + } + }, + { + "id": "jquery-accessible-tabpanel-aria", + "key": "jquery-accessible-tabpanel-aria", + "value": { + "rev": "11-659971471e6ac0fbb3b2f78ad208722a" + } + }, + { + "id": "jquery-accessible-tabs-umd", + "key": "jquery-accessible-tabs-umd", + "value": { + "rev": "1-f92015de5bb36e411d8c0940cca2883f" + } + }, + { + "id": "jquery-active-descendant", + "key": "jquery-active-descendant", + "value": { + "rev": "8-79aed7a6cbca4e1f3c3ac0570d0290de" + } + }, + { + "id": "jquery-ada-validation", + "key": "jquery-ada-validation", + "value": { + "rev": "1-9aab9629027c29fbece90485dd9d3112" + } + }, + { + "id": "jquery-adaptText", + "key": "jquery-adaptText", + "value": { + "rev": "3-2e15fc801ea8235b9180a3defc782ed0" + } + }, + { + "id": "jquery-adapttr", + "key": "jquery-adapttr", + "value": { + "rev": "6-74585f2d4be60b3f493585a6d28b90bc" + } + }, + { + "id": "jquery-add-prefixed-class", + "key": "jquery-add-prefixed-class", + "value": { + "rev": "1-9e43aee9758504b3f5271e9804a95f20" + } + }, + { + "id": "jquery-address", + "key": "jquery-address", + "value": { + "rev": "1-64173ede32157b26f4de910ad0f49590" + } + }, + { + "id": "jquery-address-suggestion", + "key": "jquery-address-suggestion", + "value": { + "rev": "6-18d9df51d472c365bcd84a61c9105774" + } + } + ] +} diff --git a/swh/lister/npm/tests/test_lister.py b/swh/lister/npm/tests/test_lister.py --- a/swh/lister/npm/tests/test_lister.py +++ b/swh/lister/npm/tests/test_lister.py @@ -12,6 +12,8 @@ from swh.lister.core.tests.test_lister import HttpListerTesterBase from swh.lister.npm.lister import NpmLister, NpmIncrementalLister +from typing import Any, List + logger = logging.getLogger(__name__) @@ -50,15 +52,12 @@ pass -def test_lister_npm_basic_listing(swh_listers, requests_mock_datadir): - lister = swh_listers['npm'] - - lister.run() +def check_tasks(tasks: List[Any]): + """Ensure scheduled tasks are in the expected format. - r = lister.scheduler.search_tasks(task_type='load-npm') - assert len(r) == 100 - for row in r: + """ + for row in tasks: logger.debug('row: %s', row) assert row['type'] == 'load-npm' # arguments check @@ -76,3 +75,28 @@ assert row['policy'] == 'recurring' assert row['priority'] is None + + +def test_lister_npm_basic_listing(swh_listers, requests_mock_datadir): + lister = swh_listers['npm'] + + lister.run() + + tasks = lister.scheduler.search_tasks(task_type='load-npm') + assert len(tasks) == 100 + + check_tasks(tasks) + + +def test_lister_npm_listing_pagination(swh_listers, requests_mock_datadir): + lister = swh_listers['npm'] + # Patch per page pagination + lister.per_page = 10 + 1 + lister.PATH_TEMPLATE = lister.PATH_TEMPLATE.replace( + '&limit=1001', '&limit=%s' % lister.per_page) + lister.run() + + tasks = lister.scheduler.search_tasks(task_type='load-npm') + assert len(tasks) == 2 * 10 # only 2 files with 10 results each + + check_tasks(tasks)