diff --git a/requirements-test.txt b/requirements-test.txt --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,4 +1,5 @@ pytest +pytest-mock requests_mock sqlalchemy-stubs testing.postgresql diff --git a/swh/lister/bitbucket/__init__.py b/swh/lister/bitbucket/__init__.py --- a/swh/lister/bitbucket/__init__.py +++ b/swh/lister/bitbucket/__init__.py @@ -1,14 +1,13 @@ -# Copyright (C) 2019 the Software Heritage developers +# Copyright (C) 2019-2021 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information def register(): - from .lister import BitBucketLister - from .models import BitBucketModel + from .lister import BitbucketLister return { - "models": [BitBucketModel], - "lister": BitBucketLister, + "models": [], + "lister": BitbucketLister, "task_modules": ["%s.tasks" % __name__], } diff --git a/swh/lister/bitbucket/lister.py b/swh/lister/bitbucket/lister.py --- a/swh/lister/bitbucket/lister.py +++ b/swh/lister/bitbucket/lister.py @@ -1,85 +1,201 @@ -# Copyright (C) 2017-2019 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from datetime import datetime, timezone +from dataclasses import asdict, dataclass +from datetime import datetime import logging -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterator, List, Optional from urllib import parse import iso8601 -from requests import Response +import requests +from tenacity.before_sleep import before_sleep_log -from swh.lister.bitbucket.models import BitBucketModel -from swh.lister.core.indexing_lister import IndexingHttpLister +from swh.lister.utils import throttling_retry +from swh.scheduler.interface import SchedulerInterface +from swh.scheduler.model 
import ListedOrigin + +from .. import USER_AGENT +from ..pattern import CredentialsType, Lister logger = logging.getLogger(__name__) -class BitBucketLister(IndexingHttpLister): - PATH_TEMPLATE = "/repositories?after=%s" - MODEL = BitBucketModel +@dataclass +class BitbucketListerState: + """State of Bitbucket lister""" + + last_repo_cdate: Optional[datetime] = None + """Creation date and time of the last listed repository during an + incremental pass""" + + +class BitbucketLister(Lister[BitbucketListerState, List[Dict[str, Any]]]): + """List origins from Bitbucket using its REST API. + + Bitbucket API has the following rate-limit configuration: + + * 60 requests per hour for anonymous users + + * 1000 requests per hour for authenticated users + + The lister is working in anonymous mode by default but Bitbucket account + credentials can be provided to perform authenticated requests. + """ + LISTER_NAME = "bitbucket" - DEFAULT_URL = "https://api.bitbucket.org/2.0" - instance = "bitbucket" - default_min_bound = datetime.fromtimestamp(0, timezone.utc) # type: Any + INSTANCE = "bitbucket" + + API_URL = "https://api.bitbucket.org/2.0/repositories" def __init__( - self, url: str = None, override_config=None, per_page: int = 100 - ) -> None: - super().__init__(url=url, override_config=override_config) - per_page = self.config.get("per_page", per_page) - - self.PATH_TEMPLATE = "%s&pagelen=%s" % (self.PATH_TEMPLATE, per_page) - - def get_model_from_repo(self, repo: Dict) -> Dict[str, Any]: - return { - "uid": repo["uuid"], - "indexable": iso8601.parse_date(repo["created_on"]), - "name": repo["name"], - "full_name": repo["full_name"], - "html_url": repo["links"]["html"]["href"], - "origin_url": repo["links"]["clone"][0]["href"], - "origin_type": repo["scm"], + self, + scheduler: SchedulerInterface, + page_size: int = 1000, + incremental: bool = True, + credentials: CredentialsType = None, + ): + super().__init__( + scheduler=scheduler, + credentials=credentials, + 
url=self.API_URL, + instance=self.INSTANCE, + ) + + self.incremental = incremental + + self.url_params = { + "pagelen": page_size, + # only return needed JSON fields in bitbucket API responses + # (also prevent errors 500 when listing) + "fields": ( + "next,values.links.clone.href,values.scm,values.updated_on," + "values.created_on" + ), } - def get_next_target_from_response(self, response: Response) -> Optional[datetime]: - """This will read the 'next' link from the api response if any - and return it as a datetime. - - Args: - response (Response): requests' response from api call - - Returns: - next date as a datetime + self.session = requests.Session() + self.session.headers.update( + {"Accept": "application/json", "User-Agent": USER_AGENT} + ) + + if len(self.credentials) > 0: + if len(self.credentials) > 1: + logger.warning( + "Bitbucket lister support only one username:password" + " pair as of now. Will use the first one." + ) + cred = self.credentials[0] + self.set_credentials(cred["username"], cred["password"]) + + def state_from_dict(self, d: Dict[str, Any]) -> BitbucketListerState: + last_repo_cdate = d.get("last_repo_cdate") + if last_repo_cdate is not None: + d["last_repo_cdate"] = iso8601.parse_date(last_repo_cdate) + return BitbucketListerState(**d) + + def state_to_dict(self, state: BitbucketListerState) -> Dict[str, Any]: + d = asdict(state) + last_repo_cdate = d.get("last_repo_cdate") + if last_repo_cdate is not None: + d["last_repo_cdate"] = last_repo_cdate.isoformat() + return d + + def set_credentials(self, username: Optional[str], password: Optional[str]) -> None: + """Set basic authentication headers with given credentials.""" + if username is not None and password is not None: + self.session.auth = (username, password) + + @throttling_retry(before_sleep=before_sleep_log(logger, logging.DEBUG)) + def page_request(self, last_repo_cdate: str) -> requests.Response: + + self.url_params["after"] = last_repo_cdate + logger.debug("Fetching URL %s 
with params %s", self.url, self.url_params) + + response = self.session.get(self.url, params=self.url_params) + + if response.status_code != 200: + logger.warning( + "Unexpected HTTP status code %s on %s: %s", + response.status_code, + response.url, + response.content, + ) + response.raise_for_status() + + return response + + def get_pages(self) -> Iterator[List[Dict[str, Any]]]: + + last_repo_cdate: str = "1970-01-01" + if ( + self.incremental + and self.state is not None + and self.state.last_repo_cdate is not None + ): + last_repo_cdate = self.state.last_repo_cdate.isoformat() + + while True: + body = self.page_request(last_repo_cdate).json() + + yield body["values"] + + next_page_url = body.get("next") + if next_page_url is not None: + next_page_url = parse.urlparse(next_page_url) + if not next_page_url.query: + logger.warning("Failed to parse url %s", next_page_url) + break + last_repo_cdate = parse.parse_qs(next_page_url.query)["after"][0] + else: + # last page + break + + def get_origins_from_page( + self, page: List[Dict[str, Any]] + ) -> Iterator[ListedOrigin]: + """Convert a page of Bitbucket repositories into a list of ListedOrigins. 
""" - body = response.json() - next_ = body.get("next") - if next_ is not None: - next_ = parse.urlparse(next_) - return iso8601.parse_date(parse.parse_qs(next_.query)["after"][0]) - return None - - def transport_response_simplified(self, response: Response) -> List[Dict[str, Any]]: - repos = response.json()["values"] - return [self.get_model_from_repo(repo) for repo in repos] - - def request_uri(self, identifier: datetime) -> str: # type: ignore - identifier_str = parse.quote(identifier.isoformat()) - return super().request_uri(identifier_str or "1970-01-01") - - def is_within_bounds( - self, inner: int, lower: Optional[int] = None, upper: Optional[int] = None - ) -> bool: - # values are expected to be datetimes - if lower is None and upper is None: - ret = True - elif lower is None: - ret = inner <= upper # type: ignore - elif upper is None: - ret = inner >= lower - else: - ret = lower <= inner <= upper - return ret + assert self.lister_obj.id is not None + + for repo in page: + last_update = iso8601.parse_date(repo["updated_on"]) + origin_url = repo["links"]["clone"][0]["href"] + origin_type = repo["scm"] + + yield ListedOrigin( + lister_id=self.lister_obj.id, + url=origin_url, + visit_type=origin_type, + last_update=last_update, + ) + + def commit_page(self, page: List[Dict[str, Any]]) -> None: + """Update the currently stored state using the latest listed page.""" + if self.incremental: + last_repo = page[-1] + last_repo_cdate = iso8601.parse_date(last_repo["created_on"]) + + if ( + self.state.last_repo_cdate is None + or last_repo_cdate > self.state.last_repo_cdate + ): + self.state.last_repo_cdate = last_repo_cdate + + def finalize(self) -> None: + if self.incremental: + scheduler_state = self.get_state_from_scheduler() + + if self.state.last_repo_cdate is None: + return + + # Update the lister state in the backend only if the last seen id of + # the current run is higher than that stored in the database. 
+ if ( + scheduler_state.last_repo_cdate is None + or self.state.last_repo_cdate > scheduler_state.last_repo_cdate + ): + self.updated = True diff --git a/swh/lister/bitbucket/models.py b/swh/lister/bitbucket/models.py deleted file mode 100644 --- a/swh/lister/bitbucket/models.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2017-2019 the Software Heritage developers -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - -from sqlalchemy import Column, DateTime, String - -from swh.lister.core.models import IndexingModelBase - - -class BitBucketModel(IndexingModelBase): - """a BitBucket repository""" - - __tablename__ = "bitbucket_repo" - - uid = Column(String, primary_key=True) - indexable = Column(DateTime(timezone=True), index=True) diff --git a/swh/lister/bitbucket/tasks.py b/swh/lister/bitbucket/tasks.py --- a/swh/lister/bitbucket/tasks.py +++ b/swh/lister/bitbucket/tasks.py @@ -1,53 +1,36 @@ -# Copyright (C) 2017-2019 the Software Heritage developers +# Copyright (C) 2017-2021 the Software Heritage developers # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import random +from typing import Optional -from celery import group, shared_task +from celery import shared_task -from .lister import BitBucketLister - -GROUP_SPLIT = 10000 +from .lister import BitbucketLister @shared_task(name=__name__ + ".IncrementalBitBucketLister") -def list_bitbucket_incremental(**lister_args): - """Incremental update of the BitBucket forge""" - lister = BitBucketLister(**lister_args) - return lister.run(min_bound=lister.db_last_index(), max_bound=None) - - -@shared_task(name=__name__ + ".RangeBitBucketLister") -def _range_bitbucket_lister(start, end, **lister_args): - lister = BitBucketLister(**lister_args) - return lister.run(min_bound=start, max_bound=end) - - -@shared_task(name=__name__ + ".FullBitBucketRelister", bind=True) -def 
list_bitbucket_full(self, split=None, **lister_args): - """Full update of the BitBucket forge - - It's not to be called for an initial listing. - - """ - lister = BitBucketLister(**lister_args) - ranges = lister.db_partition_indices(split or GROUP_SPLIT) - if not ranges: - self.log.info("Nothing to list") - return - - random.shuffle(ranges) - promise = group( - _range_bitbucket_lister.s(minv, maxv, **lister_args) for minv, maxv in ranges - )() - self.log.debug("%s OK (spawned %s subtasks)", (self.name, len(ranges))) - try: - promise.save() # so that we can restore the GroupResult in tests - except (NotImplementedError, AttributeError): - self.log.info("Unable to call save_group with current result backend.") - # FIXME: what to do in terms of return here? - return promise.id +def list_bitbucket_incremental( + page_size: Optional[int] = None, + username: Optional[str] = None, + password: Optional[str] = None, +): + """Incremental listing of the public Bitbucket repositories.""" + lister = BitbucketLister.from_configfile(page_size=page_size, incremental=True) + lister.set_credentials(username, password) + return lister.run().dict() + + +@shared_task(name=__name__ + ".FullBitBucketRelister") +def list_bitbucket_full( + page_size: Optional[int] = None, + username: Optional[str] = None, + password: Optional[str] = None, +): + """Full listing of the public Bitbucket repositories.""" + lister = BitbucketLister.from_configfile(page_size=page_size, incremental=False) + lister.set_credentials(username, password) + return lister.run().dict() @shared_task(name=__name__ + ".ping") diff --git a/swh/lister/bitbucket/tests/conftest.py b/swh/lister/bitbucket/tests/conftest.py --- a/swh/lister/bitbucket/tests/conftest.py +++ b/swh/lister/bitbucket/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU 
General Public License version 3, or any later version # See top-level LICENSE file for more information diff --git a/swh/lister/bitbucket/tests/data/bb_api_repositories_page1.json b/swh/lister/bitbucket/tests/data/bb_api_repositories_page1.json new file mode 100644 --- /dev/null +++ b/swh/lister/bitbucket/tests/data/bb_api_repositories_page1.json @@ -0,0 +1,124 @@ +{ + "values": [{ + "scm": "git", + "updated_on": "2014-11-16T23:19:16.674082+00:00", + "created_on": "2011-06-06T03:40:09.505792+00:00", + "slug": "xwork", + "links": { + "clone": [{ + "href": "https://bitbucket.org/opensymphony/xwork.git" + }, { + "href": "git@bitbucket.org:opensymphony/xwork.git" + }] + } + }, { + "scm": "git", + "updated_on": "2013-08-16T05:17:12.385393+00:00", + "created_on": "2011-06-07T02:25:57.515877+00:00", + "slug": "webwork", + "links": { + "clone": [{ + "href": "https://bitbucket.org/opensymphony/webwork.git" + }, { + "href": "git@bitbucket.org:opensymphony/webwork.git" + }] + } + }, { + "scm": "git", + "updated_on": "2017-02-05T20:25:16.398281+00:00", + "created_on": "2011-06-07T04:13:28.097554+00:00", + "slug": "propertyset", + "links": { + "clone": [{ + "href": "https://bitbucket.org/opensymphony/propertyset.git" + }, { + "href": "git@bitbucket.org:opensymphony/propertyset.git" + }] + } + }, { + "scm": "git", + "updated_on": "2012-07-06T23:05:13.437602+00:00", + "created_on": "2011-06-07T04:15:47.909191+00:00", + "slug": "quartz", + "links": { + "clone": [{ + "href": "https://bitbucket.org/opensymphony/quartz.git" + }, { + "href": "git@bitbucket.org:opensymphony/quartz.git" + }] + } + }, { + "scm": "git", + "updated_on": "2018-02-06T04:36:52.369420+00:00", + "created_on": "2011-06-16T09:16:27.957216+00:00", + "slug": "opup", + "links": { + "clone": [{ + "href": "https://bitbucket.org/jwalton/opup.git" + }, { + "href": "git@bitbucket.org:jwalton/opup.git" + }] + } + }, { + "scm": "git", + "updated_on": "2017-03-19T16:09:30.336053+00:00", + "created_on": 
"2011-07-08T08:59:53.298617+00:00", + "slug": "git-scripts", + "links": { + "clone": [{ + "href": "https://bitbucket.org/jwalton/git-scripts.git" + }, { + "href": "git@bitbucket.org:jwalton/git-scripts.git" + }] + } + }, { + "scm": "git", + "updated_on": "2015-10-15T17:35:06.978690+00:00", + "created_on": "2011-08-10T00:42:35.509559+00:00", + "slug": "git-tests", + "links": { + "clone": [{ + "href": "https://bitbucket.org/evzijst/git-tests.git" + }, { + "href": "git@bitbucket.org:evzijst/git-tests.git" + }] + } + }, { + "scm": "git", + "updated_on": "2013-07-17T23:08:05.997544+00:00", + "created_on": "2011-08-10T03:48:05.820933+00:00", + "slug": "libgit2", + "links": { + "clone": [{ + "href": "https://bitbucket.org/brodie/libgit2.git" + }, { + "href": "git@bitbucket.org:brodie/libgit2.git" + }] + } + }, { + "scm": "git", + "updated_on": "2013-10-10T23:43:15.183665+00:00", + "created_on": "2011-08-15T05:19:11.022316+00:00", + "slug": "git", + "links": { + "clone": [{ + "href": "https://bitbucket.org/evzijst/git.git" + }, { + "href": "git@bitbucket.org:evzijst/git.git" + }] + } + }, { + "scm": "git", + "updated_on": "2013-06-12T22:42:52.654728+00:00", + "created_on": "2011-08-18T00:17:00.862842+00:00", + "slug": "streams-jira-delete-issue-plugin", + "links": { + "clone": [{ + "href": "https://bitbucket.org/atlassian_tutorial/streams-jira-delete-issue-plugin.git" + }, { + "href": "git@bitbucket.org:atlassian_tutorial/streams-jira-delete-issue-plugin.git" + }] + } + }], + "next": "https://api.bitbucket.org/2.0/repositories?pagelen=10&after=2011-09-03T12%3A33%3A16.028393%2B00%3A00&fields=next%2Cvalues.links.clone.href%2Cvalues.slug%2Cvalues.scm%2Cvalues.updated_on%2Cvalues.created_on" +} \ No newline at end of file diff --git a/swh/lister/bitbucket/tests/data/bb_api_repositories_page2.json b/swh/lister/bitbucket/tests/data/bb_api_repositories_page2.json new file mode 100644 --- /dev/null +++ b/swh/lister/bitbucket/tests/data/bb_api_repositories_page2.json @@ -0,0 +1,123 
@@ +{ + "values": [{ + "scm": "git", + "updated_on": "2012-08-08T21:49:39.837528+00:00", + "created_on": "2011-09-03T12:33:16.028393+00:00", + "slug": "jreversepro", + "links": { + "clone": [{ + "href": "https://bitbucket.org/puffnfresh/jreversepro.git" + }, { + "href": "git@bitbucket.org:puffnfresh/jreversepro.git" + }] + } + }, { + "scm": "git", + "updated_on": "2012-07-24T08:11:00.229299+00:00", + "created_on": "2011-09-08T01:43:21.182004+00:00", + "slug": "jira4-compat", + "links": { + "clone": [{ + "href": "https://bitbucket.org/mrdon/jira4-compat.git" + }, { + "href": "git@bitbucket.org:mrdon/jira4-compat.git" + }] + } + }, { + "scm": "git", + "updated_on": "2019-03-29T16:07:35.214957+00:00", + "created_on": "2011-09-12T20:21:47.109184+00:00", + "slug": "chrome-confluence-activity-stream", + "links": { + "clone": [{ + "href": "https://bitbucket.org/rmanalan/chrome-confluence-activity-stream.git" + }, { + "href": "git@bitbucket.org:rmanalan/chrome-confluence-activity-stream.git" + }] + } + }, { + "scm": "git", + "updated_on": "2014-03-31T14:30:43.850637+00:00", + "created_on": "2011-09-14T05:21:02.811713+00:00", + "slug": "anode", + "links": { + "clone": [{ + "href": "https://bitbucket.org/tarkasteve/anode.git" + }, { + "href": "git@bitbucket.org:tarkasteve/anode.git" + }] + } + }, { + "scm": "git", + "updated_on": "2011-11-02T07:45:17.681629+00:00", + "created_on": "2011-09-20T04:27:56.852255+00:00", + "slug": "pac-release-plugin", + "links": { + "clone": [{ + "href": "https://bitbucket.org/jschumacher/pac-release-plugin.git" + }, { + "href": "git@bitbucket.org:jschumacher/pac-release-plugin.git" + }] + } + }, { + "scm": "git", + "updated_on": "2012-07-27T00:54:30.098265+00:00", + "created_on": "2011-09-21T00:05:50.970472+00:00", + "slug": "metadata-confluence-plugin", + "links": { + "clone": [{ + "href": "https://bitbucket.org/jwalton/metadata-confluence-plugin.git" + }, { + "href": "git@bitbucket.org:jwalton/metadata-confluence-plugin.git" + }] + } + }, { + 
"scm": "git", + "updated_on": "2014-01-18T05:28:48.832287+00:00", + "created_on": "2011-09-21T22:05:29.955410+00:00", + "slug": "coffee-script", + "links": { + "clone": [{ + "href": "https://bitbucket.org/detkin/coffee-script.git" + }, { + "href": "git@bitbucket.org:detkin/coffee-script.git" + }] + } + }, { + "scm": "git", + "updated_on": "2012-07-17T23:32:25.879023+00:00", + "created_on": "2011-09-27T08:37:17.132670+00:00", + "slug": "taleo-link-fix", + "links": { + "clone": [{ + "href": "https://bitbucket.org/christo/taleo-link-fix.git" + }, { + "href": "git@bitbucket.org:christo/taleo-link-fix.git" + }] + } + }, { + "scm": "git", + "updated_on": "2012-06-26T22:55:05.634860+00:00", + "created_on": "2011-09-27T21:10:47.586400+00:00", + "slug": "bdoc", + "links": { + "clone": [{ + "href": "https://bitbucket.org/rmanalan/bdoc.git" + }, { + "href": "git@bitbucket.org:rmanalan/bdoc.git" + }] + } + }, { + "scm": "git", + "updated_on": "2020-04-20T18:16:50.540634+00:00", + "created_on": "2011-09-29T23:36:49.719055+00:00", + "slug": "git", + "links": { + "clone": [{ + "href": "https://bitbucket.org/mirror/git.git" + }, { + "href": "git@bitbucket.org:mirror/git.git" + }] + } + }] +} \ No newline at end of file diff --git a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/2.0_repositories,after=1970-01-01T00:00:00+00:00,pagelen=100 b/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/2.0_repositories,after=1970-01-01T00:00:00+00:00,pagelen=100 deleted file mode 100644 --- a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/2.0_repositories,after=1970-01-01T00:00:00+00:00,pagelen=100 +++ /dev/null @@ -1,806 +0,0 @@ -{ - "pagelen": 10, - "values": [ - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "app-template", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/refs/branches" - }, - 
"tags": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/bebac/app-template", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/bebac/app-template", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template" - }, - "html": { - "href": "https://bitbucket.org/bebac/app-template" - }, - "avatar": { - "href": "https://bitbucket.org/bebac/app-template/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/bebac/app-template/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{0cf80a6e-e91f-4a4c-a61b-8c8ff51ca3ec}", - "language": "c++", - "created_on": "2008-07-12T07:44:01.476818+00:00", - "full_name": "bebac/app-template", - "has_issues": true, - "owner": { - "username": "bebac", - "display_name": "Benny Bach", - "type": "user", - "uuid": "{d1a83a2a-be1b-4034-8c1d-386a6690cddb}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/bebac" - }, - "html": { - "href": "https://bitbucket.org/bebac/" - }, - "avatar": { - "href": "https://bitbucket.org/account/bebac/avatar/32/" - } - } - }, - "updated_on": "2011-10-05T15:36:19.409008+00:00", - "size": 71548, - "type": "repository", - "slug": "app-template", - "is_private": false, - "description": "Basic files and directory structure for a C++ project. Intended as a starting point for a new project. Includes a basic cross platform core library." 
- }, - { - "scm": "git", - "website": "", - "has_wiki": true, - "name": "mercurialeclipse", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/bastiand/mercurialeclipse", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/bastiand/mercurialeclipse", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse" - }, - "html": { - "href": "https://bitbucket.org/bastiand/mercurialeclipse" - }, - "avatar": { - "href": "https://bitbucket.org/bastiand/mercurialeclipse/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/bastiand/mercurialeclipse/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{f7a08670-bdd1-4465-aa97-7a5ce8d1a25b}", - "language": "", - "created_on": "2008-07-12T09:37:06.254721+00:00", - "full_name": "bastiand/mercurialeclipse", - "has_issues": false, - "owner": { - "username": "bastiand", - "display_name": "Bastian Doetsch", - "type": "user", - "uuid": "{3742cd48-adad-4205-ab0d-04fc992a1728}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/bastiand" - }, - "html": { - "href": "https://bitbucket.org/bastiand/" - }, - "avatar": { - "href": 
"https://bitbucket.org/account/bastiand/avatar/32/" - } - } - }, - "updated_on": "2011-09-17T02:36:59.062596+00:00", - "size": 6445145, - "type": "repository", - "slug": "mercurialeclipse", - "is_private": false, - "description": "my own repo for MercurialEclipse." - }, - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "sandboxpublic", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/aleax/sandboxpublic", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/aleax/sandboxpublic", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic" - }, - "html": { - "href": "https://bitbucket.org/aleax/sandboxpublic" - }, - "avatar": { - "href": "https://bitbucket.org/aleax/sandboxpublic/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/aleax/sandboxpublic/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{452c716c-a1ce-42bc-a95b-d38da49cbb37}", - "language": "", - "created_on": "2008-07-14T01:59:23.568048+00:00", - "full_name": "aleax/sandboxpublic", - "has_issues": true, - "owner": { - "username": "aleax", - "display_name": "Alex Martelli", - "type": "user", - "uuid": "{1155d94d-fb48-43fe-a431-ec07c900b636}", - 
"links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/aleax" - }, - "html": { - "href": "https://bitbucket.org/aleax/" - }, - "avatar": { - "href": "https://bitbucket.org/account/aleax/avatar/32/" - } - } - }, - "updated_on": "2012-06-22T21:55:28.753727+00:00", - "size": 3120, - "type": "repository", - "slug": "sandboxpublic", - "is_private": false, - "description": "to help debug ACLs for private vs public bitbucket repos" - }, - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "otrsfix-ng", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/adiakin/otrsfix-ng", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/adiakin/otrsfix-ng", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng" - }, - "html": { - "href": "https://bitbucket.org/adiakin/otrsfix-ng" - }, - "avatar": { - "href": "https://bitbucket.org/adiakin/otrsfix-ng/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/adiakin/otrsfix-ng/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{05b1b9dc-a7b6-46d6-ae1b-e66a17aa4f55}", - "language": "", - "created_on": "2008-07-15T06:14:39.306314+00:00", - "full_name": "adiakin/otrsfix-ng", - 
"has_issues": true, - "owner": { - "username": "adiakin", - "display_name": "adiakin", - "type": "user", - "uuid": "{414012b5-1ac9-4096-9f46-8893cfa3cda5}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/adiakin" - }, - "html": { - "href": "https://bitbucket.org/adiakin/" - }, - "avatar": { - "href": "https://bitbucket.org/account/adiakin/avatar/32/" - } - } - }, - "updated_on": "2016-06-02T18:56:34.868302+00:00", - "size": 211631, - "type": "repository", - "slug": "otrsfix-ng", - "is_private": false, - "description": "OTRS greasemonkey extension" - }, - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "pida-pypaned", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/aafshar/pida-pypaned", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/aafshar/pida-pypaned", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned" - }, - "html": { - "href": "https://bitbucket.org/aafshar/pida-pypaned" - }, - "avatar": { - "href": "https://bitbucket.org/aafshar/pida-pypaned/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/aafshar/pida-pypaned/pullrequests" - } - }, - "fork_policy": "allow_forks", - 
"uuid": "{94cb830a-1784-4e51-9791-8f5cc93990a9}", - "language": "", - "created_on": "2008-07-16T22:47:38.682491+00:00", - "full_name": "aafshar/pida-pypaned", - "has_issues": true, - "owner": { - "username": "aafshar", - "display_name": "Ali Afshar", - "type": "user", - "uuid": "{bcb87110-6a92-41fc-b95c-680feeea5512}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/aafshar" - }, - "html": { - "href": "https://bitbucket.org/aafshar/" - }, - "avatar": { - "href": "https://bitbucket.org/account/aafshar/avatar/32/" - } - } - }, - "updated_on": "2012-06-22T21:55:42.451431+00:00", - "size": 4680652, - "type": "repository", - "slug": "pida-pypaned", - "is_private": false, - "description": "" - }, - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "TLOMM-testing", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/tgrimley/tlomm-testing", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/tgrimley/tlomm-testing", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing" - }, - "html": { - "href": "https://bitbucket.org/tgrimley/tlomm-testing" - }, - "avatar": { - "href": "https://bitbucket.org/tgrimley/tlomm-testing/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/forks" - }, - "downloads": { - "href": 
"https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/tgrimley/tlomm-testing/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{95283ca1-f77e-40d6-b3ed-5bfae6ed2d15}", - "language": "", - "created_on": "2008-07-18T21:05:17.750587+00:00", - "full_name": "tgrimley/tlomm-testing", - "has_issues": true, - "owner": { - "username": "tgrimley", - "display_name": "Thomas Grimley", - "type": "user", - "uuid": "{c958a08f-4669-4c77-81ec-4e2faa8ebf35}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/tgrimley" - }, - "html": { - "href": "https://bitbucket.org/tgrimley/" - }, - "avatar": { - "href": "https://bitbucket.org/account/tgrimley/avatar/32/" - } - } - }, - "updated_on": "2012-06-22T21:55:46.627825+00:00", - "size": 3128, - "type": "repository", - "slug": "tlomm-testing", - "is_private": false, - "description": "File related to testing functionality of TLOMM->TLOTTS transition" - }, - { - "scm": "hg", - "website": "", - "has_wiki": true, - "name": "test", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/tingle/test", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/tingle/test", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test" - }, - "html": { - "href": "https://bitbucket.org/tingle/test" - }, - "avatar": { - "href": "https://bitbucket.org/tingle/test/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/hooks" - }, - "forks": { 
- "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/tingle/test/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{457953ec-fe87-41b9-b659-94756fb40ece}", - "language": "", - "created_on": "2008-07-18T22:24:31.984981+00:00", - "full_name": "tingle/test", - "has_issues": true, - "owner": { - "username": "tingle", - "display_name": "tingle", - "type": "user", - "uuid": "{dddce42b-bd19-417b-90ff-72cdbfb6eb7d}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/tingle" - }, - "html": { - "href": "https://bitbucket.org/tingle/" - }, - "avatar": { - "href": "https://bitbucket.org/account/tingle/avatar/32/" - } - } - }, - "updated_on": "2012-06-22T21:55:49.860564+00:00", - "size": 3090, - "type": "repository", - "slug": "test", - "is_private": false, - "description": "" - }, - { - "scm": "hg", - "website": "http://shaze.myopenid.com/", - "has_wiki": true, - "name": "Repository", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/Shaze/repository", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/Shaze/repository", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository" - }, - "html": { - "href": "https://bitbucket.org/Shaze/repository" - }, - "avatar": { - "href": "https://bitbucket.org/Shaze/repository/avatar/32/" - }, - "hooks": { - "href": 
"https://api.bitbucket.org/2.0/repositories/Shaze/repository/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/Shaze/repository/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{3c0b8076-caef-465a-8d08-a184459f659b}", - "language": "", - "created_on": "2008-07-18T22:39:51.380134+00:00", - "full_name": "Shaze/repository", - "has_issues": true, - "owner": { - "username": "Shaze", - "display_name": "Shaze", - "type": "user", - "uuid": "{f57817e9-bfe4-4c65-84dd-662152430323}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/Shaze" - }, - "html": { - "href": "https://bitbucket.org/Shaze/" - }, - "avatar": { - "href": "https://bitbucket.org/account/Shaze/avatar/32/" - } - } - }, - "updated_on": "2012-06-22T21:55:51.570502+00:00", - "size": 3052, - "type": "repository", - "slug": "repository", - "is_private": false, - "description": "Mine, all mine!" 
- }, - { - "scm": "hg", - "website": "http://bitbucket.org/copiesofcopies/identifox/", - "has_wiki": true, - "name": "identifox", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/uncryptic/identifox", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/uncryptic/identifox", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox" - }, - "html": { - "href": "https://bitbucket.org/uncryptic/identifox" - }, - "avatar": { - "href": "https://bitbucket.org/uncryptic/identifox/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/uncryptic/identifox/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{78a1a080-a77e-4d0d-823a-b107484477a8}", - "language": "", - "created_on": "2008-07-19T00:33:14.065946+00:00", - "full_name": "uncryptic/identifox", - "has_issues": true, - "owner": { - "username": "uncryptic", - "display_name": "Uncryptic Communications", - "type": "user", - "uuid": "{db87bb9a-9980-4840-bd4a-61f7748a56b4}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/uncryptic" - }, - "html": { - "href": "https://bitbucket.org/uncryptic/" - }, - "avatar": { - "href": 
"https://bitbucket.org/account/uncryptic/avatar/32/" - } - } - }, - "updated_on": "2008-07-19T00:33:14+00:00", - "size": 1918, - "type": "repository", - "slug": "identifox", - "is_private": false, - "description": "TwitterFox, modified to work with Identi.ca, including cosmetic and subtle code changes. For the most part, the code is nearly identical to the TwitterFox base: http://www.naan.net/trac/wiki/TwitterFox" - }, - { - "scm": "hg", - "website": "http://rforce.rubyforge.org", - "has_wiki": false, - "name": "rforce", - "links": { - "watchers": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/watchers" - }, - "branches": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/refs/branches" - }, - "tags": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/refs/tags" - }, - "commits": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/commits" - }, - "clone": [ - { - "href": "https://bitbucket.org/undees/rforce", - "name": "https" - }, - { - "href": "ssh://hg@bitbucket.org/undees/rforce", - "name": "ssh" - } - ], - "self": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce" - }, - "html": { - "href": "https://bitbucket.org/undees/rforce" - }, - "avatar": { - "href": "https://bitbucket.org/undees/rforce/avatar/32/" - }, - "hooks": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/hooks" - }, - "forks": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/forks" - }, - "downloads": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/downloads" - }, - "pullrequests": { - "href": "https://api.bitbucket.org/2.0/repositories/undees/rforce/pullrequests" - } - }, - "fork_policy": "allow_forks", - "uuid": "{ec2ffee7-bfcd-4e95-83c8-22ac31e69fa3}", - "language": "", - "created_on": "2008-07-19T06:16:43.044743+00:00", - "full_name": "undees/rforce", - "has_issues": false, - "owner": { - "username": "undees", - "display_name": "Ian 
Dees", - "type": "user", - "uuid": "{6ff66a34-6412-4f28-bf57-707a2a5c6d7b}", - "links": { - "self": { - "href": "https://api.bitbucket.org/2.0/users/undees" - }, - "html": { - "href": "https://bitbucket.org/undees/" - }, - "avatar": { - "href": "https://bitbucket.org/account/undees/avatar/32/" - } - } - }, - "updated_on": "2015-02-09T00:48:15.408680+00:00", - "size": 338402, - "type": "repository", - "slug": "rforce", - "is_private": false, - "description": "A simple, usable binding to the SalesForce API." - } - ], - "next": "https://api.bitbucket.org/2.0/repositories?after=2008-07-19T19%3A53%3A07.031845%2B00%3A00" -} diff --git a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/empty_response.json b/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/empty_response.json deleted file mode 100644 --- a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/empty_response.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "pagelen": 10, - "values": [] -} diff --git a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/response.json b/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/response.json deleted file mode 120000 --- a/swh/lister/bitbucket/tests/data/https_api.bitbucket.org/response.json +++ /dev/null @@ -1 +0,0 @@ -2.0_repositories,after=1970-01-01T00:00:00+00:00,pagelen=100 \ No newline at end of file diff --git a/swh/lister/bitbucket/tests/test_lister.py b/swh/lister/bitbucket/tests/test_lister.py --- a/swh/lister/bitbucket/tests/test_lister.py +++ b/swh/lister/bitbucket/tests/test_lister.py @@ -1,117 +1,181 @@ -# Copyright (C) 2017-2020 The Software Heritage developers +# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from datetime import timedelta -import re -import unittest -from urllib.parse import unquote - -import iso8601 -import 
requests_mock - -from swh.lister.bitbucket.lister import BitBucketLister -from swh.lister.core.tests.test_lister import HttpListerTester - - -def _convert_type(req_index): - """Convert the req_index to its right type according to the model's - "indexable" column. - - """ - return iso8601.parse_date(unquote(req_index)) - - -class BitBucketListerTester(HttpListerTester, unittest.TestCase): - Lister = BitBucketLister - test_re = re.compile(r"/repositories\?after=([^?&]+)") - lister_subdir = "bitbucket" - good_api_response_file = "data/https_api.bitbucket.org/response.json" - bad_api_response_file = "data/https_api.bitbucket.org/empty_response.json" - first_index = _convert_type("2008-07-12T07:44:01.476818+00:00") - last_index = _convert_type("2008-07-19T06:16:43.044743+00:00") - entries_per_page = 10 - convert_type = _convert_type - - def request_index(self, request): - """(Override) This is needed to emulate the listing bootstrap - when no min_bound is provided to run - """ - m = self.test_re.search(request.path_url) - idx = _convert_type(m.group(1)) - if idx == self.Lister.default_min_bound: - idx = self.first_index - return idx - - @requests_mock.Mocker() - def test_fetch_none_nodb(self, http_mocker): - """Overridden because index is not an integer nor a string - - """ - http_mocker.get(self.test_re, text=self.mock_response) - fl = self.get_fl() - - self.disable_scheduler(fl) - self.disable_db(fl) - - # stores no results - fl.run( - min_bound=self.first_index - timedelta(days=3), max_bound=self.first_index - ) - - def test_is_within_bounds(self): - fl = self.get_fl() - self.assertTrue( - fl.is_within_bounds( - iso8601.parse_date("2008-07-15"), self.first_index, self.last_index - ) - ) - self.assertFalse( - fl.is_within_bounds( - iso8601.parse_date("2008-07-20"), self.first_index, self.last_index - ) - ) - self.assertFalse( - fl.is_within_bounds( - iso8601.parse_date("2008-07-11"), self.first_index, self.last_index - ) - ) - - -def 
test_lister_bitbucket(lister_bitbucket, requests_mock_datadir): - """Simple bitbucket listing should create scheduled tasks (git, hg) - - """ - lister_bitbucket.run() - - r = lister_bitbucket.scheduler.search_tasks(task_type="load-hg") - assert len(r) == 9 - - for row in r: - args = row["arguments"]["args"] - kwargs = row["arguments"]["kwargs"] - - assert len(args) == 0 - assert len(kwargs) == 1 - url = kwargs["url"] - - assert url.startswith("https://bitbucket.org") - - assert row["policy"] == "recurring" - assert row["priority"] is None - - r = lister_bitbucket.scheduler.search_tasks(task_type="load-git") - assert len(r) == 1 - - for row in r: - args = row["arguments"]["args"] - kwargs = row["arguments"]["kwargs"] - assert len(args) == 0 - assert len(kwargs) == 1 - url = kwargs["url"] - - assert url.startswith("https://bitbucket.org") - - assert row["policy"] == "recurring" - assert row["priority"] is None +from datetime import datetime +import json +import os + +import pytest + +from swh.lister.bitbucket.lister import BitbucketLister + + +@pytest.fixture +def bb_api_repositories_page1(datadir): + data_file_path = os.path.join(datadir, "bb_api_repositories_page1.json") + with open(data_file_path, "r") as data_file: + return json.load(data_file) + + +@pytest.fixture +def bb_api_repositories_page2(datadir): + data_file_path = os.path.join(datadir, "bb_api_repositories_page2.json") + with open(data_file_path, "r") as data_file: + return json.load(data_file) + + +def _check_listed_origins(lister_origins, scheduler_origins): + """Asserts that the two collections have the same origins from the point of view of + the lister""" + + sorted_lister_origins = list(sorted(lister_origins)) + sorted_scheduler_origins = list(sorted(scheduler_origins)) + + assert len(sorted_lister_origins) == len(sorted_scheduler_origins) + + for lo, so in zip(sorted_lister_origins, sorted_scheduler_origins): + assert lo.url == so.url + assert lo.last_update == so.last_update + + +def 
test_bitbucket_incremental_lister( + swh_scheduler, + requests_mock, + mocker, + bb_api_repositories_page1, + bb_api_repositories_page2, +): + """Simple Bitbucket listing with two pages containing 10 origins each""" + + requests_mock.get( + BitbucketLister.API_URL, + [{"json": bb_api_repositories_page1}, {"json": bb_api_repositories_page2},], + ) + + lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) + + # First listing + stats = lister.run() + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins + + assert stats.pages == 2 + assert stats.origins == 20 + assert len(scheduler_origins) == 20 + + assert lister.updated + lister_state = lister.get_state_from_scheduler() + last_repo_cdate = lister_state.last_repo_cdate.isoformat() + assert hasattr(lister_state, "last_repo_cdate") + assert last_repo_cdate == bb_api_repositories_page2["values"][-1]["created_on"] + + # Second listing, restarting from last state + lister.session.get = mocker.spy(lister.session, "get") + + lister.run() + + url_params = lister.url_params + url_params["after"] = last_repo_cdate + + lister.session.get.assert_called_once_with(lister.API_URL, params=url_params) + + all_origins = ( + bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] + ) + + _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) + + +def test_bitbucket_lister_rate_limit_hit( + swh_scheduler, + requests_mock, + mocker, + bb_api_repositories_page1, + bb_api_repositories_page2, +): + """Listing two pages of 10 origins each must survive 429 rate-limit responses""" + + requests_mock.get( + BitbucketLister.API_URL, + [ + {"json": bb_api_repositories_page1, "status_code": 200}, + {"json": None, "status_code": 429}, + {"json": None, "status_code": 429}, + {"json": bb_api_repositories_page2, "status_code": 200}, + ], + ) + + lister = BitbucketLister(scheduler=swh_scheduler, page_size=10) + + mocker.patch.object(lister.page_request.retry, "sleep") + + stats = 
lister.run() + + assert stats.pages == 2 + assert stats.origins == 20 + assert len(swh_scheduler.get_listed_origins(lister.lister_obj.id).origins) == 20 + + +def test_bitbucket_full_lister( + swh_scheduler, + requests_mock, + mocker, + bb_api_repositories_page1, + bb_api_repositories_page2, +): + """Full (non-incremental) listing must leave the saved lister state unchanged""" + + requests_mock.get( + BitbucketLister.API_URL, + [ + {"json": bb_api_repositories_page1}, + {"json": bb_api_repositories_page2}, + {"json": bb_api_repositories_page1}, + {"json": bb_api_repositories_page2}, + ], + ) + + credentials = {"bitbucket": {"bitbucket": [{"username": "u", "password": "p"}]}} + lister = BitbucketLister( + scheduler=swh_scheduler, page_size=10, incremental=True, credentials=credentials + ) + assert lister.session.auth is not None + + # First do an incremental run to have an initial lister state + stats = lister.run() + + last_lister_state = lister.get_state_from_scheduler() + + assert stats.origins == 20 + + # Then do the full run and verify lister state did not change + # Modify last listed repo modification date to check it will not be saved + # to lister state after its execution + last_page2_repo = bb_api_repositories_page2["values"][-1] + last_page2_repo["created_on"] = datetime.now().isoformat() + last_page2_repo["updated_on"] = datetime.now().isoformat() + + lister = BitbucketLister(scheduler=swh_scheduler, page_size=10, incremental=False) + assert lister.session.auth is None + + stats = lister.run() + + assert stats.pages == 2 + assert stats.origins == 20 + + scheduler_origins = swh_scheduler.get_listed_origins(lister.lister_obj.id).origins + + # 20 because scheduler upserts based on (id, type, url) + assert len(scheduler_origins) == 20 + + # Modification on created_on SHOULD NOT impact lister state + assert lister.get_state_from_scheduler() == last_lister_state + + # Modification on updated_on SHOULD impact the listed origins (last_update) + all_origins = ( + 
bb_api_repositories_page1["values"] + bb_api_repositories_page2["values"] + ) + + _check_listed_origins(lister.get_origins_from_page(all_origins), scheduler_origins) diff --git a/swh/lister/bitbucket/tests/test_tasks.py b/swh/lister/bitbucket/tests/test_tasks.py --- a/swh/lister/bitbucket/tests/test_tasks.py +++ b/swh/lister/bitbucket/tests/test_tasks.py @@ -1,12 +1,11 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from time import sleep from unittest.mock import patch -from celery.result import GroupResult +from swh.lister.pattern import ListerStats def test_ping(swh_scheduler_celery_app, swh_scheduler_celery_worker): @@ -17,79 +16,35 @@ assert res.result == "OK" -@patch("swh.lister.bitbucket.tasks.BitBucketLister") -def test_incremental(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): - # setup the mocked BitbucketLister - lister.return_value = lister - lister.db_last_index.return_value = 42 - lister.run.return_value = None +@patch("swh.lister.bitbucket.tasks.BitbucketLister") +def test_incremental_listing( + lister, swh_scheduler_celery_app, swh_scheduler_celery_worker +): + lister.from_configfile.return_value = lister + lister.run.return_value = ListerStats(pages=5, origins=5000) res = swh_scheduler_celery_app.send_task( - "swh.lister.bitbucket.tasks.IncrementalBitBucketLister" + "swh.lister.bitbucket.tasks.IncrementalBitBucketLister", + kwargs=dict(page_size=100, username="username", password="password",), ) assert res res.wait() assert res.successful() - lister.assert_called_once_with() - lister.db_last_index.assert_called_once_with() - lister.run.assert_called_once_with(min_bound=42, max_bound=None) + lister.run.assert_called_once() 
-@patch("swh.lister.bitbucket.tasks.BitBucketLister") -def test_range(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): - # setup the mocked BitbucketLister - lister.return_value = lister - lister.run.return_value = None +@patch("swh.lister.bitbucket.tasks.BitbucketLister") +def test_full_listing(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): + lister.from_configfile.return_value = lister + lister.run.return_value = ListerStats(pages=5, origins=5000) res = swh_scheduler_celery_app.send_task( - "swh.lister.bitbucket.tasks.RangeBitBucketLister", kwargs=dict(start=12, end=42) + "swh.lister.bitbucket.tasks.FullBitBucketRelister", + kwargs=dict(page_size=100, username="username", password="password",), ) assert res res.wait() assert res.successful() - lister.assert_called_once_with() - lister.db_last_index.assert_not_called() - lister.run.assert_called_once_with(min_bound=12, max_bound=42) - - -@patch("swh.lister.bitbucket.tasks.BitBucketLister") -def test_relister(lister, swh_scheduler_celery_app, swh_scheduler_celery_worker): - # setup the mocked BitbucketLister - lister.return_value = lister - lister.run.return_value = None - lister.db_partition_indices.return_value = [(i, i + 9) for i in range(0, 50, 10)] - - res = swh_scheduler_celery_app.send_task( - "swh.lister.bitbucket.tasks.FullBitBucketRelister" - ) - assert res - - res.wait() - assert res.successful() - - # retrieve the GroupResult for this task and wait for all the subtasks - # to complete - promise_id = res.result - assert promise_id - promise = GroupResult.restore(promise_id, app=swh_scheduler_celery_app) - for i in range(5): - if promise.ready(): - break - sleep(1) - - lister.assert_called_with() - - # one by the FullBitbucketRelister task - # + 5 for the RangeBitbucketLister subtasks - assert lister.call_count == 6 - - lister.db_last_index.assert_not_called() - lister.db_partition_indices.assert_called_once_with(10000) - - # lister.run should have been called once per 
partition interval - for i in range(5): - assert ( - dict(min_bound=10 * i, max_bound=10 * i + 9), - ) in lister.run.call_args_list + lister.run.assert_called_once()