diff --git a/swh/scanner/benchmark_algos.py b/swh/scanner/benchmark_algos.py --- a/swh/scanner/benchmark_algos.py +++ b/swh/scanner/benchmark_algos.py @@ -12,6 +12,8 @@ from typing import Dict, Iterable, List, Optional import requests +from requests.adapters import HTTPAdapter +from requests.packages.urllib3.util.retry import Retry from swh.model.from_disk import Content, Directory, accept_all_directories from swh.model.identifiers import CONTENT, DIRECTORY, swhid @@ -20,6 +22,12 @@ from .model import Status, Tree from .scanner import directory_filter, extract_regex_objs +# Shared session whose http:// and https:// requests are retried per retries_rule. +session = requests.Session() +retries_rule = Retry(total=5, backoff_factor=1) +session.mount("http://", HTTPAdapter(max_retries=retries_rule)) +session.mount("https://", HTTPAdapter(max_retries=retries_rule)) + def query_swhids( swhids: List[Tree], api_url: str, counter: Optional[collections.Counter] = None @@ -40,7 +48,7 @@ def make_request(swhids): swhids = [swhid.swhid for swhid in swhids] - req = requests.post(endpoint, json=swhids) + req = session.post(endpoint, json=swhids) if req.status_code != 200: error_message = "%s with given values %s" % (req.text, str(swhids)) raise APIError(error_message) @@ -254,6 +262,7 @@ for node in all_nodes: if node.otype == CONTENT and not node.known: + all_nodes_copy.remove(node) remove_parents(node, all_nodes_copy) for node in all_nodes_copy: