Changeset View
Changeset View
Standalone View
Standalone View
swh/scanner/scanner.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import asyncio | import asyncio | ||||
from typing import Any, Dict, Iterable | from typing import Any, Dict, Iterable | ||||
import aiohttp | import aiohttp | ||||
from swh.model.cli import model_of_dir | from swh.model.cli import model_of_dir | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from .data import MerkleNodeInfo | from .data import MerkleNodeInfo | ||||
from .output import Output | from .output import Output | ||||
from .policy import DirectoryPriority, FilePriority, LazyBFS | from .policy import QUERY_LIMIT, DirectoryPriority, FilePriority, LazyBFS, QueryAll | ||||
async def run(config: Dict[str, Any], policy) -> None:
    """Run the given scanning policy against the configured web API.

    Args:
        config: scanner configuration; ``config["web-api"]`` must provide
            the ``url`` and ``auth-token`` entries
        policy: object exposing an awaitable ``run(session, api_url)``
            method, which performs the actual scan

    """
    web_api = config["web-api"]
    endpoint = web_api["url"]
    token = web_api["auth-token"]

    # Only send an Authorization header when a (truthy) token is configured.
    request_headers = {"Authorization": f"Bearer {token}"} if token else {}

    # trust_env=True is forwarded to aiohttp so that the session can pick up
    # proxy settings from the environment.
    async with aiohttp.ClientSession(
        headers=request_headers, trust_env=True
    ) as session:
        await policy.run(session, endpoint)
def source_size(source_tree: Directory) -> int:
    """Return the number of nodes yielded by walking ``source_tree``.

    The tree is traversed with ``iter_tree(dedup=False)`` and every yielded
    node is counted (presumably so that content appearing several times is
    counted each time -- confirm against ``swh.model.from_disk``).

    Args:
        source_tree: the directory model to measure

    Returns:
        the number of nodes produced by the traversal

    """
    # Count lazily: there is no need to hold every node in a list just to
    # take its length.
    return sum(1 for _ in source_tree.iter_tree(dedup=False))
def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str):
    """Build the policy object matching the requested policy name.

    Args:
        source_tree: the directory model that the policy will work on
        nodes_data: the node information container handed to the policy
        policy: one of ``"auto"``, ``"bfs"``, ``"filepriority"`` or
            ``"dirpriority"``

    Returns:
        an instance of the selected policy class, constructed with
        ``source_tree`` and ``nodes_data``

    Raises:
        Exception: if ``policy`` is not one of the known names

    """
    if policy == "auto":
        # Sources with at most QUERY_LIMIT nodes get QueryAll; anything
        # larger falls back to LazyBFS.
        if source_size(source_tree) <= QUERY_LIMIT:
            return QueryAll(source_tree, nodes_data)
        return LazyBFS(source_tree, nodes_data)

    if policy == "bfs":
        return LazyBFS(source_tree, nodes_data)

    if policy == "filepriority":
        return FilePriority(source_tree, nodes_data)

    if policy == "dirpriority":
        return DirectoryPriority(source_tree, nodes_data)

    raise Exception(f"policy '{policy}' not found")
Show All 24 Lines |