Changeset View
Changeset View
Standalone View
Standalone View
swh/scanner/scanner.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import asyncio | import asyncio | ||||
from typing import Any, Dict, Iterable | from typing import Any, Dict, Iterable | ||||
import aiohttp | import aiohttp | ||||
from swh.model.cli import model_of_dir | from swh.model.cli import model_of_dir | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from .data import MerkleNodeInfo | from .data import MerkleNodeInfo | ||||
from .output import Output | from .output import Output | ||||
from .policy import QUERY_LIMIT, DirectoryPriority, FilePriority, LazyBFS, QueryAll | from .policy import ( | ||||
QUERY_LIMIT, | |||||
DirectoryPriority, | |||||
FilePriority, | |||||
GreedyBFS, | |||||
LazyBFS, | |||||
QueryAll, | |||||
source_size, | |||||
) | |||||
async def run(config: Dict[str, Any], policy) -> None:
    """Scan a given source code according to the policy given in input.

    Args:
        config: scanner configuration; ``config["web-api"]["url"]`` is the URL
            used for the API requests and ``config["web-api"]["auth-token"]``,
            when set to a non-empty value, is sent as a bearer token
        policy: the policy object to execute; it must provide an awaitable
            ``run(session, api_url)`` method

    """
    web_api = config["web-api"]
    api_url = web_api["url"]

    # A missing "auth-token" key is handled like an empty token: the request
    # is simply sent without an Authorization header.
    auth_token = web_api.get("auth-token")
    headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else {}

    # trust_env=True lets aiohttp pick up proxy settings from the environment.
    async with aiohttp.ClientSession(headers=headers, trust_env=True) as session:
        await policy.run(session, api_url)
def source_size(source_tree: Directory) -> int:
    """Return how many nodes ``source_tree.iter_tree(dedup=False)`` yields.

    Args:
        source_tree: the directory model whose nodes are counted

    Returns:
        the number of items produced by the non-deduplicated tree iteration

    """
    # Count lazily rather than materializing a list of every node just to
    # take its length.
    return sum(1 for _ in source_tree.iter_tree(dedup=False))
def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str):
    """Instantiate the scan policy selected by name.

    Args:
        source_tree: the directory model to scan
        nodes_data: the node information object handed to the policy
        policy: one of "auto", "bfs", "greedybfs", "filepriority" or
            "dirpriority"

    Returns:
        the policy object built from ``source_tree`` and ``nodes_data``

    Raises:
        ValueError: if ``policy`` is not one of the names listed above

    """
    if policy == "auto":
        # Up to QUERY_LIMIT nodes the QueryAll policy is used; anything
        # larger falls back to LazyBFS.
        if source_size(source_tree) <= QUERY_LIMIT:
            return QueryAll(source_tree, nodes_data)
        return LazyBFS(source_tree, nodes_data)
    if policy == "bfs":
        return LazyBFS(source_tree, nodes_data)
    if policy == "greedybfs":
        return GreedyBFS(source_tree, nodes_data)
    if policy == "filepriority":
        return FilePriority(source_tree, nodes_data)
    if policy == "dirpriority":
        return DirectoryPriority(source_tree, nodes_data)

    # ValueError is a subclass of Exception, so callers catching the
    # previously raised bare Exception keep working.
    raise ValueError(f"policy '{policy}' not found")
Show All 23 Lines |