Changeset View
Changeset View
Standalone View
Standalone View
swh/scanner/scanner.py
# Copyright (C) 2020-2021 The Software Heritage developers | # Copyright (C) 2020-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import asyncio | import asyncio | ||||
from typing import Any, Dict, Iterable | from typing import Any, Dict, Iterable | ||||
import aiohttp | import aiohttp | ||||
from swh.model.cli import model_of_dir | from swh.model.cli import model_of_dir | ||||
from swh.model.from_disk import Directory | from swh.model.from_disk import Directory | ||||
from .data import MerkleNodeInfo | from .client import Client | ||||
from .data import MerkleNodeInfo, add_origin, init_merkle_node_info | |||||
from .output import Output | from .output import Output | ||||
from .policy import ( | from .policy import ( | ||||
QUERY_LIMIT, | QUERY_LIMIT, | ||||
DirectoryPriority, | DirectoryPriority, | ||||
FilePriority, | FilePriority, | ||||
GreedyBFS, | GreedyBFS, | ||||
LazyBFS, | LazyBFS, | ||||
QueryAll, | QueryAll, | ||||
source_size, | source_size, | ||||
) | ) | ||||
async def run(
    config: Dict[str, Any],
    policy,
    source_tree: Directory,
    nodes_data: MerkleNodeInfo,
    extra_info: set,
) -> None:
    """Scan a given source code according to the policy given in input.

    Args:
        config: scanner configuration; ``config["web-api"]["url"]`` is the
            API url and ``config["web-api"]["auth-token"]``, when set to a
            non-empty value, is sent as a bearer token
        policy: policy object whose ``run(client)`` coroutine is awaited when
            the "known" information is requested
        source_tree: directory model handed to ``add_origin`` when the
            "origin" information is requested
        nodes_data: per-node information store handed to ``add_origin``
        extra_info: names of the information to retrieve; the supported names
            are "known" and "origin"

    Raises:
        ValueError: if ``extra_info`` contains an unsupported name
    """
    # Reject unsupported names up front, so that no request is sent to the
    # API for a scan that is going to fail anyway.
    unsupported = set(extra_info) - {"known", "origin"}
    if unsupported:
        info = min(unsupported, key=str)
        raise ValueError(f"The information '{info}' cannot be retrieved")

    web_api = config["web-api"]
    api_url = web_api["url"]

    # A missing or empty token means anonymous access.
    auth_token = web_api.get("auth-token")
    headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else {}

    async with aiohttp.ClientSession(headers=headers, trust_env=True) as session:
        client = Client(api_url, session)
        # Sets have no defined iteration order; test membership explicitly so
        # the steps always run in the same order.
        if "known" in extra_info:
            await policy.run(client)
        if "origin" in extra_info:
            await add_origin(source_tree, nodes_data, client)
def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str): | def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str): | ||||
if policy == "auto": | if policy == "auto": | ||||
return ( | return ( | ||||
QueryAll(source_tree, nodes_data) | QueryAll(source_tree, nodes_data) | ||||
if source_size(source_tree) <= QUERY_LIMIT | if source_size(source_tree) <= QUERY_LIMIT | ||||
else LazyBFS(source_tree, nodes_data) | else LazyBFS(source_tree, nodes_data) | ||||
Show All 12 Lines | |||||
def scan(
    config: Dict[str, Any],
    root_path: str,
    exclude_patterns: Iterable[str],
    out_fmt: str,
    interactive: bool,
    policy: str,
    extra_info: set,
):
    """Scan a source code project to discover files and directories already
    present in the archive

    Args:
        config: scanner configuration, forwarded to :func:`run`
        root_path: path of the project to scan
        exclude_patterns: patterns handed (encoded) to ``model_of_dir`` when
            building the source tree
        out_fmt: output format shown when ``interactive`` is false
        interactive: if true, show the "interactive" output instead of
            ``out_fmt``
        policy: name of the policy, resolved with :func:`get_policy_obj`
        extra_info: names of the extra information to retrieve; "known" is
            always retrieved in addition to the requested ones
    """
    converted_patterns = [pattern.encode() for pattern in exclude_patterns]
    source_tree = model_of_dir(root_path.encode(), converted_patterns)
    nodes_data = MerkleNodeInfo()

    # Work on a copy: adding "known" must not modify the caller's set.
    requested_info = set(extra_info) | {"known"}
    init_merkle_node_info(source_tree, nodes_data, requested_info)

    # Keep the policy name and the policy object under distinct names.
    policy_obj = get_policy_obj(source_tree, nodes_data, policy)

    loop = asyncio.get_event_loop()
    loop.run_until_complete(
        run(config, policy_obj, source_tree, nodes_data, requested_info)
    )

    out = Output(root_path, nodes_data, source_tree)
    out.show("interactive" if interactive else out_fmt)