Changeset View
Changeset View
Standalone View
Standalone View
swh/scanner/scanner.py
Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | def get_policy_obj(source_tree: Directory, nodes_data: MerkleNodeInfo, policy: str): | ||||
elif policy == "filepriority": | elif policy == "filepriority": | ||||
return FilePriority(source_tree, nodes_data) | return FilePriority(source_tree, nodes_data) | ||||
elif policy == "dirpriority": | elif policy == "dirpriority": | ||||
return DirectoryPriority(source_tree, nodes_data) | return DirectoryPriority(source_tree, nodes_data) | ||||
else: | else: | ||||
raise Exception(f"policy '{policy}' not found") | raise Exception(f"policy '{policy}' not found") | ||||
# here is a set of directories we should disregard | |||||
# | |||||
# TODO: make its usage configurable | |||||
# TODO: make it extensible through configuration | |||||
COMMON_EXCLUDE_PATTERNS = [ | |||||
b".bzr", | |||||
b".coverage", | |||||
b"*.egg-info", | |||||
b".eggs", | |||||
b".git", | |||||
b".hg", | |||||
b".mypy_cache", | |||||
ardumont: Maybe add a todo on it to make this configurable from the usual swh config file. | |||||
b"__pycache__", | |||||
b".svn", | |||||
b".tox", | |||||
] | |||||
COMMON_EXCLUDE_PATTERNS.extend([b"*/" + p for p in COMMON_EXCLUDE_PATTERNS]) | |||||
def scan( | def scan( | ||||
config: Dict[str, Any], | config: Dict[str, Any], | ||||
root_path: str, | root_path: str, | ||||
exclude_patterns: Iterable[str], | exclude_patterns: Iterable[str], | ||||
out_fmt: str, | out_fmt: str, | ||||
interactive: bool, | interactive: bool, | ||||
policy: str, | policy: str, | ||||
extra_info: set, | extra_info: set, | ||||
): | ): | ||||
"""Scan a source code project to discover files and directories already | """Scan a source code project to discover files and directories already | ||||
present in the archive""" | present in the archive""" | ||||
converted_patterns = [pattern.encode() for pattern in exclude_patterns] | converted_patterns = [pattern.encode() for pattern in exclude_patterns] | ||||
converted_patterns.extend(COMMON_EXCLUDE_PATTERNS) | |||||
source_tree = model_of_dir(root_path.encode(), converted_patterns) | source_tree = model_of_dir(root_path.encode(), converted_patterns) | ||||
nodes_data = MerkleNodeInfo() | nodes_data = MerkleNodeInfo() | ||||
extra_info.add("known") | extra_info.add("known") | ||||
init_merkle_node_info(source_tree, nodes_data, extra_info) | init_merkle_node_info(source_tree, nodes_data, extra_info) | ||||
policy = get_policy_obj(source_tree, nodes_data, policy) | policy = get_policy_obj(source_tree, nodes_data, policy) | ||||
loop = asyncio.get_event_loop() | loop = asyncio.get_event_loop() | ||||
loop.run_until_complete(run(config, policy, source_tree, nodes_data, extra_info)) | loop.run_until_complete(run(config, policy, source_tree, nodes_data, extra_info)) | ||||
out = Output(root_path, nodes_data, source_tree) | out = Output(root_path, nodes_data, source_tree) | ||||
if interactive: | if interactive: | ||||
out.show("interactive") | out.show("interactive") | ||||
else: | else: | ||||
out.show(out_fmt) | out.show(out_fmt) |
Maybe add a todo on it to make this configurable from the usual swh config file.