diff --git a/Manifest.in b/Manifest.in index 97d0c7a..3f2d8c2 100644 --- a/Manifest.in +++ b/Manifest.in @@ -1,5 +1,5 @@ include README.md include LICENSE include tree_sitter/core/lib/utf8proc/* include tree_sitter/core/lib/src/* -include tree_sitter/core/lib/include/* +include tree_sitter/core/lib/include/tree_sitter/* diff --git a/README.md b/README.md index c167868..20d00ff 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,88 @@ Py-Tree-sitter ================== This module provides Python bindings to the [tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library. ## Installation This package currently only works with Python 3. There are no library dependencies. ```sh pip3 install tree_sitter ``` ## Usage #### Setup First you'll need a Tree-sitter language implementation for each language that you want to parse. You can clone some of the [existing language repos](https://github.com/tree-sitter) or [create your own](http://tree-sitter.github.io/tree-sitter/creating-parsers): ```sh git clone https://github.com/tree-sitter/tree-sitter-go git clone https://github.com/tree-sitter/tree-sitter-javascript git clone https://github.com/tree-sitter/tree-sitter-python ``` Use the `Language.build_library` method to compile these into a library that's usable from Python. This function will return immediately if the library has already been compiled since the last time its source code was modified: ```python from tree_sitter import Language Language.build_library( # Store the library in the `build` directory 'build/my-languages.so', # Include one or more languages - 'vendor/tree-sitter-go', - 'vendor/tree-sitter-javascript', - 'vendor/tree-sitter-python' + [ + 'vendor/tree-sitter-go', + 'vendor/tree-sitter-javascript', + 'vendor/tree-sitter-python' + ] ) ``` Load the languages into your app as `Language` objects: ```python GO_LANGUAGE = Language('build/my-languages.so', 'go') JS_LANGUAGE = Language('build/my-languages.so', 'javascript') PY_LANGUAGE = Language('build/my-languages.so', 'python') ``` #### Basic Parsing Create a `Parser` and configure it to use one of the languages: ```python parser = Parser() parser.set_language(PY_LANGUAGE) ``` Parse some source code: ```python tree = parser.parse(""" def foo(): if bar: baz() """) ``` Inspect the resulting `Tree`: ```python root_node = tree.root_node assert root_node.type == 'module' assert root_node.start_point == (1, 0) assert root_node.end_point == (3, 13) function_node = root_node.children[0] assert root_node.type == 'function_definition' function_name_node = function_node.children[1] assert function_name_node.type == 'identifier' assert function_name_node.start_point == (1, 4) assert function_name_node.end_point == (1, 7) assert root_node.sexp() == '' ``` diff --git a/tests/__init__.py b/tests/__init__.py index 11836df..b77a84f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,57 +1,56 @@ import unittest from tree_sitter import Parser, Language LIB_PATH = "build/languages.so" -Language.build_library( - LIB_PATH, +Language.build_library(LIB_PATH, [ "tests/fixtures/tree-sitter-python", "tests/fixtures/tree-sitter-javascript", -) +]) PYTHON = Language(LIB_PATH, "python") JAVASCRIPT = Language(LIB_PATH, "javascript") class TestTreeSitter(unittest.TestCase): def test_basic_parsing(self): parser = Parser() # First parse some python code parser.set_language(PYTHON) tree = parser.parse("def foo():\n bar()") root_node = tree.root_node self.assertEqual( root_node.sexp(), "(module (function_definition (identifier) (parameters) (expression_statement (call (identifier) (argument_list)))))" ) assert root_node.type == 'module' assert root_node.start_point == (0, 0) self.assertEqual(root_node.type, "module") self.assertEqual(root_node.start_byte, 0) self.assertEqual(root_node.end_byte, 18) self.assertEqual(root_node.start_point, (0, 0)) self.assertEqual(root_node.end_point, (1, 7)) fn_node = root_node.children[0] self.assertEqual(fn_node.type, "function_definition") self.assertEqual(fn_node.start_byte, 0) self.assertEqual(fn_node.end_byte, 18) self.assertEqual(fn_node.start_point, (0, 0)) self.assertEqual(fn_node.end_point, (1, 7)) self.assertEqual(fn_node.children[0].type, "def") self.assertEqual(fn_node.children[1].type, "identifier") self.assertEqual(fn_node.children[2].type, "parameters") self.assertEqual(fn_node.children[3].type, ":") self.assertEqual(fn_node.children[4].type, "expression_statement") # Parse some javascript code parser.set_language(JAVASCRIPT) tree = parser.parse("function foo() {\n bar();\n}") root_node = tree.root_node self.assertEqual( root_node.sexp(), "(program (function (identifier) (formal_parameters) (statement_block (expression_statement (call_expression (identifier) (arguments))))))" ) diff --git a/tree_sitter/__init__.py b/tree_sitter/__init__.py index f099b49..a6cc04d 100644 --- a/tree_sitter/__init__.py +++ b/tree_sitter/__init__.py @@ -1,61 +1,64 @@ from ctypes import cdll, c_void_p from distutils.ccompiler import new_compiler from tempfile import TemporaryDirectory from tree_sitter_binding import Parser import os.path as path -INCLUDE_PATH = path.join(path.dirname(__file__), "core", "lib", "include") - - class Language: - def build_library(output_path, *repo_paths): + def build_library(output_path, repo_paths): """ Build a dynamic library at the given path, based on the parser repositories at the given paths. Returns `True` if the dynamic library was compiled and `False` if the library already existed and was modified more recently than any of the source files. """ compiler = new_compiler() - compiler.add_include_dir(INCLUDE_PATH) output_mtime = 0 if path.exists(output_path): output_mtime = path.getmtime(output_path) + if len(repo_paths) == 0: + raise ValueError('Must provide at least one language folder') + source_paths = [] source_mtimes = [] for repo_path in repo_paths: src_path = path.join(repo_path, 'src') source_paths.append(path.join(src_path, "parser.c")) source_mtimes.append(path.getmtime(source_paths[-1])) if path.exists(path.join(src_path, "scanner.cc")): compiler.add_library('c++') source_paths.append(path.join(src_path, "scanner.cc")) source_mtimes.append(path.getmtime(source_paths[-1])) elif path.exists(path.join(src_path, "scanner.c")): source_paths.append(path.join(src_path, "scanner.c")) source_mtimes.append(path.getmtime(source_paths[-1])) if max(source_mtimes) > output_mtime: with TemporaryDirectory(suffix = 'tree_sitter_language') as dir: object_paths = [] for source_path in source_paths: - object_paths.append(compiler.compile([source_path], output_dir = dir)[0]) + object_paths.append(compiler.compile( + [source_path], + output_dir = dir, + include_dirs = [path.dirname(source_path)] + )[0]) compiler.link_shared_object(object_paths, output_path) return True else: return False def __init__(self, library_path, name): """ Load the language with the given name from the dynamic library at the given path. """ self.name = name self.lib = cdll.LoadLibrary(library_path) language_function = getattr(self.lib, "tree_sitter_%s" % name) language_function.restype = c_void_p self.language_id = language_function()