diff --git a/.travis.yml b/.travis.yml index f8d161f..2ee0bf2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,11 @@ language: python -python: 3.6 +python: 3.7 install: "pip install tox" script: - script/fetch-fixtures - tox branches: only: - master diff --git a/MANIFEST.in b/MANIFEST.in index 8ee8b16..f5b018d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,5 @@ include README.md include LICENSE prune tree_sitter/core graft tree_sitter/core/lib/src graft tree_sitter/core/lib/include/tree_sitter -include tree_sitter/core/lib/utf8proc/*.c -include tree_sitter/core/lib/utf8proc/*.h diff --git a/setup.py b/setup.py index dae813a..23bcb26 100644 --- a/setup.py +++ b/setup.py @@ -1,48 +1,48 @@ """ Py-Tree-sitter """ import platform from os import path from setuptools import Extension from setuptools import setup with open(path.join(path.dirname(__file__), "README.md")) as f: LONG_DESCRIPTION = f.read() setup( name="tree_sitter", version="0.0.8", maintainer="Max Brunsfeld", maintainer_email="maxbrunsfeld@gmail.com", author="Max Brunsfeld", author_email="maxbrunsfeld@gmail.com", url="https://github.com/tree-sitter/py-tree-sitter", license="MIT", platforms=["any"], python_requires=">=3.3", description="Python bindings to the Tree-sitter parsing library", long_description=LONG_DESCRIPTION, long_description_content_type="text/markdown", classifiers=[ "License :: OSI Approved :: MIT License", "Topic :: Software Development :: Compilers", "Topic :: Text Processing :: Linguistic", ], packages=["tree_sitter"], ext_modules=[ Extension( "tree_sitter.binding", ["tree_sitter/core/lib/src/lib.c", "tree_sitter/binding.c"], include_dirs=[ "tree_sitter/core/lib/include", - "tree_sitter/core/lib/utf8proc", + "tree_sitter/core/lib/src", ], extra_compile_args=( ["-std=c99"] if platform.system() != "Windows" else None ), ) ], project_urls={"Source": "https://github.com/tree-sitter/py-tree-sitter"}, ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_tree_sitter.py b/tests/test_tree_sitter.py index b8bb73b..0befe48 100644 --- a/tests/test_tree_sitter.py +++ b/tests/test_tree_sitter.py @@ -1,228 +1,278 @@ # pylint: disable=missing-docstring import re import unittest from os import path from tree_sitter import Language from tree_sitter import Parser LIB_PATH = path.join("build", "languages.so") Language.build_library( LIB_PATH, [ path.join("tests", "fixtures", "tree-sitter-python"), path.join("tests", "fixtures", "tree-sitter-javascript"), ], ) PYTHON = Language(LIB_PATH, "python") JAVASCRIPT = Language(LIB_PATH, "javascript") def _collapse_ws(string): return re.sub(r"\s+", " ", string).strip() -class TestTreeSitter(unittest.TestCase): +class TestParser(unittest.TestCase): def test_set_language(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") self.assertEqual( tree.root_node.sexp(), _collapse_ws( """(module (function_definition name: (identifier) parameters: (parameters) body: (block (expression_statement (call function: (identifier) arguments: (argument_list))))))""" ), ) parser.set_language(JAVASCRIPT) tree = parser.parse(b"function foo() {\n bar();\n}") self.assertEqual( tree.root_node.sexp(), _collapse_ws( """(program (function_declaration name: (identifier) parameters: (formal_parameters) body: (statement_block (expression_statement (call_expression function: (identifier) arguments: (arguments))))))""" ), ) def test_multibyte_characters(self): parser = Parser() parser.set_language(JAVASCRIPT) source_code = bytes("'😎' && '🐍'", "utf8") tree = parser.parse(source_code) root_node = tree.root_node statement_node = root_node.children[0] binary_node = statement_node.children[0] snake_node = binary_node.children[2] self.assertEqual(binary_node.type, "binary_expression") self.assertEqual(snake_node.type, "string") self.assertEqual( source_code[snake_node.start_byte : snake_node.end_byte].decode( "utf8" ), "'🐍'", ) - def test_node_child_by_field_id(self): + +class TestNode(unittest.TestCase): + def test_child_by_field_id(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") root_node = tree.root_node fn_node = tree.root_node.children[0] self.assertEqual(PYTHON.field_id_for_name("nameasdf"), None) name_field = PYTHON.field_id_for_name("name") alias_field = PYTHON.field_id_for_name("alias") self.assertIsInstance(alias_field, int) self.assertIsInstance(name_field, int) self.assertRaises(TypeError, root_node.child_by_field_id, "") self.assertEqual(root_node.child_by_field_id(alias_field), None) self.assertEqual(root_node.child_by_field_id(name_field), None) self.assertEqual(fn_node.child_by_field_id(alias_field), None) self.assertEqual( fn_node.child_by_field_id(name_field).type, "identifier" ) self.assertRaises(TypeError, root_node.child_by_field_name, True) self.assertRaises(TypeError, root_node.child_by_field_name, 1) self.assertEqual( fn_node.child_by_field_name("name").type, "identifier" ) self.assertEqual(fn_node.child_by_field_name("asdfasdfname"), None) - def test_node_children(self): + def test_children(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") root_node = tree.root_node self.assertEqual(root_node.type, "module") self.assertEqual(root_node.start_byte, 0) self.assertEqual(root_node.end_byte, 18) self.assertEqual(root_node.start_point, (0, 0)) self.assertEqual(root_node.end_point, (1, 7)) # List object is reused self.assertIs(root_node.children, root_node.children) fn_node = root_node.children[0] self.assertEqual(fn_node.type, "function_definition") self.assertEqual(fn_node.start_byte, 0) self.assertEqual(fn_node.end_byte, 18) self.assertEqual(fn_node.start_point, (0, 0)) self.assertEqual(fn_node.end_point, (1, 7)) def_node = fn_node.children[0] self.assertEqual(def_node.type, "def") self.assertEqual(def_node.is_named, False) id_node = fn_node.children[1] self.assertEqual(id_node.type, "identifier") self.assertEqual(id_node.is_named, True) self.assertEqual(len(id_node.children), 0) params_node = fn_node.children[2] self.assertEqual(params_node.type, "parameters") self.assertEqual(params_node.is_named, True) colon_node = fn_node.children[3] self.assertEqual(colon_node.type, ":") self.assertEqual(colon_node.is_named, False) statement_node = fn_node.children[4] self.assertEqual(statement_node.type, "block") self.assertEqual(statement_node.is_named, True) - def test_tree_walk(self): + +class TestTree(unittest.TestCase): + def test_walk(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") cursor = tree.walk() # Node always returns the same instance self.assertIs(cursor.node, cursor.node) self.assertEqual(cursor.node.type, "module") self.assertEqual(cursor.node.start_byte, 0) self.assertEqual(cursor.node.end_byte, 18) self.assertEqual(cursor.node.start_point, (0, 0)) self.assertEqual(cursor.node.end_point, (1, 7)) self.assertTrue(cursor.goto_first_child()) self.assertEqual(cursor.node.type, "function_definition") self.assertEqual(cursor.node.start_byte, 0) self.assertEqual(cursor.node.end_byte, 18) self.assertEqual(cursor.node.start_point, (0, 0)) self.assertEqual(cursor.node.end_point, (1, 7)) self.assertTrue(cursor.goto_first_child()) self.assertEqual(cursor.node.type, "def") self.assertEqual(cursor.node.is_named, False) self.assertEqual(cursor.node.sexp(), '("def")') def_node = cursor.node # Node remains cached after a failure to move self.assertFalse(cursor.goto_first_child()) self.assertIs(cursor.node, def_node) self.assertTrue(cursor.goto_next_sibling()) self.assertEqual(cursor.node.type, "identifier") self.assertEqual(cursor.node.is_named, True) self.assertFalse(cursor.goto_first_child()) self.assertTrue(cursor.goto_next_sibling()) self.assertEqual(cursor.node.type, "parameters") self.assertEqual(cursor.node.is_named, True) def test_edit(self): parser = Parser() parser.set_language(PYTHON) tree = parser.parse(b"def foo():\n bar()") edit_offset = len(b"def foo(") tree.edit( start_byte=edit_offset, old_end_byte=edit_offset, new_end_byte=edit_offset + 2, start_point=(0, edit_offset), old_end_point=(0, edit_offset), new_end_point=(0, edit_offset + 2), ) fn_node = tree.root_node.children[0] self.assertEqual(fn_node.type, "function_definition") self.assertTrue(fn_node.has_changes) self.assertFalse(fn_node.children[0].has_changes) self.assertFalse(fn_node.children[1].has_changes) self.assertFalse(fn_node.children[3].has_changes) params_node = fn_node.children[2] self.assertEqual(params_node.type, "parameters") self.assertTrue(params_node.has_changes) self.assertEqual(params_node.start_point, (0, edit_offset - 1)) self.assertEqual(params_node.end_point, (0, edit_offset + 3)) new_tree = parser.parse(b"def foo(ab):\n bar()", tree) self.assertEqual( new_tree.root_node.sexp(), _collapse_ws( """(module (function_definition name: (identifier) parameters: (parameters (identifier)) body: (block (expression_statement (call function: (identifier) arguments: (argument_list))))))""" ), ) + + +class TestQuery(unittest.TestCase): + def test_errors(self): + with self.assertRaisesRegex(NameError, "Invalid node type foo"): + PYTHON.query("(list (foo))") + with self.assertRaisesRegex(NameError, "Invalid field name buzz"): + PYTHON.query("(function_definition buzz: (identifier))") + with self.assertRaisesRegex(NameError, "Invalid capture name garbage"): + PYTHON.query("((function_definition) (eq? @garbage foo))") + with self.assertRaisesRegex(SyntaxError, "Invalid syntax at offset 6"): + PYTHON.query("(list))") + PYTHON.query("(function_definition)") + + def test_captures(self): + parser = Parser() + parser.set_language(PYTHON) + source = b"def foo():\n bar()\ndef baz():\n quux()\n" + tree = parser.parse(source) + query = PYTHON.query( + """ + (function_definition name: (identifier) @func-def) + (call function: (identifier) @func-call) + """ + ) + + captures = query.captures(tree.root_node) + captures = query.captures(tree.root_node) + captures = query.captures(tree.root_node) + captures = query.captures(tree.root_node) + + self.assertEqual(captures[0][0].start_point, (0, 4)) + self.assertEqual(captures[0][0].end_point, (0, 7)) + self.assertEqual(captures[0][1], "func-def") + + self.assertEqual(captures[1][0].start_point, (1, 2)) + self.assertEqual(captures[1][0].end_point, (1, 5)) + self.assertEqual(captures[1][1], "func-call") + + self.assertEqual(captures[2][0].start_point, (2, 4)) + self.assertEqual(captures[2][0].end_point, (2, 7)) + self.assertEqual(captures[2][1], "func-def") + + self.assertEqual(captures[3][0].start_point, (3, 2)) + self.assertEqual(captures[3][0].end_point, (3, 6)) + self.assertEqual(captures[3][1], "func-call") diff --git a/tree_sitter/__init__.py b/tree_sitter/__init__.py index f006542..66fc989 100644 --- a/tree_sitter/__init__.py +++ b/tree_sitter/__init__.py @@ -1,96 +1,100 @@ """Python bindings for tree-sitter.""" import platform from ctypes import c_void_p from ctypes import cdll from ctypes.util import find_library from distutils.ccompiler import new_compiler from os import path from tempfile import TemporaryDirectory # pylint: disable=no-name-in-module,import-error -from tree_sitter.binding import _language_field_id_for_name +from tree_sitter.binding import _language_field_id_for_name, _language_query from tree_sitter.binding import Node from tree_sitter.binding import Parser from tree_sitter.binding import Tree from tree_sitter.binding import TreeCursor class Language: """A tree-sitter language""" @staticmethod def build_library(output_path, repo_paths): """ Build a dynamic library at the given path, based on the parser repositories at the given paths. Returns `True` if the dynamic library was compiled and `False` if the library already existed and was modified more recently than any of the source files. """ output_mtime = ( path.getmtime(output_path) if path.exists(output_path) else 0 ) if not repo_paths: raise ValueError("Must provide at least one language folder") cpp = False source_paths = [] for repo_path in repo_paths: src_path = path.join(repo_path, "src") source_paths.append(path.join(src_path, "parser.c")) if path.exists(path.join(src_path, "scanner.cc")): cpp = True source_paths.append(path.join(src_path, "scanner.cc")) elif path.exists(path.join(src_path, "scanner.c")): source_paths.append(path.join(src_path, "scanner.c")) source_mtimes = [path.getmtime(__file__)] + [ path.getmtime(path_) for path_ in source_paths ] compiler = new_compiler() if cpp: if find_library("c++"): compiler.add_library("c++") elif find_library("stdc++"): compiler.add_library("stdc++") if max(source_mtimes) <= output_mtime: return False with TemporaryDirectory(suffix="tree_sitter_language") as out_dir: object_paths = [] for source_path in source_paths: if platform.system() == "Windows": flags = None else: flags = ["-fPIC"] if source_path.endswith(".c"): flags.append("-std=c99") object_paths.append( compiler.compile( [source_path], output_dir=out_dir, include_dirs=[path.dirname(source_path)], extra_preargs=flags, )[0] ) compiler.link_shared_object(object_paths, output_path) return True def __init__(self, library_path, name): """ Load the language with the given name from the dynamic library at the given path. """ self.name = name self.lib = cdll.LoadLibrary(library_path) language_function = getattr(self.lib, "tree_sitter_%s" % name) language_function.restype = c_void_p self.language_id = language_function() def field_id_for_name(self, name): """Return the field id for a field name.""" return _language_field_id_for_name(self.language_id, name) + + def query(self, source): + """Create a Query with the given source code.""" + return _language_query(self.language_id, source) diff --git a/tree_sitter/binding.c b/tree_sitter/binding.c index fb6e50d..14e4f59 100644 --- a/tree_sitter/binding.c +++ b/tree_sitter/binding.c @@ -1,600 +1,766 @@ #include "Python.h" #include "tree_sitter/api.h" // Types typedef struct { PyObject_HEAD TSNode node; PyObject *children; } Node; typedef struct { PyObject_HEAD TSTree *tree; } Tree; typedef struct { PyObject_HEAD TSParser *parser; } Parser; typedef struct { PyObject_HEAD TSTreeCursor cursor; PyObject *node; } TreeCursor; +typedef struct { + PyObject_HEAD + TSQuery *query; + PyObject *capture_names; +} Query; + static TSTreeCursor default_cursor = {0}; +static TSQueryCursor *query_cursor = NULL; // Point static PyObject *point_new(TSPoint point) { PyObject *row = PyLong_FromSize_t((size_t)point.row); PyObject *column = PyLong_FromSize_t((size_t)point.column); if (!row || !column) { Py_XDECREF(row); Py_XDECREF(column); return NULL; } return PyTuple_Pack(2, row, column); } // Node static PyObject *node_new_internal(TSNode node); static PyObject *tree_cursor_new_internal(TSNode node); static void node_dealloc(Node *self) { Py_XDECREF(self->children); Py_TYPE(self)->tp_free(self); } static PyObject *node_repr(Node *self) { const char *type = ts_node_type(self->node); TSPoint start_point = ts_node_start_point(self->node); TSPoint end_point = ts_node_end_point(self->node); const char *format_string = ts_node_is_named(self->node) ? "" : ""; return PyUnicode_FromFormat( format_string, type, start_point.row, start_point.column, end_point.row, end_point.column ); } static PyObject *node_sexp(Node *self, PyObject *args) { char *string = ts_node_string(self->node); PyObject *result = PyUnicode_FromString(string); free(string); return result; } static PyObject *node_walk(Node *self, PyObject *args) { return tree_cursor_new_internal(self->node); } static PyObject *node_chield_by_field_id(Node *self, PyObject *args) { TSFieldId field_id; if (!PyArg_ParseTuple(args, "H", &field_id)) { return NULL; } TSNode child = ts_node_child_by_field_id(self->node, field_id); if (ts_node_is_null(child)) { Py_RETURN_NONE; } return node_new_internal(child); } static PyObject *node_chield_by_field_name(Node *self, PyObject *args) { char *name; int length; if (!PyArg_ParseTuple(args, "s#", &name, &length)) { return NULL; } TSNode child = ts_node_child_by_field_name(self->node, name, length); if (ts_node_is_null(child)) { Py_RETURN_NONE; } return node_new_internal(child); } static PyObject *node_get_type(Node *self, void *payload) { return PyUnicode_FromString(ts_node_type(self->node)); } static PyObject *node_get_is_named(Node *self, void *payload) { return PyBool_FromLong(ts_node_is_named(self->node)); } static PyObject *node_get_has_changes(Node *self, void *payload) { return PyBool_FromLong(ts_node_has_changes(self->node)); } static PyObject *node_get_has_error(Node *self, void *payload) { return PyBool_FromLong(ts_node_has_error(self->node)); } static PyObject *node_get_start_byte(Node *self, void *payload) { return PyLong_FromSize_t((size_t)ts_node_start_byte(self->node)); } static PyObject *node_get_end_byte(Node *self, void *payload) { return PyLong_FromSize_t((size_t)ts_node_end_byte(self->node)); } static PyObject *node_get_start_point(Node *self, void *payload) { return point_new(ts_node_start_point(self->node)); } static PyObject *node_get_end_point(Node *self, void *payload) { return point_new(ts_node_end_point(self->node)); } static PyObject *node_get_children(Node *self, void *payload) { if (self->children) { Py_INCREF(self->children); return self->children; } long length = (long)ts_node_child_count(self->node); PyObject *result = PyList_New(length); if (length > 0) { ts_tree_cursor_reset(&default_cursor, self->node); ts_tree_cursor_goto_first_child(&default_cursor); int i = 0; do { TSNode child = ts_tree_cursor_current_node(&default_cursor); PyList_SetItem(result, i, node_new_internal(child)); i++; } while (ts_tree_cursor_goto_next_sibling(&default_cursor)); } Py_INCREF(result); self->children = result; return result; } static PyMethodDef node_methods[] = { { .ml_name = "walk", .ml_meth = (PyCFunction)node_walk, .ml_flags = METH_NOARGS, .ml_doc = "walk()\n--\n\n\ Get a tree cursor for walking the tree starting at this node.", }, { .ml_name = "sexp", .ml_meth = (PyCFunction)node_sexp, .ml_flags = METH_NOARGS, .ml_doc = "sexp()\n--\n\n\ Get an S-expression representing the node.", }, { .ml_name = "child_by_field_id", .ml_meth = (PyCFunction)node_chield_by_field_id, .ml_flags = METH_VARARGS, .ml_doc = "child_by_field_id(id)\n--\n\n\ Get child for the given field id.", }, { .ml_name = "child_by_field_name", .ml_meth = (PyCFunction)node_chield_by_field_name, .ml_flags = METH_VARARGS, .ml_doc = "child_by_field_name(name)\n--\n\n\ Get child for the given field name.", }, {NULL}, }; static PyGetSetDef node_accessors[] = { {"type", (getter)node_get_type, NULL, "The node's type", NULL}, {"is_named", (getter)node_get_is_named, NULL, "Is this a named node", NULL}, {"has_changes", (getter)node_get_has_changes, NULL, "Does this node have text changes since it was parsed", NULL}, {"has_error", (getter)node_get_has_error, NULL, "Does this node contain any errors", NULL}, {"start_byte", (getter)node_get_start_byte, NULL, "The node's start byte", NULL}, {"end_byte", (getter)node_get_end_byte, NULL, "The node's end byte", NULL}, {"start_point", (getter)node_get_start_point, NULL, "The node's start point", NULL}, {"end_point", (getter)node_get_end_point, NULL, "The node's end point", NULL}, {"children", (getter)node_get_children, NULL, "The node's children", NULL}, {NULL} }; static PyTypeObject node_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "tree_sitter.Node", .tp_doc = "A syntax node", .tp_basicsize = sizeof(Node), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor)node_dealloc, .tp_repr = (reprfunc)node_repr, .tp_methods = node_methods, .tp_getset = node_accessors, }; static PyObject *node_new_internal(TSNode node) { Node *self = (Node *)node_type.tp_alloc(&node_type, 0); if (self != NULL) { self->node = node; self->children = NULL; } return (PyObject *)self; } // Tree static void tree_dealloc(Tree *self) { ts_tree_delete(self->tree); Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject *tree_get_root_node(Tree *self, void *payload) { return node_new_internal(ts_tree_root_node(self->tree)); } static PyObject *tree_walk(Tree *self, PyObject *args) { return tree_cursor_new_internal(ts_tree_root_node(self->tree)); } static PyObject *tree_edit(Tree *self, PyObject *args, PyObject *kwargs) { unsigned start_byte, start_row, start_column; unsigned old_end_byte, old_end_row, old_end_column; unsigned new_end_byte, new_end_row, new_end_column; char *keywords[] = { "start_byte", "old_end_byte", "new_end_byte", "start_point", "old_end_point", "new_end_point", NULL, }; int ok = PyArg_ParseTupleAndKeywords( args, kwargs, "III(II)(II)(II)", keywords, &start_byte, &old_end_byte, &new_end_byte, &start_row, &start_column, &old_end_row, &old_end_column, &new_end_row, &new_end_column ); if (ok) { TSInputEdit edit = { .start_byte = start_byte, .old_end_byte = old_end_byte, .new_end_byte = new_end_byte, .start_point = {start_row, start_column}, .old_end_point = {old_end_row, old_end_column}, .new_end_point = {new_end_row, new_end_column}, }; ts_tree_edit(self->tree, &edit); } Py_RETURN_NONE; } static PyMethodDef tree_methods[] = { { .ml_name = "walk", .ml_meth = (PyCFunction)tree_walk, .ml_flags = METH_NOARGS, .ml_doc = "walk()\n--\n\n\ Get a tree cursor for walking this tree.", }, { .ml_name = "edit", .ml_meth = (PyCFunction)tree_edit, .ml_flags = METH_KEYWORDS|METH_VARARGS, .ml_doc = "edit(start_byte, old_end_byte, new_end_byte,\ start_point, old_end_point, new_end_point)\n--\n\n\ Edit the syntax tree.", }, {NULL}, }; static PyGetSetDef tree_accessors[] = { {"root_node", (getter)tree_get_root_node, NULL, "The root node of this tree.", NULL}, {NULL} }; static PyTypeObject tree_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "tree_sitter.Tree", .tp_doc = "A Syntax Tree", .tp_basicsize = sizeof(Tree), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor)tree_dealloc, .tp_methods = tree_methods, .tp_getset = tree_accessors, }; static PyObject *tree_new_internal(TSTree *tree) { Tree *self = (Tree *)tree_type.tp_alloc(&tree_type, 0); if (self != NULL) self->tree = tree; return (PyObject *)self; } // TreeCursor static void tree_cursor_dealloc(TreeCursor *self) { ts_tree_cursor_delete(&self->cursor); Py_XDECREF(self->node); Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject *tree_cursor_get_node(TreeCursor *self, void *payload) { if (!self->node) { self->node = node_new_internal(ts_tree_cursor_current_node(&self->cursor)); } Py_INCREF(self->node); return self->node; } static PyObject *tree_cursor_goto_parent(TreeCursor *self, PyObject *args) { bool result = ts_tree_cursor_goto_parent(&self->cursor); if (result) { Py_XDECREF(self->node); self->node = NULL; } return PyBool_FromLong(result); } static PyObject *tree_cursor_goto_first_child(TreeCursor *self, PyObject *args) { bool result = ts_tree_cursor_goto_first_child(&self->cursor); if (result) { Py_XDECREF(self->node); self->node = NULL; } return PyBool_FromLong(result); } static PyObject *tree_cursor_goto_next_sibling(TreeCursor *self, PyObject *args) { bool result = ts_tree_cursor_goto_next_sibling(&self->cursor); if (result) { Py_XDECREF(self->node); self->node = NULL; } return PyBool_FromLong(result); } static PyMethodDef tree_cursor_methods[] = { { .ml_name = "goto_parent", .ml_meth = (PyCFunction)tree_cursor_goto_parent, .ml_flags = METH_NOARGS, .ml_doc = "goto_parent()\n--\n\n\ Go to parent.\n\n\ If the current node is not the root, move to its parent and\n\ return True. Otherwise, return False.", }, { .ml_name = "goto_first_child", .ml_meth = (PyCFunction)tree_cursor_goto_first_child, .ml_flags = METH_NOARGS, .ml_doc = "goto_first_child()\n--\n\n\ Go to first child.\n\n\ If the current node has children, move to the first child and\n\ return True. Otherwise, return False.", }, { .ml_name = "goto_next_sibling", .ml_meth = (PyCFunction)tree_cursor_goto_next_sibling, .ml_flags = METH_NOARGS, .ml_doc = "goto_next_sibling()\n--\n\n\ Go to next sibling.\n\n\ If the current node has a next sibling, move to the next sibling\n\ and return True. Otherwise, return False.", }, {NULL}, }; static PyGetSetDef tree_cursor_accessors[] = { {"node", (getter)tree_cursor_get_node, NULL, "The current node.", NULL}, {NULL}, }; static PyTypeObject tree_cursor_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "tree_sitter.TreeCursor", .tp_doc = "A syntax tree cursor.", .tp_basicsize = sizeof(TreeCursor), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_dealloc = (destructor)tree_cursor_dealloc, .tp_methods = tree_cursor_methods, .tp_getset = tree_cursor_accessors, }; static PyObject *tree_cursor_new_internal(TSNode node) { TreeCursor *cursor = (TreeCursor *)tree_cursor_type.tp_alloc(&tree_cursor_type, 0); if (cursor != NULL) cursor->cursor = ts_tree_cursor_new(node); return (PyObject *)cursor; } // Parser static PyObject *parser_new( PyTypeObject *type, PyObject *args, PyObject *kwds ) { Parser *self = (Parser *)type->tp_alloc(type, 0); if (self != NULL) self->parser = ts_parser_new(); return (PyObject *)self; } static void parser_dealloc(Parser *self) { ts_parser_delete(self->parser); Py_TYPE(self)->tp_free((PyObject *)self); } static PyObject *parser_parse(Parser *self, PyObject *args) { PyObject *source_code = NULL; PyObject *old_tree_arg = NULL; if (!PyArg_UnpackTuple(args, "ref", 1, 2, &source_code, &old_tree_arg)) { return NULL; } if (!PyBytes_Check(source_code)) { PyErr_SetString(PyExc_TypeError, "First argument to parse must be bytes"); return NULL; } const TSTree *old_tree = NULL; if (old_tree_arg) { if (!PyObject_IsInstance(old_tree_arg, (PyObject *)&tree_type)) { PyErr_SetString(PyExc_TypeError, "Second argument to parse must be a Tree"); return NULL; } old_tree = ((Tree *)old_tree_arg)->tree; } size_t length = PyBytes_Size(source_code); char *source_bytes = PyBytes_AsString(source_code); TSTree *new_tree = ts_parser_parse_string(self->parser, old_tree, source_bytes, length); if (!new_tree) { PyErr_SetString(PyExc_ValueError, "Parsing failed"); return NULL; } return tree_new_internal(new_tree); } static PyObject *parser_set_language(Parser *self, PyObject *arg) { PyObject *language_id = PyObject_GetAttrString(arg, "language_id"); if (!language_id) { PyErr_SetString(PyExc_TypeError, "Argument to set_language must be a Language"); return NULL; } if (!PyLong_Check(language_id)) { PyErr_SetString(PyExc_TypeError, "Language ID must be an integer"); return NULL; } TSLanguage *language = (TSLanguage *)PyLong_AsLong(language_id); if (!language) { PyErr_SetString(PyExc_ValueError, "Language ID must not be null"); return NULL; } unsigned version = ts_language_version(language); if (version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION || TREE_SITTER_LANGUAGE_VERSION < version) { return PyErr_Format( PyExc_ValueError, "Incompatible Language version %u. Must not be between %u and %u", version, TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION, TREE_SITTER_LANGUAGE_VERSION ); } ts_parser_set_language(self->parser, language); Py_RETURN_NONE; } static PyMethodDef parser_methods[] = { { .ml_name = "parse", .ml_meth = (PyCFunction)parser_parse, .ml_flags = METH_VARARGS, .ml_doc = "parse(bytes, old_tree=None)\n--\n\n\ Parse source code, creating a syntax tree.", }, { .ml_name = "set_language", .ml_meth = (PyCFunction)parser_set_language, .ml_flags = METH_O, .ml_doc = "set_language(language)\n--\n\n\ Set the parser language.", }, {NULL}, }; static PyTypeObject parser_type = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = "tree_sitter.Parser", .tp_doc = "A Parser", .tp_basicsize = sizeof(Parser), .tp_itemsize = 0, .tp_flags = Py_TPFLAGS_DEFAULT, .tp_new = parser_new, .tp_dealloc = (destructor)parser_dealloc, .tp_methods = parser_methods, }; -// Module +// Query + +static PyObject *query_matches(Query *self, PyObject *args) { + PyErr_SetString(PyExc_NotImplementedError, "Not Implemented"); + return NULL; +} + +static PyObject *query_captures(Query *self, PyObject *args, PyObject *kwargs) { + char *keywords[] = { + "node", + "start_point", + "end_point", + NULL, + }; + + Node *node = NULL; + unsigned start_row = 0, start_column = 0, end_row = 0, end_column = 0; + + int ok = PyArg_ParseTupleAndKeywords( + args, + kwargs, + "O|(II)(II)", + keywords, + (PyObject **)&node, + &start_row, + &start_column, + &end_row, + &end_column + ); + if (!ok) return NULL; + + if (!PyObject_IsInstance((PyObject *)node, (PyObject *)&node_type)) { + PyErr_SetString(PyExc_TypeError, "First argument to captures must be a Node"); + return NULL; + } + + if (!query_cursor) query_cursor = ts_query_cursor_new(); + ts_query_cursor_exec(query_cursor, self->query, node->node); + + PyObject *result = PyList_New(0); + + uint32_t capture_index; + TSQueryMatch match; + while (ts_query_cursor_next_capture(query_cursor, &match, &capture_index)) { + const TSQueryCapture *capture = &match.captures[capture_index]; + PyObject *node = node_new_internal(capture->node); + PyObject *capture_name = PyList_GetItem(self->capture_names, capture->index); + PyList_Append(result, PyTuple_Pack(2, node, capture_name)); + } + + return result; +} + +static void query_dealloc(Query *self) { + if (self->query) ts_query_delete(self->query); + Py_XDECREF(self->capture_names); + Py_TYPE(self)->tp_free(self); +} + +static PyMethodDef query_methods[] = { + { + .ml_name = "matches", + .ml_meth = (PyCFunction)query_matches, + .ml_flags = METH_VARARGS, + .ml_doc = "matches(node)\n--\n\n\ + Get a list of all of the matches within the given node." + }, + { + .ml_name = "captures", + .ml_meth = (PyCFunction)query_captures, + .ml_flags = METH_KEYWORDS|METH_VARARGS, + .ml_doc = "captures(node)\n--\n\n\ + Get a list of all of the captures within the given node.", + }, + {NULL}, +}; + +static PyTypeObject query_type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "tree_sitter.Query", + .tp_doc = "A set of patterns to search for in a syntax tree.", + .tp_basicsize = sizeof(Query), + .tp_itemsize = 0, + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_dealloc = (destructor)query_dealloc, + .tp_methods = query_methods, +}; + +static PyObject *query_new_internal( + TSLanguage *language, + char *source, + int length +) { + Query *query = (Query *)query_type.tp_alloc(&query_type, 0); + if (query == NULL) return NULL; + uint32_t error_offset; + TSQueryError error_type; + query->query = ts_query_new( + language, source, length, &error_offset, &error_type + ); + if (!query->query) { + char *word_start = &source[error_offset]; + char *word_end = word_start; + while ( + word_end < &source[length] && + (iswalnum(*word_end) || *word_end == '-' || *word_end == '_' || *word_end == '?' || *word_end == '.') + ) word_end++; + char c = *word_end; + *word_end = 0; + switch (error_type) { + case TSQueryErrorNodeType: + PyErr_Format(PyExc_NameError, "Invalid node type %s", &source[error_offset]); + break; + case TSQueryErrorField: + PyErr_Format(PyExc_NameError, "Invalid field name %s", &source[error_offset]); + break; + case TSQueryErrorCapture: + PyErr_Format(PyExc_NameError, "Invalid capture name %s", &source[error_offset]); + break; + default: + PyErr_Format(PyExc_SyntaxError, "Invalid syntax at offset %u", error_offset); + } + *word_end = c; + query_dealloc(query); + return NULL; + } -static PyObject *language_field_id_for_name(Node *self, PyObject *args) { + unsigned n = ts_query_capture_count(query->query); + query->capture_names = PyList_New(n); + Py_INCREF(Py_None); + for (unsigned i = 0; i < n; i++) { + unsigned length; + const char *capture_name = ts_query_capture_name_for_id(query->query, i, &length); + PyList_SetItem(query->capture_names, i, PyUnicode_FromStringAndSize(capture_name, length)); + } + return (PyObject *)query; +} + +// Module + +static PyObject *language_field_id_for_name(PyObject *self, PyObject *args) { TSLanguage *language; char *field_name; int length; if (!PyArg_ParseTuple(args, "ls#", &language, &field_name, &length)) { return NULL; } TSFieldId field_id = ts_language_field_id_for_name(language, field_name, length); if (field_id == 0) { Py_RETURN_NONE; } return PyLong_FromSize_t((size_t)field_id); } +static PyObject *language_query(PyObject *self, PyObject *args) { + TSLanguage *language; + char *source; + int length; + if (!PyArg_ParseTuple(args, "ls#", &language, &source, &length)) { + return NULL; + } + + return query_new_internal(language, source, length); +} + static PyMethodDef module_methods[] = { { .ml_name = "_language_field_id_for_name", .ml_meth = (PyCFunction)language_field_id_for_name, .ml_flags = METH_VARARGS, .ml_doc = "(internal)", }, + { + .ml_name = "_language_query", + .ml_meth = (PyCFunction)language_query, + .ml_flags = METH_VARARGS, + .ml_doc = "(internal)", + }, {NULL}, }; static struct PyModuleDef module_definition = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "binding", .m_doc = NULL, .m_size = -1, .m_methods = module_methods, }; PyMODINIT_FUNC PyInit_binding(void) { PyObject *module = PyModule_Create(&module_definition); if (module == NULL) return NULL; if (PyType_Ready(&parser_type) < 0) return NULL; Py_INCREF(&parser_type); PyModule_AddObject(module, "Parser", (PyObject *)&parser_type); if (PyType_Ready(&tree_type) < 0) return NULL; Py_INCREF(&tree_type); PyModule_AddObject(module, "Tree", (PyObject *)&tree_type); if (PyType_Ready(&node_type) < 0) return NULL; Py_INCREF(&node_type); PyModule_AddObject(module, "Node", (PyObject *)&node_type); if (PyType_Ready(&tree_cursor_type) < 0) return NULL; Py_INCREF(&tree_cursor_type); PyModule_AddObject(module, "TreeCursor", (PyObject *)&tree_cursor_type); + if (PyType_Ready(&query_type) < 0) return NULL; + Py_INCREF(&query_type); + PyModule_AddObject(module, "Query", (PyObject *)&query_type); + return module; } diff --git a/tree_sitter/core b/tree_sitter/core index ea515b6..967da88 160000 --- a/tree_sitter/core +++ b/tree_sitter/core @@ -1 +1 @@ -Subproject commit ea515b69679593c93b542e06ba86cab83f87ea02 +Subproject commit 967da88371d4cfab43a1ede310c2f95bf3239b0e