diff --git a/.appveyor.yml b/.appveyor.yml
new file mode 100644
index 0000000..7305778
--- /dev/null
+++ b/.appveyor.yml
@@ -0,0 +1,15 @@
+build: false
+
+platform:
+  # TODO - get 64 bit build working
+  # - x64
+  - x86
+
+install:
+  - git submodule update --init --recursive
+  - IF "%PLATFORM%" == "x86" set PYTHON=C:\Python37
+  - IF "%PLATFORM%" == "x64" set PYTHON=C:\Python37-x64
+
+test_script:
+  - script\fetch-fixtures.cmd
+  - "%PYTHON%\\python.exe setup.py test"
diff --git a/README.md b/README.md
index 607c845..6393e12 100644
--- a/README.md
+++ b/README.md
@@ -1,90 +1,91 @@
 py-tree-sitter
 ==================
 
 [![Build Status](https://travis-ci.org/tree-sitter/py-tree-sitter.svg?branch=master)](https://travis-ci.org/tree-sitter/py-tree-sitter)
+[![Build status](https://ci.appveyor.com/api/projects/status/mde790v0v9gux85w/branch/master?svg=true)](https://ci.appveyor.com/project/maxbrunsfeld/py-tree-sitter/branch/master)
 
 This module provides Python bindings to the [tree-sitter](https://github.com/tree-sitter/tree-sitter) parsing library.
 
 ## Installation
 
 This package currently only works with Python 3. There are no library dependencies.
 
 ```sh
 pip3 install tree_sitter
 ```
 
 ## Usage
 
 #### Setup
 
 First you'll need a Tree-sitter language implementation for each language that you want to parse. You can clone some of the [existing language repos](https://github.com/tree-sitter) or [create your own](http://tree-sitter.github.io/tree-sitter/creating-parsers):
 
 ```sh
 git clone https://github.com/tree-sitter/tree-sitter-go
 git clone https://github.com/tree-sitter/tree-sitter-javascript
 git clone https://github.com/tree-sitter/tree-sitter-python
 ```
 
 Use the `Language.build_library` method to compile these into a library that's usable from Python. This function will return immediately if the library has already been compiled since the last time its source code was modified:
 
 ```python
-from tree_sitter import Language
+from tree_sitter import Language, Parser
 
 Language.build_library(
   # Store the library in the `build` directory
   'build/my-languages.so',
 
   # Include one or more languages
   [
     'vendor/tree-sitter-go',
     'vendor/tree-sitter-javascript',
     'vendor/tree-sitter-python'
   ]
 )
 ```
 
 Load the languages into your app as `Language` objects:
 
 ```python
 GO_LANGUAGE = Language('build/my-languages.so', 'go')
 JS_LANGUAGE = Language('build/my-languages.so', 'javascript')
 PY_LANGUAGE = Language('build/my-languages.so', 'python')
 ```
 
 #### Basic Parsing
 
 Create a `Parser` and configure it to use one of the languages:
 
 ```python
 parser = Parser()
 parser.set_language(PY_LANGUAGE)
 ```
 
 Parse some source code:
 
 ```python
 tree = parser.parse("""
 def foo():
     if bar:
         baz()
 """)
 ```
 
 Inspect the resulting `Tree`:
 
 ```python
 root_node = tree.root_node
 assert root_node.type == 'module'
 assert root_node.start_point == (1, 0)
 assert root_node.end_point == (3, 13)
 
 function_node = root_node.children[0]
-assert root_node.type == 'function_definition'
+assert function_node.type == 'function_definition'
 
 function_name_node = function_node.children[1]
 assert function_name_node.type == 'identifier'
 assert function_name_node.start_point == (1, 4)
 assert function_name_node.end_point == (1, 7)
 
 assert root_node.sexp() == ''
 ```
diff --git a/script/fetch-fixtures.cmd b/script/fetch-fixtures.cmd
new file mode 100644
index 0000000..75f10cc
--- /dev/null
+++ b/script/fetch-fixtures.cmd
@@ -0,0 +1,22 @@
+@echo off
+
+if not exist tests\fixtures mkdir tests\fixtures
+
+call:fetch_grammar javascript master
+call:fetch_grammar python master
+
+exit /B 0
+
+:fetch_grammar
+setlocal
+set grammar_dir=tests\fixtures\tree-sitter-%~1
+set grammar_url=https://github.com/tree-sitter/tree-sitter-%~1
+set grammar_branch=%~2
+@if not exist %grammar_dir% (
+  git clone %grammar_url% %grammar_dir% --depth=1
+)
+pushd %grammar_dir%
+git fetch origin %grammar_branch% --depth=1
+git reset --hard FETCH_HEAD
+popd
+exit /B 0
diff --git a/tests/__init__.py b/tests/__init__.py
index d8e63f9..73f9861 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,71 +1,72 @@
 import unittest
+import os.path as path
 from tree_sitter import Parser, Language
 
 
-LIB_PATH = "build/languages.so"
+LIB_PATH = path.join("build", "languages.so")
 Language.build_library(LIB_PATH, [
-    "tests/fixtures/tree-sitter-python",
-    "tests/fixtures/tree-sitter-javascript",
+    path.join("tests", "fixtures", "tree-sitter-python"),
+    path.join("tests", "fixtures", "tree-sitter-javascript"),
 ])
 PYTHON = Language(LIB_PATH, "python")
 JAVASCRIPT = Language(LIB_PATH, "javascript")
 
 
 class TestTreeSitter(unittest.TestCase):
     def test_set_language(self):
         parser = Parser()
         parser.set_language(PYTHON)
         tree = parser.parse("def foo():\n  bar()")
         self.assertEqual(
             tree.root_node.sexp(),
             "(module (function_definition (identifier) (parameters) (expression_statement (call (identifier) (argument_list)))))"
         )
         parser.set_language(JAVASCRIPT)
         tree = parser.parse("function foo() {\n  bar();\n}")
         self.assertEqual(
             tree.root_node.sexp(),
             "(program (function (identifier) (formal_parameters) (statement_block (expression_statement (call_expression (identifier) (arguments))))))"
         )
 
     def test_node_children(self):
         parser = Parser()
         parser.set_language(PYTHON)
         tree = parser.parse("def foo():\n  bar()")
 
         root_node = tree.root_node
         self.assertEqual(root_node.type, "module")
         self.assertEqual(root_node.start_byte, 0)
         self.assertEqual(root_node.end_byte, 18)
         self.assertEqual(root_node.start_point, (0, 0))
         self.assertEqual(root_node.end_point, (1, 7))
 
         # List object is reused
         self.assertIs(root_node.children, root_node.children)
 
         fn_node = root_node.children[0]
         self.assertEqual(fn_node.type, "function_definition")
         self.assertEqual(fn_node.start_byte, 0)
         self.assertEqual(fn_node.end_byte, 18)
         self.assertEqual(fn_node.start_point, (0, 0))
         self.assertEqual(fn_node.end_point, (1, 7))
 
         def_node = fn_node.children[0]
         self.assertEqual(def_node.type, "def")
         self.assertEqual(def_node.is_named, False)
 
         id_node = fn_node.children[1]
         self.assertEqual(id_node.type, "identifier")
         self.assertEqual(id_node.is_named, True)
         self.assertEqual(len(id_node.children), 0)
 
         params_node = fn_node.children[2]
         self.assertEqual(params_node.type, "parameters")
         self.assertEqual(params_node.is_named, True)
 
         colon_node = fn_node.children[3]
         self.assertEqual(colon_node.type, ":")
         self.assertEqual(colon_node.is_named, False)
 
         statement_node = fn_node.children[4]
         self.assertEqual(statement_node.type, "expression_statement")
         self.assertEqual(statement_node.is_named, True)
diff --git a/tree_sitter/binding.c b/tree_sitter/binding.c
index b0f6811..e02e152 100644
--- a/tree_sitter/binding.c
+++ b/tree_sitter/binding.c
@@ -1,340 +1,355 @@
 #include "Python.h"
 #include "tree_sitter/api.h"
 
 // Types
 
 typedef struct {
-  PyObject_HEAD;
+  PyObject_HEAD
   TSNode node;
   PyObject *children;
 } Node;
 
 typedef struct {
-  PyObject_HEAD;
+  PyObject_HEAD
   TSTree *tree;
 } Tree;
 
 typedef struct {
-  PyObject_HEAD;
+  PyObject_HEAD
   TSParser *parser;
 } Parser;
 
 static TSTreeCursor default_cursor = {0};
 
 // Point
 
 static PyObject *point_new(TSPoint point) {
   PyObject *row = PyLong_FromSize_t((size_t)point.row);
   PyObject *column = PyLong_FromSize_t((size_t)point.column);
   if (!row || !column) {
     Py_XDECREF(row);
     Py_XDECREF(column);
     return NULL;
   }
-  return PyTuple_Pack(2, row, column);
+  // PyTuple_Pack takes its own references, so release ours to avoid a leak.
+  PyObject *result = PyTuple_Pack(2, row, column);
+  Py_DECREF(row);
+  Py_DECREF(column);
+  return result;
 }
 
 // Node
 
 static PyObject *node_new_internal(TSNode node);
 
 static void node_dealloc(Node *self) {
   Py_XDECREF(self->children);
   Py_TYPE(self)->tp_free(self);
 }
 
 static PyObject *node_repr(Node *self) {
   const char *type = ts_node_type(self->node);
   TSPoint start_point = ts_node_start_point(self->node);
   TSPoint end_point = ts_node_end_point(self->node);
   const char *format_string = ts_node_is_named(self->node)
     ? "<Node kind=%s, start_point=(%u, %u), end_point=(%u, %u)>"
     : "<Node kind=\"%s\", start_point=(%u, %u), end_point=(%u, %u)>";
   return PyUnicode_FromFormat(
     format_string,
     type,
     start_point.row,
     start_point.column,
     end_point.row,
     end_point.column
   );
 }
 
 static PyObject *node_sexp(Node *self, PyObject *args) {
   char *string = ts_node_string(self->node);
   PyObject *result = PyUnicode_FromString(string);
   free(string);
   return result;
 }
 
 static PyObject *node_get_type(Node *self, void *payload) {
   return PyUnicode_FromString(ts_node_type(self->node));
 }
 
 static PyObject *node_get_is_named(Node *self, void *payload) {
   return PyBool_FromLong(ts_node_is_named(self->node));
 }
 
 static PyObject *node_get_start_byte(Node *self, void *payload) {
   return PyLong_FromSize_t((size_t)ts_node_start_byte(self->node));
 }
 
 static PyObject *node_get_end_byte(Node *self, void *payload) {
   return PyLong_FromSize_t((size_t)ts_node_end_byte(self->node));
 }
 
 static PyObject *node_get_start_point(Node *self, void *payload) {
   return point_new(ts_node_start_point(self->node));
 }
 
 static PyObject *node_get_end_point(Node *self, void *payload) {
   return point_new(ts_node_end_point(self->node));
 }
 
 static PyObject *node_get_children(Node *self, void *payload) {
   if (self->children) {
     Py_INCREF(self->children);
     return self->children;
   }
 
   long length = (long)ts_node_child_count(self->node);
   PyObject *result = PyList_New(length);
+  if (!result) return NULL;
   if (length > 0) {
     ts_tree_cursor_reset(&default_cursor, self->node);
     ts_tree_cursor_goto_first_child(&default_cursor);
     int i = 0;
     do {
       TSNode child = ts_tree_cursor_current_node(&default_cursor);
-      PyList_SetItem(result, i, node_new_internal(child));
+      PyObject *child_object = node_new_internal(child);
+      if (!child_object) {
+        Py_DECREF(result);
+        return NULL;
+      }
+      // PyList_SetItem steals the reference to child_object.
+      PyList_SetItem(result, i, child_object);
       i++;
     } while (ts_tree_cursor_goto_next_sibling(&default_cursor));
   }
 
   Py_INCREF(result);
   self->children = result;
   return result;
 }
 
 static PyMethodDef node_methods[] = {
   {
     .ml_name = "sexp",
     .ml_meth = (PyCFunction)node_sexp,
     .ml_flags = METH_NOARGS,
-    .ml_doc = "Get an S-expression representing the name",
+    .ml_doc = "Get an S-expression representing the node",
   },
   {NULL},
 };
 
 static PyGetSetDef node_accessors[] = {
   {"type", (getter)node_get_type, NULL, "The node's type", NULL},
   {"is_named", (getter)node_get_is_named, NULL, "Is this a named node", NULL},
   {"start_byte", (getter)node_get_start_byte, NULL, "The node's start byte", NULL},
   {"end_byte", (getter)node_get_end_byte, NULL, "The node's end byte", NULL},
   {"start_point", (getter)node_get_start_point, NULL, "The node's start point", NULL},
   {"end_point", (getter)node_get_end_point, NULL, "The node's end point", NULL},
   {"children", (getter)node_get_children, NULL, "The node's children", NULL},
   {NULL}
 };
 
 static PyTypeObject node_type = {
   PyVarObject_HEAD_INIT(NULL, 0)
   .tp_name = "tree_sitter.Node",
   .tp_doc = "A syntax node",
   .tp_basicsize = sizeof(Node),
   .tp_itemsize = 0,
   .tp_flags = Py_TPFLAGS_DEFAULT,
   .tp_dealloc = (destructor)node_dealloc,
   .tp_repr = (reprfunc)node_repr,
   .tp_methods = node_methods,
   .tp_getset = node_accessors,
 };
 
 static PyObject *node_new_internal(TSNode node) {
   Node *self = (Node *)node_type.tp_alloc(&node_type, 0);
   if (self != NULL) {
     self->node = node;
     self->children = NULL;
   }
   return (PyObject *)self;
 }
 
 // Tree
 
 static void tree_dealloc(Tree *self) {
   ts_tree_delete(self->tree);
   Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
 static PyObject *tree_get_root_node(Tree *self, void *payload) {
   return node_new_internal(ts_tree_root_node(self->tree));
 }
 
 static PyMethodDef tree_methods[] = {
   {NULL},
 };
 
 static PyGetSetDef tree_accessors[] = {
   {"root_node", (getter)tree_get_root_node, NULL, "root node", NULL},
   {NULL}
 };
 
 static PyTypeObject tree_type = {
   PyVarObject_HEAD_INIT(NULL, 0)
   .tp_name = "tree_sitter.Tree",
   .tp_doc = "A Syntax Tree",
   .tp_basicsize = sizeof(Tree),
   .tp_itemsize = 0,
   .tp_flags = Py_TPFLAGS_DEFAULT,
   .tp_dealloc = (destructor)tree_dealloc,
   .tp_methods = tree_methods,
   .tp_getset = tree_accessors,
 };
 
 static PyObject *tree_new_internal(TSTree *tree) {
   Tree *self = (Tree *)tree_type.tp_alloc(&tree_type, 0);
   if (self != NULL) self->tree = tree;
   return (PyObject *)self;
 }
 
 // Parser
 
 static PyObject *parser_new(
   PyTypeObject *type,
   PyObject *args,
   PyObject *kwds
 ) {
   Parser *self = (Parser *)type->tp_alloc(type, 0);
   if (self != NULL) self->parser = ts_parser_new();
   return (PyObject *)self;
 }
 
 static void parser_dealloc(Parser *self) {
   ts_parser_delete(self->parser);
   Py_TYPE(self)->tp_free((PyObject *)self);
 }
 
 static PyObject *parser_parse(Parser *self, PyObject *args) {
   PyObject *source_code = NULL;
   PyObject *old_tree_arg = NULL;
-  if (!PyArg_UnpackTuple(args, "ref", 1, 2, &source_code, &old_tree_arg)) {
+  if (!PyArg_UnpackTuple(args, "parse", 1, 2, &source_code, &old_tree_arg)) {
     return NULL;
   }
 
   if (!PyUnicode_Check(source_code)) {
     PyErr_SetString(PyExc_TypeError, "First argument to parse must be a string");
     return NULL;
   }
 
   const TSTree *old_tree = NULL;
   if (old_tree_arg) {
     if (!PyObject_IsInstance(old_tree_arg, (PyObject *)&tree_type)) {
       PyErr_SetString(PyExc_TypeError, "Second argument to parse must be a Tree");
       return NULL;
     }
     old_tree = ((Tree *)old_tree_arg)->tree;
   }
 
   TSTree *new_tree = NULL;
 
-  PyUnicode_READY(source_code);
+  if (PyUnicode_READY(source_code) < 0) return NULL;
   size_t length = PyUnicode_GET_LENGTH(source_code);
   int kind = PyUnicode_KIND(source_code);
   if (kind == PyUnicode_1BYTE_KIND) {
     Py_UCS1 *source_bytes = PyUnicode_1BYTE_DATA(source_code);
     new_tree = ts_parser_parse_string(self->parser, old_tree, (char *)source_bytes, length);
   } else if (kind == PyUnicode_2BYTE_KIND) {
     Py_UCS2 *source_bytes = PyUnicode_2BYTE_DATA(source_code);
-    new_tree = ts_parser_parse_string_encoding(self->parser, old_tree, (char *)source_bytes, length, TSInputEncodingUTF16);
+    // The length argument is in bytes; each UCS2 code unit is two bytes.
+    new_tree = ts_parser_parse_string_encoding(self->parser, old_tree, (char *)source_bytes, length * sizeof(Py_UCS2), TSInputEncodingUTF16);
   } else if (kind == PyUnicode_4BYTE_KIND) {
     PyErr_SetString(PyExc_ValueError, "4 byte strings are not yet supported");
     return NULL;
   } else {
     PyErr_SetString(PyExc_ValueError, "Unknown string kind");
     return NULL;
   }
 
   if (!new_tree) {
     PyErr_SetString(PyExc_ValueError, "Parsing failed");
     return NULL;
   }
 
   return tree_new_internal(new_tree);
 }
 
 static PyObject *parser_set_language(Parser *self, PyObject *arg) {
   PyObject *language_id = PyObject_GetAttrString(arg, "language_id");
   if (!language_id) {
     PyErr_SetString(PyExc_TypeError, "Argument to set_language must be a Language");
     return NULL;
   }
 
   if (!PyLong_Check(language_id)) {
+    Py_DECREF(language_id);
     PyErr_SetString(PyExc_TypeError, "Language ID must be an integer");
     return NULL;
   }
 
-  TSLanguage *language = (TSLanguage *)PyLong_AsLong(language_id);
+  // The ID is a pointer; a C long is too narrow to hold it on 64-bit Windows.
+  TSLanguage *language = (TSLanguage *)PyLong_AsVoidPtr(language_id);
+  Py_DECREF(language_id);
   if (!language) {
     PyErr_SetString(PyExc_ValueError, "Language ID must not be null");
     return NULL;
   }
 
   ts_parser_set_language(self->parser, language);
-  return Py_None;
+  Py_RETURN_NONE;
 }
 
 static PyMethodDef parser_methods[] = {
   {
     .ml_name = "parse",
     .ml_meth = (PyCFunction)parser_parse,
     .ml_flags = METH_VARARGS,
     .ml_doc = "Parse source code, creating a syntax tree",
   },
   {
     .ml_name = "set_language",
     .ml_meth = (PyCFunction)parser_set_language,
     .ml_flags = METH_O,
-    .ml_doc = "Parse source code, creating a syntax tree",
+    .ml_doc = "Set the language that the parser will use",
   },
   {NULL},
 };
 
 static PyTypeObject parser_type = {
   PyVarObject_HEAD_INIT(NULL, 0)
   .tp_name = "tree_sitter.Parser",
   .tp_doc = "A Parser",
   .tp_basicsize = sizeof(Parser),
   .tp_itemsize = 0,
   .tp_flags = Py_TPFLAGS_DEFAULT,
   .tp_new = parser_new,
   .tp_dealloc = (destructor)parser_dealloc,
   .tp_methods = parser_methods,
 };
 
 // Module
 
 static struct PyModuleDef module_definition = {
   .m_base = PyModuleDef_HEAD_INIT,
   .m_name = "tree_sitter",
   .m_doc = NULL,
   .m_size = -1,
 };
 
 PyMODINIT_FUNC PyInit_tree_sitter_binding(void) {
   PyObject *module = PyModule_Create(&module_definition);
   if (module == NULL) return NULL;
 
   if (PyType_Ready(&parser_type) < 0) return NULL;
   Py_INCREF(&parser_type);
   PyModule_AddObject(module, "Parser", (PyObject *)&parser_type);
 
   if (PyType_Ready(&tree_type) < 0) return NULL;
   Py_INCREF(&tree_type);
   PyModule_AddObject(module, "Tree", (PyObject *)&tree_type);
 
   if (PyType_Ready(&node_type) < 0) return NULL;
   Py_INCREF(&node_type);
   PyModule_AddObject(module, "Node", (PyObject *)&node_type);
 
   return module;
 }