tree-sitter: vendor tree-sitter runtime

tree-sitter/tree-sitter commit 7685b7861ca475664b6ef57e14d1da9acf741275

Included files are:
lib/include/tree-sitter/*.h
lib/src/*.[ch]
LICENSE
This commit is contained in:
Björn Linse 2019-06-06 10:34:01 +02:00
parent 0d9a3c86a1
commit 3bddf05023
32 changed files with 8070 additions and 0 deletions

View File

@ -25,3 +25,6 @@ coverage:
changes: no
comment: off
ignore:
- "src/tree_sitter"

21
src/tree_sitter/LICENSE Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2018 Max Brunsfeld
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

81
src/tree_sitter/alloc.h Normal file
View File

@ -0,0 +1,81 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#if defined(TREE_SITTER_TEST)
// Recording allocator hooks. They are only declared in this header; the
// definitions are presumably supplied by the test build (TREE_SITTER_TEST).
void *ts_record_malloc(size_t size);
void *ts_record_calloc(size_t count, size_t size);
void *ts_record_realloc(void *buffer, size_t size);
void ts_record_free(void *buffer);
bool ts_toggle_allocation_recording(bool value);

// With TREE_SITTER_TEST defined, every ts_* allocator simply forwards to the
// matching ts_record_* hook declared above.
static inline void *ts_malloc(size_t size) {
  void *block = ts_record_malloc(size);
  return block;
}

static inline void *ts_calloc(size_t count, size_t size) {
  void *block = ts_record_calloc(count, size);
  return block;
}

static inline void *ts_realloc(void *buffer, size_t size) {
  void *block = ts_record_realloc(buffer, size);
  return block;
}

static inline void ts_free(void *buffer) {
  ts_record_free(buffer);
}
#else
#include <stdlib.h>
// Stand-in used when TREE_SITTER_TEST is not defined: the argument is ignored
// and `false` is always returned.
static inline bool ts_toggle_allocation_recording(bool value) {
  (void)value;
  return false;
}
// Allocate `size` bytes with `malloc`.
//
// A NULL result for a non-zero request is fatal: a message is written to
// stderr and the process exits with status 1, so callers never observe NULL
// for `size > 0`. A NULL result for a zero-byte request is passed through.
static inline void *ts_malloc(size_t size) {
  void *result = malloc(size);
  if (size > 0 && !result) {
    // `%zu` is the conversion for size_t; `%lu` is a type mismatch wherever
    // unsigned long is narrower than size_t (e.g. 64-bit Windows).
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
    exit(1);
  }
  return result;
}
// Allocate a zero-initialized array of `count` elements of `size` bytes each
// with `calloc`.
//
// A NULL result for a non-empty request is fatal: a message is written to
// stderr and the process exits with status 1. A request for zero total bytes
// (`count == 0` or `size == 0`) may legitimately yield NULL, so that result is
// passed through instead of being treated as an allocation failure.
static inline void *ts_calloc(size_t count, size_t size) {
  void *result = calloc(count, size);
  if (count > 0 && size > 0 && !result) {
    // `%zu` is the conversion for size_t; `%lu` is a type mismatch wherever
    // unsigned long is narrower than size_t (e.g. 64-bit Windows).
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    exit(1);
  }
  return result;
}
// Resize `buffer` to `size` bytes with `realloc`.
//
// A NULL result for a non-zero request is fatal: a message is written to
// stderr and the process exits with status 1, so callers may assign the result
// straight back to the pointer they passed in. For `size == 0` the result of
// `realloc` is returned as-is.
static inline void *ts_realloc(void *buffer, size_t size) {
  void *result = realloc(buffer, size);
  if (size > 0 && !result) {
    // `%zu` is the conversion for size_t; `%lu` is a type mismatch wherever
    // unsigned long is narrower than size_t (e.g. 64-bit Windows).
    fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
    exit(1);
  }
  return result;
}
// Release `buffer` by handing it straight to `free`.
static inline void ts_free(void *buffer)
{
  free(buffer);
}
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

660
src/tree_sitter/api.h Normal file
View File

@ -0,0 +1,660 @@
#ifndef TREE_SITTER_API_H_
#define TREE_SITTER_API_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
/****************************/
/* Section - ABI Versioning */
/****************************/
#define TREE_SITTER_LANGUAGE_VERSION 11
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
/*******************/
/* Section - Types */
/*******************/
/** Numerical id of a node type (see `ts_node_symbol`, `ts_language_symbol_name`). */
typedef uint16_t TSSymbol;
/** Numerical id of a field (see `ts_language_field_id_for_name`). */
typedef uint16_t TSFieldId;
/** Opaque types: only forward-declared in this header and used through pointers. */
typedef struct TSLanguage TSLanguage;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
/** How the text handed to the parser is encoded (see `TSInput.encoding`). */
typedef enum {
TSInputEncodingUTF8,
TSInputEncodingUTF16,
} TSInputEncoding;
/**
* Result type of `ts_language_symbol_type`.
* NOTE(review): that function documents named, anonymous and hidden nodes;
* presumably these correspond to Regular, Anonymous and Auxiliary - confirm.
*/
typedef enum {
TSSymbolTypeRegular,
TSSymbolTypeAnonymous,
TSSymbolTypeAuxiliary,
} TSSymbolType;
/** A (row, column) position in the text. */
typedef struct {
uint32_t row;
uint32_t column;
} TSPoint;
/** A span of text, given both as (row, column) points and as byte offsets. */
typedef struct {
TSPoint start_point;
TSPoint end_point;
uint32_t start_byte;
uint32_t end_byte;
} TSRange;
/**
* Describes how the parser reads text; see `ts_parser_parse` for the contract
* of each field.
*/
typedef struct {
// Arbitrary pointer passed to every invocation of `read`.
void *payload;
// Returns a chunk of text for the given byte offset / position and writes its
// length to `bytes_read`; a length of zero signals the end of the document.
const char *(*read)(void *payload, uint32_t byte_index, TSPoint position, uint32_t *bytes_read);
// Encoding of the text returned by `read`.
TSInputEncoding encoding;
} TSInput;
/** Category passed to a `TSLogger` callback. */
typedef enum {
TSLogTypeParse,
TSLogTypeLex,
} TSLogType;
/**
* Logging callback installed with `ts_parser_set_logger`. The parser does not
* take ownership of `payload`.
* NOTE(review): the `const char *` argument is presumably the log message - confirm.
*/
typedef struct {
void *payload;
void (*log)(void *payload, TSLogType, const char *);
} TSLogger;
/**
* Describes one edit to the source text, in both byte offsets and
* (row, column) points, as consumed by `ts_tree_edit` and `ts_node_edit`.
*/
typedef struct {
uint32_t start_byte;
uint32_t old_end_byte;
uint32_t new_end_byte;
TSPoint start_point;
TSPoint old_end_point;
TSPoint new_end_point;
} TSInputEdit;
/**
* Handle to a syntax node, passed by value to the `ts_node_*` functions.
* NOTE(review): `context` and `id` are not interpreted anywhere in this
* header; they look like internal bookkeeping - confirm before relying on them.
*/
typedef struct {
uint32_t context[4];
const void *id;
const TSTree *tree;
} TSNode;
/**
* State of a tree cursor, manipulated through the `ts_tree_cursor_*`
* functions. The fields are not interpreted anywhere in this header.
*/
typedef struct {
const void *tree;
const void *id;
uint32_t context[2];
} TSTreeCursor;
/********************/
/* Section - Parser */
/********************/
/**
* Create a new parser.
*/
TSParser *ts_parser_new(void);
/**
* Delete the parser, freeing all of the memory that it used.
*/
void ts_parser_delete(TSParser *parser);
/**
* Set the language that the parser should use for parsing.
*
* Returns a boolean indicating whether or not the language was successfully
* assigned. True means assignment succeeded. False means there was a version
* mismatch: the language was generated with an incompatible version of the
* Tree-sitter CLI. Check the language's version using `ts_language_version`
* and compare it to this library's `TREE_SITTER_LANGUAGE_VERSION` and
* `TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION` constants.
*/
bool ts_parser_set_language(TSParser *self, const TSLanguage *language);
/**
* Get the parser's current language.
*/
const TSLanguage *ts_parser_language(const TSParser *self);
/**
* Set the spans of text that the parser should include when parsing.
*
* By default, the parser will always include entire documents. This function
* allows you to parse only a *portion* of a document but still return a syntax
* tree whose ranges match up with the document as a whole. You can also pass
* multiple disjoint ranges.
*
* The second and third parameters specify the location and length of an array
* of ranges. The parser does *not* take ownership of these ranges; it copies
* the data, so it doesn't matter how these ranges are allocated.
*/
void ts_parser_set_included_ranges(
TSParser *self,
const TSRange *ranges,
uint32_t length
);
/**
* Get the ranges of text that the parser will include when parsing.
*
* The returned pointer is owned by the parser. The caller should not free it
* or write to it. The length of the array will be written to the given
* `length` pointer.
*/
const TSRange *ts_parser_included_ranges(
const TSParser *self,
uint32_t *length
);
/**
* Use the parser to parse some source code and create a syntax tree.
*
* If you are parsing this document for the first time, pass `NULL` for the
* `old_tree` parameter. Otherwise, if you have already parsed an earlier
* version of this document and the document has since been edited, pass the
* previous syntax tree so that the unchanged parts of it can be reused.
* This will save time and memory. For this to work correctly, you must have
* already edited the old syntax tree using the `ts_tree_edit` function in a
* way that exactly matches the source code changes.
*
* The `TSInput` parameter lets you specify how to read the text. It has the
* following three fields:
* 1. `read`: A function to retrieve a chunk of text at a given byte offset
* and (row, column) position. The function should return a pointer to the
* text and write its length to the `bytes_read` pointer. The parser
* does not take ownership of this buffer; it just borrows it until it has
* finished reading it. The function should write a zero value to the
* `bytes_read` pointer to indicate the end of the document.
* 2. `payload`: An arbitrary pointer that will be passed to each invocation
* of the `read` function.
* 3. `encoding`: An indication of how the text is encoded. Either
* `TSInputEncodingUTF8` or `TSInputEncodingUTF16`.
*
* This function returns a syntax tree on success, and `NULL` on failure. There
* are three possible reasons for failure:
* 1. The parser does not have a language assigned. Check for this using the
* `ts_parser_language` function.
* 2. Parsing was cancelled due to a timeout that was set by an earlier call to
* the `ts_parser_set_timeout_micros` function. You can resume parsing from
* where the parser left off by calling `ts_parser_parse` again with the
* same arguments. Or you can start parsing from scratch by first calling
* `ts_parser_reset`.
* 3. Parsing was cancelled using a cancellation flag that was set by an
* earlier call to `ts_parser_set_cancellation_flag`. You can resume parsing
* from where the parser left off by calling `ts_parser_parse` again with
* the same arguments.
*/
TSTree *ts_parser_parse(
TSParser *self,
const TSTree *old_tree,
TSInput input
);
/**
* Use the parser to parse some source code stored in one contiguous buffer.
* The first two parameters are the same as in the `ts_parser_parse` function
* above. The second two parameters indicate the location of the buffer and its
* length in bytes.
*/
TSTree *ts_parser_parse_string(
TSParser *self,
const TSTree *old_tree,
const char *string,
uint32_t length
);
/**
* Use the parser to parse some source code stored in one contiguous buffer with
* a given encoding. The first four parameters work the same as in the
* `ts_parser_parse_string` method above. The final parameter indicates whether
* the text is encoded as UTF8 or UTF16.
*/
TSTree *ts_parser_parse_string_encoding(
TSParser *self,
const TSTree *old_tree,
const char *string,
uint32_t length,
TSInputEncoding encoding
);
/**
* Instruct the parser to start the next parse from the beginning.
*
* If the parser previously failed because of a timeout or a cancellation, then
* by default, it will resume where it left off on the next call to
* `ts_parser_parse` or other parsing functions. If you don't want to resume,
* and instead intend to use this parser to parse some other document, you must
* call this `ts_parser_reset` first.
*/
void ts_parser_reset(TSParser *self);
/**
* Set the maximum duration in microseconds that parsing should be allowed to
* take before halting. If parsing takes longer than this, it will halt early,
* returning NULL. See `ts_parser_parse` for more information.
*/
void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout);
/**
* Get the duration in microseconds that parsing is allowed to take.
*/
uint64_t ts_parser_timeout_micros(const TSParser *self);
/**
* Set the parser's current cancellation flag pointer. If a non-null pointer is
* assigned, then the parser will periodically read from this pointer during
* parsing. If it reads a non-zero value, it will halt early, returning NULL.
* See `ts_parser_parse` for more information.
*/
void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag);
/**
* Get the parser's current cancellation flag pointer.
*/
const size_t *ts_parser_cancellation_flag(const TSParser *self);
/**
* Set the logger that a parser should use during parsing.
*
* The parser does not take ownership over the logger payload. If a logger was
* previously assigned, the caller is responsible for releasing any memory
* owned by the previous logger.
*/
void ts_parser_set_logger(TSParser *self, TSLogger logger);
/**
* Get the parser's current logger.
*/
TSLogger ts_parser_logger(const TSParser *self);
/**
* Set the file descriptor to which the parser should write debugging graphs
* during parsing. The graphs are formatted in the DOT language. You may want
* to pipe these graphs directly to a `dot(1)` process in order to generate
* SVG output. You can turn off this logging by passing a negative number.
*/
void ts_parser_print_dot_graphs(TSParser *self, int file);
/**
* Set whether or not the parser should halt immediately upon detecting an
* error. This will generally result in a syntax tree with an error at the
* root, and one or more partial syntax trees within the error. This behavior
* may not be supported long-term.
*/
void ts_parser_halt_on_error(TSParser *self, bool halt);
/******************/
/* Section - Tree */
/******************/
/**
* Create a shallow copy of the syntax tree. This is very fast.
*
* You need to copy a syntax tree in order to use it on more than one thread at
* a time, as syntax trees are not thread safe.
*/
TSTree *ts_tree_copy(const TSTree *self);
/**
* Delete the syntax tree, freeing all of the memory that it used.
*/
void ts_tree_delete(TSTree *self);
/**
* Get the root node of the syntax tree.
*/
TSNode ts_tree_root_node(const TSTree *self);
/**
* Get the language that was used to parse the syntax tree.
*/
const TSLanguage *ts_tree_language(const TSTree *);
/**
* Edit the syntax tree to keep it in sync with source code that has been
* edited.
*
* You must describe the edit both in terms of byte offsets and in terms of
* (row, column) coordinates.
*/
void ts_tree_edit(TSTree *self, const TSInputEdit *edit);
/**
* Compare a new syntax tree to a previous syntax tree representing the same
* document, returning an array of ranges whose syntactic structure has changed.
*
* For this to work correctly, the old syntax tree must have been edited such
* that its ranges match up to the new tree. Generally, you'll want to call
* this function right after calling one of the `ts_parser_parse` functions,
* passing in the new tree that was returned from `ts_parser_parse` and the old
* tree that was passed as a parameter.
*
* The returned array is allocated using `malloc` and the caller is responsible
* for freeing it using `free`. The length of the array will be written to the
* given `length` pointer.
*/
TSRange *ts_tree_get_changed_ranges(
const TSTree *self,
const TSTree *old_tree,
uint32_t *length
);
/**
* Write a DOT graph describing the syntax tree to the given file.
*/
void ts_tree_print_dot_graph(const TSTree *, FILE *);
/******************/
/* Section - Node */
/******************/
/**
* Get the node's type as a null-terminated string.
*/
const char *ts_node_type(TSNode);
/**
* Get the node's type as a numerical id.
*/
TSSymbol ts_node_symbol(TSNode);
/**
* Get the node's start byte.
*/
uint32_t ts_node_start_byte(TSNode);
/**
* Get the node's start position in terms of rows and columns.
*/
TSPoint ts_node_start_point(TSNode);
/**
* Get the node's end byte.
*/
uint32_t ts_node_end_byte(TSNode);
/**
* Get the node's end position in terms of rows and columns.
*/
TSPoint ts_node_end_point(TSNode);
/**
* Get an S-expression representing the node as a string.
*
* This string is allocated with `malloc` and the caller is responsible for
* freeing it using `free`.
*/
char *ts_node_string(TSNode);
/**
* Check if the node is null. Functions like `ts_node_child` and
* `ts_node_next_sibling` will return a null node to indicate that no such node
* was found.
*/
bool ts_node_is_null(TSNode);
/**
* Check if the node is *named*. Named nodes correspond to named rules in the
* grammar, whereas *anonymous* nodes correspond to string literals in the
* grammar.
*/
bool ts_node_is_named(TSNode);
/**
* Check if the node is *missing*. Missing nodes are inserted by the parser in
* order to recover from certain kinds of syntax errors.
*/
bool ts_node_is_missing(TSNode);
/**
* Check if the node is *extra*. Extra nodes represent things like comments,
* which are not required by the grammar but can appear anywhere.
*/
bool ts_node_is_extra(TSNode);
/**
* Check if a syntax node has been edited.
*/
bool ts_node_has_changes(TSNode);
/**
* Check if the node is a syntax error or contains any syntax errors.
*/
bool ts_node_has_error(TSNode);
/**
* Get the node's immediate parent.
*/
TSNode ts_node_parent(TSNode);
/**
* Get the node's child at the given index, where zero represents the first
* child.
*/
TSNode ts_node_child(TSNode, uint32_t);
/**
* Get the node's number of children.
*/
uint32_t ts_node_child_count(TSNode);
/**
* Get the node's *named* child at the given index.
*
* See also `ts_node_is_named`.
*/
TSNode ts_node_named_child(TSNode, uint32_t);
/**
* Get the node's number of *named* children.
*
* See also `ts_node_is_named`.
*/
uint32_t ts_node_named_child_count(TSNode);
/**
* Get the node's child with the given field name.
*/
TSNode ts_node_child_by_field_name(
TSNode self,
const char *field_name,
uint32_t field_name_length
);
/**
* Get the node's child with the given numerical field id.
*
* You can convert a field name to an id using the
* `ts_language_field_id_for_name` function.
*/
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
/**
* Get the node's next / previous sibling.
*/
TSNode ts_node_next_sibling(TSNode);
TSNode ts_node_prev_sibling(TSNode);
/**
* Get the node's next / previous *named* sibling.
*/
TSNode ts_node_next_named_sibling(TSNode);
TSNode ts_node_prev_named_sibling(TSNode);
/**
* Get the node's first child that extends beyond the given byte offset.
*/
TSNode ts_node_first_child_for_byte(TSNode, uint32_t);
/**
* Get the node's first named child that extends beyond the given byte offset.
*/
TSNode ts_node_first_named_child_for_byte(TSNode, uint32_t);
/**
* Get the smallest node within this node that spans the given range of bytes
* or (row, column) positions.
*/
TSNode ts_node_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_descendant_for_point_range(TSNode, TSPoint, TSPoint);
/**
* Get the smallest named node within this node that spans the given range of
* bytes or (row, column) positions.
*/
TSNode ts_node_named_descendant_for_byte_range(TSNode, uint32_t, uint32_t);
TSNode ts_node_named_descendant_for_point_range(TSNode, TSPoint, TSPoint);
/**
* Edit the node to keep it in-sync with source code that has been edited.
*
* This function is only rarely needed. When you edit a syntax tree with the
* `ts_tree_edit` function, all of the nodes that you retrieve from the tree
* afterward will already reflect the edit. You only need to use `ts_node_edit`
* when you have a `TSNode` instance that you want to keep and continue to use
* after an edit.
*/
void ts_node_edit(TSNode *, const TSInputEdit *);
/**
* Check if two nodes are identical.
*/
bool ts_node_eq(TSNode, TSNode);
/************************/
/* Section - TreeCursor */
/************************/
/**
* Create a new tree cursor starting from the given node.
*
* A tree cursor allows you to walk a syntax tree more efficiently than is
* possible using the `TSNode` functions. It is a mutable object that is always
* on a certain syntax node, and can be moved imperatively to different nodes.
*/
TSTreeCursor ts_tree_cursor_new(TSNode);
/**
* Delete a tree cursor, freeing all of the memory that it used.
*/
void ts_tree_cursor_delete(TSTreeCursor *);
/**
* Re-initialize a tree cursor to start at a different node.
*/
void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
/**
* Get the tree cursor's current node.
*/
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
/**
* Get the field name of the tree cursor's current node.
*
* This returns `NULL` if the current node doesn't have a field.
* See also `ts_node_child_by_field_name`.
*/
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
/**
* Get the field id of the tree cursor's current node.
*
* This returns zero if the current node doesn't have a field.
* See also `ts_node_child_by_field_id`, `ts_language_field_id_for_name`.
*/
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
/**
* Move the cursor to the parent of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there was no parent node (the cursor was already on the root node).
*/
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
/**
* Move the cursor to the next sibling of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there was no next sibling node.
*/
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
/**
* Move the cursor to the first child of its current node.
*
* This returns `true` if the cursor successfully moved, and returns `false`
* if there were no children.
*/
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
/**
* Move the cursor to the first child of its current node that extends beyond
* the given byte offset.
*
* This returns the index of the child node if one was found, and returns -1
* if no such child was found.
*/
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *);
/**********************/
/* Section - Language */
/**********************/
/**
* Get the number of distinct node types in the language.
*/
uint32_t ts_language_symbol_count(const TSLanguage *);
/**
* Get a node type string for the given numerical id.
*/
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
/**
* Get the numerical id for the given node type string.
*/
TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *);
/**
* Get the number of distinct field names in the language.
*/
uint32_t ts_language_field_count(const TSLanguage *);
/**
* Get the field name string for the given numerical id.
*/
const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
/**
* Get the numerical id for the given field name string.
*/
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
/**
* Check whether the given node type id belongs to named nodes, anonymous nodes,
* or hidden nodes.
*
* See also `ts_node_is_named`. Hidden nodes are never returned from the API.
*/
TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
/**
* Get the ABI version number for this language. This version number is used
* to ensure that languages were generated by a compatible version of
* Tree-sitter.
*
* See also `ts_parser_set_language`.
*/
uint32_t ts_language_version(const TSLanguage *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_API_H_

142
src/tree_sitter/array.h Normal file
View File

@ -0,0 +1,142 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
#include "./alloc.h"
// Generic growable array, implemented with macros over a type-erased core.
//
// `Array(T)` expands to an anonymous struct holding a buffer of `T`:
// `contents` points at the elements, `size` is the number of elements in use
// and `capacity` the number of elements allocated.
//
// Every macro below takes a POINTER to such a struct as `self`. Most of them
// evaluate `self` (and some other arguments) more than once, so arguments
// must not have side effects. Macro parameters are parenthesized wherever
// they are combined with a cast or an operator, so that arbitrary expressions
// can be passed safely.
#define Array(T)       \
  struct {             \
    T *contents;       \
    uint32_t size;     \
    uint32_t capacity; \
  }

// Reset an array to the empty, unallocated state (does not free anything).
#define array_init(self) \
  ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)

// Initializer for an empty array: `Array(int) a = array_new();`
#define array_new() \
  { NULL, 0, 0 }

// Pointer to the element at `index`; asserts that the index is in bounds.
// The cast applies to the whole index expression, not just its first operand.
#define array_get(self, index) \
  (assert((uint32_t)(index) < (self)->size), &(self)->contents[index])

// Pointer to the first / last element; asserts that the array is non-empty.
#define array_front(self) array_get(self, 0)

#define array_back(self) array_get(self, (self)->size - 1)

// Drop all elements but keep the allocated storage.
#define array_clear(self) ((self)->size = 0)

// Ensure room for at least `new_capacity` elements.
#define array_reserve(self, new_capacity) \
  array__reserve((VoidArray *)(self), array__elem_size(self), new_capacity)

// Remove the element at `index`, shifting later elements down by one.
#define array_erase(self, index) \
  array__erase((VoidArray *)(self), array__elem_size(self), index)

// Free the storage and reset the array to the empty state.
#define array_delete(self) array__delete((VoidArray *)(self))

// Append `element`, growing the storage if needed.
#define array_push(self, element)                              \
  (array__grow((VoidArray *)(self), 1, array__elem_size(self)), \
   (self)->contents[(self)->size++] = (element))

// Append `count` zero-filled elements.
#define array_grow_by(self, count)                                               \
  (array__grow((VoidArray *)(self), count, array__elem_size(self)),              \
   memset((self)->contents + (self)->size, 0, (count) * array__elem_size(self)), \
   (self)->size += (count))

// Append every element of the array `other`.
#define array_push_all(self, other) \
  array_splice((self), (self)->size, 0, (other)->size, (other)->contents)

// Replace `old_count` elements starting at `index` with `new_count` elements
// copied from `new_contents`.
#define array_splice(self, index, old_count, new_count, new_contents)           \
  array__splice((VoidArray *)(self), array__elem_size(self), index, old_count, \
                new_count, new_contents)

// Insert `element` before position `index`. `element` must be an lvalue,
// because its address is taken.
#define array_insert(self, index, element) \
  array__splice((VoidArray *)(self), array__elem_size(self), index, 0, 1, &(element))

// Remove the last element and yield its value. No emptiness check is made.
#define array_pop(self) ((self)->contents[--(self)->size])

// Make `self` a copy of the elements of `other`.
#define array_assign(self, other) \
  array__assign((VoidArray *)(self), (const VoidArray *)(other), array__elem_size(self))

// Private

// Type-erased view used by the array__* helpers; typed arrays are cast to it.
typedef Array(void) VoidArray;

// Size in bytes of one element of the typed array `self`.
#define array__elem_size(self) sizeof(*(self)->contents)
// Free the backing storage and leave the array empty and unallocated.
static inline void array__delete(VoidArray *self) {
  ts_free(self->contents);
  self->capacity = 0;
  self->size = 0;
  self->contents = NULL;
}
// Remove the element at `index` by sliding every later element down one slot.
// `element_size` is the size in bytes of one element. Asserts that `index` is
// in bounds.
static inline void array__erase(VoidArray *self, size_t element_size,
                                uint32_t index) {
  assert(index < self->size);

  char *bytes = (char *)self->contents;
  char *gap = bytes + index * element_size;
  char *tail = bytes + (index + 1) * element_size;
  size_t tail_bytes = (self->size - index - 1) * element_size;

  // The regions overlap, hence memmove rather than memcpy.
  memmove(gap, tail, tail_bytes);
  self->size--;
}
// Grow the backing storage so it can hold at least `new_capacity` elements of
// `element_size` bytes. Never shrinks: a request that does not exceed the
// current capacity is a no-op.
static inline void array__reserve(VoidArray *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;

  // An existing buffer is resized in place; a first allocation goes through
  // ts_calloc.
  self->contents = self->contents
    ? ts_realloc(self->contents, new_capacity * element_size)
    : ts_calloc(new_capacity, element_size);
  self->capacity = new_capacity;
}
// Make `self` hold a copy of the elements of `other`. `element_size` is the
// size in bytes of one element. Storage in `self` is grown as needed and is
// never shrunk.
static inline void array__assign(VoidArray *self, const VoidArray *other, size_t element_size) {
  array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty either `contents` pointer may still be NULL, and
  // passing NULL to memcpy is undefined behavior even for a zero length.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
// Make room for `count` additional elements. When the current capacity is
// insufficient, the new capacity is the largest of: twice the old capacity,
// 8, and the exact size required.
static inline void array__grow(VoidArray *self, size_t count, size_t element_size) {
  size_t required = self->size + count;
  if (required <= self->capacity) return;

  size_t doubled = self->capacity * 2;
  size_t target = doubled < 8 ? 8 : doubled;
  if (target < required) target = required;
  array__reserve(self, element_size, target);
}
// Replace the `old_count` elements starting at `index` with `new_count`
// elements copied from `elements`. `element_size` is the size in bytes of one
// element. Asserts that the replaced span lies within the array.
static inline void array__splice(VoidArray *self, size_t element_size,
                                 uint32_t index, uint32_t old_count,
                                 uint32_t new_count, const void *elements) {
  uint32_t removed_end = index + old_count;
  uint32_t inserted_end = index + new_count;
  uint32_t resulting_size = self->size + new_count - old_count;
  assert(removed_end <= self->size);

  array__reserve(self, element_size, resulting_size);

  char *bytes = (char *)self->contents;

  // Slide the elements that follow the replaced span to their new position.
  if (self->size > removed_end) {
    memmove(bytes + inserted_end * element_size,
            bytes + removed_end * element_size,
            (self->size - removed_end) * element_size);
  }

  // Copy the replacement elements into the opened gap.
  if (new_count > 0) {
    memcpy(bytes + index * element_size, elements, new_count * element_size);
  }

  self->size = resulting_size;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

42
src/tree_sitter/atomic.h Normal file
View File

@ -0,0 +1,42 @@
#ifndef TREE_SITTER_ATOMIC_H_
#define TREE_SITTER_ATOMIC_H_
#include <stdint.h>
#ifdef _WIN32
#include <windows.h>
// Windows variant: read the value through the volatile pointer with a plain
// load.
static inline size_t atomic_load(const volatile size_t *p) {
  const size_t snapshot = *p;
  return snapshot;
}
// Windows variant: increment `*p` via InterlockedIncrement and return the
// value that call yields.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  const uint32_t updated = InterlockedIncrement(p);
  return updated;
}

// Windows variant: decrement `*p` via InterlockedDecrement and return the
// value that call yields.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  const uint32_t updated = InterlockedDecrement(p);
  return updated;
}
#else
// Read `*p` atomically. Uses the relaxed `__atomic_load_n` builtin when the
// compiler defines __ATOMIC_RELAXED, and otherwise falls back to a
// `__sync_fetch_and_add` of zero.
static inline size_t atomic_load(const volatile size_t *p) {
#ifndef __ATOMIC_RELAXED
  return __sync_fetch_and_add((volatile size_t *)p, 0);
#else
  return __atomic_load_n(p, __ATOMIC_RELAXED);
#endif
}
// Atomically add one to `*p` and return the incremented value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  const uint32_t updated = __sync_add_and_fetch(p, 1u);
  return updated;
}
// Atomically subtract one from `*p` and return the decremented value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  const uint32_t updated = __sync_sub_and_fetch(p, 1u);
  return updated;
}
#endif
#endif // TREE_SITTER_ATOMIC_H_

141
src/tree_sitter/clock.h Normal file
View File

@ -0,0 +1,141 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_
#include <stdint.h>
typedef uint64_t TSDuration;
#ifdef _WIN32
// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.
#include <windows.h>
typedef uint64_t TSClock;
// Convert microseconds to performance-counter ticks.
static inline TSDuration duration_from_micros(uint64_t micros) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  return micros * (uint64_t)ticks_per_second.QuadPart / 1000000;
}

// Convert performance-counter ticks to microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  return self * 1000000 / (uint64_t)ticks_per_second.QuadPart;
}

// The null clock value is zero.
static inline TSClock clock_null(void) {
  return 0;
}

// Current performance-counter reading.
static inline TSClock clock_now(void) {
  LARGE_INTEGER counter;
  QueryPerformanceCounter(&counter);
  return (uint64_t)counter.QuadPart;
}

// The clock value `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  const TSClock deadline = base + duration;
  return deadline;
}

// True when `self` is the null clock value.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#elif defined(CLOCK_MONOTONIC) && !defined(__APPLE__)
// POSIX with monotonic clock support (Linux)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.
#include <time.h>
typedef struct timespec TSClock;
// Durations are stored in microseconds here, so both conversions are the
// identity.
static inline TSDuration duration_from_micros(uint64_t micros) {
  const TSDuration duration = micros;
  return duration;
}

static inline uint64_t duration_to_micros(TSDuration self) {
  const uint64_t micros = self;
  return micros;
}

// Current reading of CLOCK_MONOTONIC.
static inline TSClock clock_now(void) {
  TSClock now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  return now;
}

// The null clock value: a timespec whose first two members are zero.
static inline TSClock clock_null(void) {
  const TSClock zero = {0, 0};
  return zero;
}
// Return the time `duration` microseconds after `base`.
//
// The result is kept normalized (tv_nsec below one second). `clock_is_gt`
// compares tv_sec first and only then tv_nsec, so a denormalized value would
// compare incorrectly against readings from `clock_now`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock result = base;
  result.tv_sec += duration / 1000000;
  result.tv_nsec += (duration % 1000000) * 1000;
  // At most one carry is needed: the added nanoseconds are below one second,
  // and so is a normalized base.tv_nsec.
  if (result.tv_nsec >= 1000000000) {
    result.tv_nsec -= 1000000000;
    result.tv_sec += 1;
  }
  return result;
}
// True when the seconds field of `self` is zero (the nanoseconds field is not
// consulted).
static inline bool clock_is_null(TSClock self) {
  return self.tv_sec == 0;
}

// True when `self` is strictly later than `other`: seconds are compared
// first, nanoseconds only break ties.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) {
    return self.tv_sec > other.tv_sec;
  }
  return self.tv_nsec > other.tv_nsec;
}
#else
// macOS or POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.
#include <time.h>
typedef uint64_t TSClock;
// Convert microseconds to `clock()` ticks.
static inline TSDuration duration_from_micros(uint64_t micros) {
  const uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return micros * ticks_per_second / 1000000;
}

// Convert `clock()` ticks to microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  const uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return self * 1000000 / ticks_per_second;
}

// The null clock value is zero.
static inline TSClock clock_null(void) {
  return 0;
}

// Current process clock reading.
static inline TSClock clock_now(void) {
  const uint64_t ticks = (uint64_t)clock();
  return ticks;
}

// The clock value `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  const TSClock deadline = base + duration;
  return deadline;
}

// True when `self` is the null clock value.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// True when `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#endif
#endif // TREE_SITTER_CLOCK_H_

View File

@ -0,0 +1,11 @@
#ifndef TREE_SITTER_ERROR_COSTS_H_
#define TREE_SITTER_ERROR_COSTS_H_
// Constants related to error handling.
// NOTE(review): the code that consumes these values is outside this header;
// the descriptions below are inferred from the names only - confirm against
// the parser before relying on them.

// Presumably the parse-state id reserved for the error state.
#define ERROR_STATE 0
// Presumably penalties added to a parse's error cost per recovery, per
// missing tree, per skipped tree, per skipped line and per skipped character.
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1
#endif

View File

@ -0,0 +1,482 @@
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include <assert.h>
// #define DEBUG_GET_CHANGED_RANGES
// Record the span from `start` to `end` in `self`.
//
// If the array is non-empty and `start` does not lie beyond the last range's
// end byte, that last range's end is overwritten with `end` and nothing is
// appended. Otherwise a new range is appended, but only when the span is
// non-empty in bytes.
static void ts_range_array_add(TSRangeArray *self, Length start, Length end) {
  if (self->size > 0) {
    TSRange *tail = array_back(self);
    bool reaches_tail = start.bytes <= tail->end_byte;
    if (reaches_tail) {
      tail->end_point = end.extent;
      tail->end_byte = end.bytes;
      return;
    }
  }

  if (start.bytes >= end.bytes) return;

  TSRange appended = {
    .start_point = start.extent,
    .end_point = end.extent,
    .start_byte = start.bytes,
    .end_byte = end.bytes,
  };
  array_push(self, appended);
}
// Report whether the byte span [start_byte, end_byte) overlaps a range in
// `self`, scanning from `start_index` onward.
//
// Only the first range that ends after `start_byte` is examined: the answer
// is whether that range starts before `end_byte`.
bool ts_range_array_intersects(const TSRangeArray *self, unsigned start_index,
                               uint32_t start_byte, uint32_t end_byte) {
  for (unsigned i = start_index; i < self->size; i++) {
    const TSRange *candidate = &self->contents[i];
    if (candidate->end_byte <= start_byte) continue;
    return candidate->start_byte < end_byte;
  }
  return false;
}
// Compare two lists of ranges and append to `differences` every span that is
// covered by exactly one of the two lists.
//
// Both lists are swept in parallel, one boundary at a time. `inside_old` and
// `inside_new` track whether the sweep position currently lies within a range
// of the respective list; whenever they disagree, the span up to the next
// boundary is reported through `ts_range_array_add`.
void ts_range_array_get_changed_ranges(
  const TSRange *old_ranges, unsigned old_range_count,
  const TSRange *new_ranges, unsigned new_range_count,
  TSRangeArray *differences
) {
  unsigned old_cursor = 0;
  unsigned new_cursor = 0;
  bool inside_old = false;
  bool inside_new = false;
  Length position = length_zero();

  while (old_cursor < old_range_count || new_cursor < new_range_count) {
    const TSRange *old_range = &old_ranges[old_cursor];
    const TSRange *new_range = &new_ranges[new_cursor];

    // Next boundary in each list: the end of the current range when inside
    // one, the start of the next range otherwise, or LENGTH_MAX once the list
    // is exhausted.
    Length old_boundary = LENGTH_MAX;
    if (inside_old) {
      old_boundary = (Length) {old_range->end_byte, old_range->end_point};
    } else if (old_cursor < old_range_count) {
      old_boundary = (Length) {old_range->start_byte, old_range->start_point};
    }

    Length new_boundary = LENGTH_MAX;
    if (inside_new) {
      new_boundary = (Length) {new_range->end_byte, new_range->end_point};
    } else if (new_cursor < new_range_count) {
      new_boundary = (Length) {new_range->start_byte, new_range->start_point};
    }

    bool old_comes_first = old_boundary.bytes < new_boundary.bytes;
    bool new_comes_first = new_boundary.bytes < old_boundary.bytes;

    // On a tie the new list's boundary is the one that gets recorded.
    Length boundary = old_comes_first ? old_boundary : new_boundary;

    if (inside_old != inside_new) {
      ts_range_array_add(differences, position, boundary);
    }

    // Cross the boundary in whichever list(s) it belongs to.
    if (!new_comes_first) {
      if (inside_old) old_cursor++;
      inside_old = !inside_old;
    }
    if (!old_comes_first) {
      if (inside_new) new_cursor++;
      inside_new = !inside_new;
    }

    position = boundary;
  }
}
// Traversal state used while comparing two syntax trees.
typedef struct {
// Cursor whose stack holds the path from the root to the current node.
TreeCursor cursor;
// Language used to look up alias sequences for the nodes on the path.
const TSLanguage *language;
// Counter of visible nodes on the path: starts at 1 and is incremented /
// decremented as the iterator enters / leaves visible nodes.
unsigned visible_depth;
// True while the iterator stands on the padding that precedes the current
// node rather than on the node's own content.
bool in_padding;
} Iterator;
// Build an iterator positioned on the root `tree`.
//
// The stack of `cursor` is emptied and reused for the traversal; the
// returned iterator holds a copy of the cursor taken after the root entry has
// been pushed.
static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLanguage *language) {
  TreeCursorEntry root_entry = {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };
  array_clear(&cursor->stack);
  array_push(&cursor->stack, root_entry);

  Iterator result = {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
  };
  return result;
}
// The traversal is finished once the cursor stack has been emptied.
static bool iterator_done(Iterator *self) {
  return !self->cursor.stack.size;
}
// Start of the span the iterator currently stands on: the start of the
// node's padding when in padding, otherwise the start of the node's content.
static Length iterator_start_position(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  if (!self->in_padding) {
    return length_add(top.position, ts_subtree_padding(*top.subtree));
  }
  return top.position;
}
// End of the span the iterator currently stands on: the end of the node's
// padding when in padding, otherwise the end of the node's content.
static Length iterator_end_position(Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  Length content_start = length_add(top.position, ts_subtree_padding(*top.subtree));
  return self->in_padding
    ? content_start
    : length_add(content_start, ts_subtree_size(*top.subtree));
}
// Decide whether the current node counts as visible: either the subtree
// itself is visible, or its parent's alias sequence assigns a non-zero alias
// to the node's structural child slot.
static bool iterator_tree_is_visible(const Iterator *self) {
  TreeCursorEntry top = *array_back(&self->cursor.stack);
  if (ts_subtree_visible(*top.subtree)) return true;

  uint32_t depth = self->cursor.stack.size;
  if (depth <= 1) return false;

  Subtree parent = *self->cursor.stack.contents[depth - 2].subtree;
  const TSSymbol *aliases = ts_language_alias_sequence(
    self->language,
    parent.ptr->production_id
  );
  if (!aliases) return false;
  return aliases[top.structural_child_index] != 0;
}
// Find the innermost visible node on the iterator's path and report it.
//
// Walks the cursor stack from the top towards the root. The first entry that
// is visible, or that its parent's alias sequence gives a non-zero alias, is
// written to `*tree`, its alias to `*alias_symbol` and its starting byte to
// `*start_byte`. When the iterator is in padding the topmost entry is skipped.
//
// The outputs are left untouched when no such entry exists, so callers must
// initialize them beforehand.
static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
TSSymbol *alias_symbol, uint32_t *start_byte) {
uint32_t i = self->cursor.stack.size - 1;
if (self->in_padding) {
if (i == 0) return;
i--;
}
// `i + 1 > 0` lets the unsigned index count down through 0: the loop stops
// once `i` wraps around past zero.
for (; i + 1 > 0; i--) {
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (i > 0) {
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent->ptr->production_id
);
if (alias_sequence) {
*alias_symbol = alias_sequence[entry.structural_child_index];
}
}
if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
*tree = *entry.subtree;
*start_byte = entry.position.bytes;
break;
}
}
}
// Move the iterator up to the parent of the current node.
//
// Leaving a visible node whose content (not padding) was current lowers
// `visible_depth`. The padding flag is cleared unless the node being left is
// its parent's first child.
static void iterator_ascend(Iterator *self) {
  if (iterator_done(self)) return;

  bool leaving_visible_content = !self->in_padding && iterator_tree_is_visible(self);
  if (leaving_visible_content) self->visible_depth--;

  if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
  self->cursor.stack.size--;
}
// Descend from the current node to the first visible descendant whose content
// ends after byte `goal_position`.
//
// At each level the children are scanned left to right and the first child
// ending after the goal is pushed onto the stack. If that child is visible
// the descent stops and true is returned; `in_padding` is set when the goal
// lies before the child's content, otherwise `visible_depth` is incremented.
// If the child is not visible, the search continues inside it.
//
// Returns false when the iterator is in padding, or when no visible
// descendant is found. In the latter case any invisible nodes that were
// pushed along the way remain on the stack.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
if (self->in_padding) return false;
bool did_descend;
do {
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
// Index among the non-extra children, used to look up aliases.
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &entry.subtree->ptr->children[i];
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
if (child_right.bytes > goal_position) {
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
if (child_left.bytes > goal_position) {
self->in_padding = true;
} else {
self->visible_depth++;
}
return true;
}
// Invisible child: keep descending inside it.
did_descend = true;
break;
}
position = child_right;
if (!ts_subtree_extra(*child)) structural_child_index++;
}
} while (did_descend);
return false;
}
// Step the iterator forward to the next span in document order.
//
// When standing in a node's padding, the step moves onto the node's content
// (or into its children if the node itself is not visible). Otherwise the
// current node is popped and the iterator moves to its next sibling, climbing
// to ancestors until one with a following sibling is found. The iterator ends
// up done (empty stack) when the root is popped.
static void iterator_advance(Iterator *self) {
if (self->in_padding) {
self->in_padding = false;
if (iterator_tree_is_visible(self)) {
self->visible_depth++;
} else {
iterator_descend(self, 0);
}
return;
}
for (;;) {
if (iterator_tree_is_visible(self)) self->visible_depth--;
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
if (ts_subtree_child_count(*parent) > child_index) {
// The sibling starts where the popped node (padding included) ended.
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &parent->ptr->children[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// Visit the sibling's padding first when it has any.
if (ts_subtree_padding(*next_child).bytes > 0) {
self->in_padding = true;
} else {
self->visible_depth++;
}
} else {
iterator_descend(self, 0);
}
break;
}
}
}
// Result of comparing the nodes two iterators currently point at.
typedef enum {
// The nodes are known to differ.
IteratorDiffers,
// The nodes look alike on the outside but may differ internally.
IteratorMayDiffer,
// The nodes are considered identical.
IteratorMatches,
} IteratorComparison;
// Compare the visible nodes that two iterators currently point at.
//
// - Neither iterator has a visible node: IteratorMatches.
// - Only one has a visible node, or the alias / symbol differ: IteratorDiffers.
// - Same alias and symbol: IteratorMatches when the nodes start at the same
//   byte, the old node has no changes, is not an error node, has the same
//   byte size as the new one, both have a real parse state, and both agree on
//   being (or not being) in the error state. Otherwise IteratorMayDiffer.
static IteratorComparison iterator_compare(const Iterator *old_iter, const Iterator *new_iter) {
  Subtree old_tree = NULL_SUBTREE, new_tree = NULL_SUBTREE;
  uint32_t old_start = 0, new_start = 0;
  TSSymbol old_alias_symbol = 0, new_alias_symbol = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  bool has_old = old_tree.ptr != NULL;
  bool has_new = new_tree.ptr != NULL;
  if (!has_old && !has_new) return IteratorMatches;
  if (!has_old || !has_new) return IteratorDiffers;

  if (old_alias_symbol != new_alias_symbol) return IteratorDiffers;
  if (ts_subtree_symbol(old_tree) != ts_subtree_symbol(new_tree)) return IteratorDiffers;

  if (old_start != new_start) return IteratorMayDiffer;
  if (ts_subtree_has_changes(old_tree)) return IteratorMayDiffer;
  if (ts_subtree_symbol(old_tree) == ts_builtin_sym_error) return IteratorMayDiffer;
  if (ts_subtree_size(old_tree).bytes != ts_subtree_size(new_tree).bytes) return IteratorMayDiffer;
  if (ts_subtree_parse_state(old_tree) == TS_TREE_STATE_NONE) return IteratorMayDiffer;
  if (ts_subtree_parse_state(new_tree) == TS_TREE_STATE_NONE) return IteratorMayDiffer;

  bool old_in_error = ts_subtree_parse_state(old_tree) == ERROR_STATE;
  bool new_in_error = ts_subtree_parse_state(new_tree) == ERROR_STATE;
  return old_in_error == new_in_error ? IteratorMatches : IteratorMayDiffer;
}
#ifdef DEBUG_GET_CHANGED_RANGES
// Debug helper, compiled only when DEBUG_GET_CHANGED_RANGES is defined.
// Prints the current node's symbol name, a "(p)" marker while in padding, the
// visible depth, and the current span as [row, column] pairs (rows shown
// 1-based).
static inline void iterator_print_state(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",
self->visible_depth,
start.row + 1, start.column,
end.row + 1, end.column
);
}
#endif
// Compute the ranges in which `new_tree` differs structurally from `old_tree`.
//
// Two iterators walk the old and the new tree in lock-step. At every step the
// current nodes are compared; matching nodes are skipped as a whole, possibly
// differing nodes are descended into, and differing nodes contribute a changed
// range.
//
// Parameters:
//   old_tree, new_tree           - roots of the trees to compare.
//   cursor1, cursor2             - cursors whose stacks are reused as scratch
//                                  space; the final cursor states are written
//                                  back before returning.
//   language                     - language used for alias and symbol lookups.
//   included_range_differences   - regions whose inclusion in the parse
//                                  changed; nodes overlapping them are never
//                                  treated as identical.
//   ranges                       - receives the result array's buffer.
//                                  NOTE(review): presumably the caller takes
//                                  ownership and must free it -- confirm.
//
// Returns the number of ranges stored in `*ranges`.
unsigned ts_subtree_get_changed_ranges(const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges) {
TSRangeArray results = array_new();
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
unsigned included_range_difference_index = 0;
// If the two roots start at different offsets, the gap between the two
// starting points counts as changed.
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
if (position.bytes < next_position.bytes) {
ts_range_array_add(&results, position, next_position);
position = next_position;
} else if (position.bytes > next_position.bytes) {
ts_range_array_add(&results, next_position, position);
next_position = position;
}
do {
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row + 1, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
puts("");
#endif
// Compare the old and new subtrees.
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
// Even if the two subtrees appear to be identical, they could differ
// internally if they contain a range of text that was previously
// excluded from the parse, and is now included, or vice-versa.
if (comparison == IteratorMatches && ts_range_array_intersects(
included_range_differences,
included_range_difference_index,
position.bytes,
iterator_end_position(&old_iter).bytes
)) {
comparison = IteratorMayDiffer;
}
bool is_changed = false;
switch (comparison) {
// If the subtrees are definitely identical, move to the end
// of both subtrees.
case IteratorMatches:
next_position = iterator_end_position(&old_iter);
break;
// If the subtrees might differ internally, descend into both
// subtrees, finding the first child that spans the current position.
case IteratorMayDiffer:
if (iterator_descend(&old_iter, position.bytes)) {
if (!iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&old_iter);
}
} else if (iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&new_iter);
} else {
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
}
break;
// If the subtrees are different, record a change and then move
// to the end of both subtrees.
case IteratorDiffers:
is_changed = true;
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
break;
}
// Ensure that both iterators are caught up to the current position.
while (
!iterator_done(&old_iter) &&
iterator_end_position(&old_iter).bytes <= next_position.bytes
) iterator_advance(&old_iter);
while (
!iterator_done(&new_iter) &&
iterator_end_position(&new_iter).bytes <= next_position.bytes
) iterator_advance(&new_iter);
// Ensure that both iterators are at the same depth in the tree.
while (old_iter.visible_depth > new_iter.visible_depth) {
iterator_ascend(&old_iter);
}
while (new_iter.visible_depth > old_iter.visible_depth) {
iterator_ascend(&new_iter);
}
if (is_changed) {
#ifdef DEBUG_GET_CHANGED_RANGES
printf(
" change: [[%u, %u] - [%u, %u]]\n",
position.extent.row + 1, position.extent.column,
next_position.extent.row + 1, next_position.extent.column
);
#endif
ts_range_array_add(&results, position, next_position);
}
position = next_position;
// Keep track of the current position in the included range differences
// array in order to avoid scanning the entire array on each iteration.
while (included_range_difference_index < included_range_differences->size) {
const TSRange *range = &included_range_differences->contents[
included_range_difference_index
];
if (range->end_byte <= position.bytes) {
included_range_difference_index++;
} else {
break;
}
}
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
// When one tree is longer than the other, the extra tail counts as changed.
Length old_size = ts_subtree_total_size(*old_tree);
Length new_size = ts_subtree_total_size(*new_tree);
if (old_size.bytes < new_size.bytes) {
ts_range_array_add(&results, old_size, new_size);
} else if (new_size.bytes < old_size.bytes) {
ts_range_array_add(&results, new_size, old_size);
}
// Hand the (possibly reallocated) cursor stacks back to the caller.
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}

View File

@ -0,0 +1,36 @@
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
#define TREE_SITTER_GET_CHANGED_RANGES_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./tree_cursor.h"
#include "./subtree.h"
// Growable array of TSRange values, built with the generic Array() macro.
typedef Array(TSRange) TSRangeArray;
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
);
bool ts_range_array_intersects(
const TSRangeArray *self, unsigned start_index,
uint32_t start_byte, uint32_t end_byte
);
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_

107
src/tree_sitter/language.c Normal file
View File

@ -0,0 +1,107 @@
#include "./language.h"
#include "./subtree.h"
#include "./error_costs.h"
#include <string.h>
// Fill `result` with the parse actions for `symbol` in `state`.
//
// The built-in error symbols have no table entry: they yield an empty,
// non-reusable result. For every other symbol the action list is stored
// directly after its TSParseActionEntry header in `parse_actions`.
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
                             TSSymbol symbol, TableEntry *result) {
  bool is_error_symbol =
    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
  if (is_error_symbol) {
    result->actions = NULL;
    result->action_count = 0;
    result->is_reusable = false;
    return;
  }

  assert(symbol < self->token_count);
  uint32_t action_index = ts_language_lookup(self, state, symbol);
  const TSParseActionEntry *header = &self->parse_actions[action_index];
  result->actions = (const TSParseAction *)(header + 1);
  result->action_count = header->count;
  result->is_reusable = header->reusable;
}
// Total number of symbols in the language: regular symbols plus aliases.
uint32_t ts_language_symbol_count(const TSLanguage *language) {
  uint32_t total = language->symbol_count;
  total += language->alias_count;
  return total;
}
// Version number recorded in the language definition.
uint32_t ts_language_version(const TSLanguage *language) {
  const uint32_t version = language->version;
  return version;
}
// Look up the metadata of `symbol`.
//
// The two built-in error symbols are not present in the language's table and
// get fixed metadata: the error symbol is visible and named, the error-repeat
// symbol is neither.
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error_repeat) {
    return (TSSymbolMetadata){.visible = false, .named = false};
  }
  if (symbol == ts_builtin_sym_error) {
    return (TSSymbolMetadata){.visible = true, .named = true};
  }
  return language->symbol_metadata[symbol];
}
// Name of `symbol`. The built-in error symbols are named "ERROR" and
// "_ERROR"; everything else comes from the language's name table.
const char *ts_language_symbol_name(const TSLanguage *language, TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error_repeat) return "_ERROR";
  if (symbol == ts_builtin_sym_error) return "ERROR";
  return language->symbol_names[symbol];
}
TSSymbol ts_language_symbol_for_name(const TSLanguage *self, const char *name) {
if (!strcmp(name, "ERROR")) return ts_builtin_sym_error;
uint32_t count = ts_language_symbol_count(self);
for (TSSymbol i = 0; i < count; i++) {
if (!strcmp(self->symbol_names[i], name)) {
return i;
}
}
return 0;
}
// Classify `symbol` from its metadata: named symbols are regular, unnamed but
// visible symbols are anonymous, and the rest are auxiliary.
TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  if (metadata.named) return TSSymbolTypeRegular;
  return metadata.visible ? TSSymbolTypeAnonymous : TSSymbolTypeAuxiliary;
}
// Number of fields defined by the language. Languages older than the version
// that introduced fields report zero; their `field_count` is not read.
uint32_t ts_language_field_count(const TSLanguage *self) {
  bool supports_fields = self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS;
  return supports_fields ? self->field_count : 0;
}
// Name of the field with the given id.
//
// Field ids run from 1 to the field count inclusive; slot 0 of `field_names`
// exists but does not name a field. Returns NULL when the language has no
// fields or when `id` is larger than the field count, rather than reading
// past the end of the name table.
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
  uint32_t count = ts_language_field_count(self);
  if (count == 0 || id > count) return NULL;
  return self->field_names[id];
}
// Find the id of the field called `name`, where `name` is given by pointer
// and length and need not be NUL-terminated. Returns 0 when no field has
// that name.
//
// Comparing only the first `name_length` bytes is not enough on its own:
// "foo" would match a field called "foobar". A candidate is therefore
// accepted only if it has nothing left beyond the compared bytes.
//
// The scan stops early as soon as `name` sorts before a candidate, which
// relies on `field_names` being sorted in strncmp order (the same assumption
// the previous `case -1` early exit made). Any negative result is treated as
// "sorts before", because strncmp is only specified to return a value less
// than zero, not exactly -1.
TSFieldId ts_language_field_id_for_name(
  const TSLanguage *self,
  const char *name,
  uint32_t name_length
) {
  uint32_t count = ts_language_field_count(self);
  for (TSFieldId id = 1; id < count + 1; id++) {
    const char *field_name = self->field_names[id];
    int order = strncmp(name, field_name, name_length);
    if (order == 0) {
      // Reject candidates that merely start with `name`.
      if (strlen(field_name) <= name_length) return id;
    } else if (order < 0) {
      return 0;
    }
  }
  return 0;
}

138
src/tree_sitter/language.h Normal file
View File

@ -0,0 +1,138 @@
#ifndef TREE_SITTER_LANGUAGE_H_
#define TREE_SITTER_LANGUAGE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./subtree.h"
#include "tree_sitter/parser.h"
// Second built-in error symbol, numbered directly below ts_builtin_sym_error.
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
// First language version whose TSLanguage carries field information.
#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
// First language version that may store states at or above
// `large_state_count` in the compact "small" parse table.
#define TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES 11
// The parse actions available for one (state, symbol) pair, as filled in by
// ts_language_table_entry().
typedef struct {
// First action of the list, or NULL when there are none.
const TSParseAction *actions;
// Number of actions in the list.
uint32_t action_count;
// Copy of the table entry's `reusable` flag.
bool is_reusable;
} TableEntry;
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
// External tokens occupy the symbol ids 1 .. external_token_count.
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
  if (symbol == 0) return false;
  return symbol < self->external_token_count + 1;
}
// Return the parse actions for `symbol` in `state` and store how many there
// are in `*count`.
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
                                                       TSStateId state,
                                                       TSSymbol symbol,
                                                       uint32_t *count) {
  TableEntry entry;
  ts_language_table_entry(self, state, symbol, &entry);
  const TSParseAction *actions = entry.actions;
  *count = entry.action_count;
  return actions;
}
// True when at least one parse action exists for `symbol` in `state`.
static inline bool ts_language_has_actions(const TSLanguage *self,
                                           TSStateId state,
                                           TSSymbol symbol) {
  TableEntry entry;
  ts_language_table_entry(self, state, symbol, &entry);
  return entry.action_count != 0;
}
// True when the first parse action for `symbol` in `state` is a reduction.
static inline bool ts_language_has_reduce_action(const TSLanguage *self,
                                                 TSStateId state,
                                                 TSSymbol symbol) {
  TableEntry entry;
  ts_language_table_entry(self, state, symbol, &entry);
  if (entry.action_count == 0) return false;
  return entry.actions[0].type == TSParseActionTypeReduce;
}
// Look up the raw parse-table value for (state, symbol).
//
// States below `large_state_count`, and all states of older language
// versions, use the dense table indexed by state and symbol. Other states use
// the compact table: a section count followed by sections, each made of a
// value, a symbol count and that many symbols. The value of the section that
// lists `symbol` is returned, or 0 when no section does.
static inline uint16_t ts_language_lookup(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  bool uses_small_table =
    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_SMALL_STATES &&
    state >= self->large_state_count;
  if (!uses_small_table) {
    return self->parse_table[state * self->symbol_count + symbol];
  }

  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
  const uint16_t *cursor = &self->small_parse_table[index];
  uint16_t section_count = *cursor++;
  for (unsigned section = 0; section < section_count; section++) {
    uint16_t section_value = *cursor++;
    uint16_t symbol_count = *cursor++;
    for (unsigned j = 0; j < symbol_count; j++) {
      if (*cursor++ == symbol) return section_value;
    }
  }
  return 0;
}
// Determine the state reached from `state` on `symbol`.
//
// Error symbols always lead to state 0. For tokens, the last parse action
// decides: a shift or recover action provides the target state, anything else
// gives 0. For non-tokens the table value itself is the state.
static inline TSStateId ts_language_next_state(const TSLanguage *self,
                                               TSStateId state,
                                               TSSymbol symbol) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }
  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t count;
  const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
  if (count == 0) return 0;

  TSParseAction last = actions[count - 1];
  bool changes_state =
    last.type == TSParseActionTypeShift || last.type == TSParseActionTypeRecover;
  if (!changes_state) return 0;
  return last.params.state;
}
// Row of the external scanner's state table for `external_scanner_state`,
// with one bool per external token. State 0 has no row and yields NULL.
static inline const bool *
ts_language_enabled_external_tokens(const TSLanguage *self,
                                    unsigned external_scanner_state) {
  if (external_scanner_state != 0) {
    return self->external_scanner.states + self->external_token_count * external_scanner_state;
  }
  return NULL;
}
// Alias sequence of the production `production_id`: a row of
// `max_alias_sequence_length` symbols. Production 0 has none and yields NULL.
static inline const TSSymbol *
ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
  if (production_id == 0) return NULL;
  return self->alias_sequences + production_id * self->max_alias_sequence_length;
}
// Fetch the field-map entries of the production `production_id` as the
// half-open range [*start, *end). Both are set to NULL when the language
// predates fields or defines none.
static inline void ts_language_field_map(
  const TSLanguage *self,
  uint32_t production_id,
  const TSFieldMapEntry **start,
  const TSFieldMapEntry **end
) {
  bool has_fields =
    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS &&
    self->field_count != 0;
  if (!has_fields) {
    *start = NULL;
    *end = NULL;
    return;
  }

  TSFieldMapSlice slice = self->field_map_slices[production_id];
  const TSFieldMapEntry *first = &self->field_map_entries[slice.index];
  *start = first;
  *end = first + slice.length;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LANGUAGE_H_

44
src/tree_sitter/length.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef TREE_SITTER_LENGTH_H_
#define TREE_SITTER_LENGTH_H_
#include <stdlib.h>
#include <stdbool.h>
#include "./point.h"
#include "tree_sitter/api.h"
// A position or size in the text, tracked both as a byte count and as a
// row/column point.
typedef struct {
uint32_t bytes;
TSPoint extent;
} Length;
// Sentinel for "no length": zero bytes combined with a non-zero column, the
// combination length_is_undefined() tests for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Largest representable length; every component is UINT32_MAX.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
// True for the sentinel shape "zero bytes but non-zero column".
static inline bool length_is_undefined(Length length) {
  if (length.bytes != 0) return false;
  return length.extent.column != 0;
}
// The length with the smaller byte count; `len2` wins ties.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) return len1;
  return len2;
}
// Sum of two lengths: byte counts are added, points are combined with
// point_add().
static inline Length length_add(Length len1, Length len2) {
  return (Length) {
    .bytes = len1.bytes + len2.bytes,
    .extent = point_add(len1.extent, len2.extent),
  };
}
// Difference of two lengths: byte counts are subtracted, points are combined
// with point_sub().
static inline Length length_sub(Length len1, Length len2) {
  return (Length) {
    .bytes = len1.bytes - len2.bytes,
    .extent = point_sub(len1.extent, len2.extent),
  };
}
static inline Length length_zero(void) {
Length result = {0, {0, 0}};
return result;
}
#endif

322
src/tree_sitter/lexer.c Normal file
View File

@ -0,0 +1,322 @@
#include <stdio.h>
#include "./lexer.h"
#include "./subtree.h"
#include "./length.h"
#include "./utf16.h"
#include "utf8proc.h"
// Format a message into the lexer's debug buffer and pass it to the logger
// callback, if one is installed. Expects a variable named `self` in scope.
//
// The body is wrapped in do { ... } while (0) so that an invocation followed
// by a semicolon is exactly one statement and can be used safely as the
// branch of an unbraced if/else.
#define LOG(...)                                                                        \
  do {                                                                                  \
    if (self->logger.log) {                                                             \
      snprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
      self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer);         \
    }                                                                                   \
  } while (0)

// Log `message` (a string literal) followed by `character`, printed as a
// quoted character when it is printable ASCII and as a number otherwise.
#define LOG_CHARACTER(message, character)    \
  LOG(                                       \
    32 <= (character) && (character) < 127 ? \
      message " character:'%c'" :            \
      message " character:%d",               \
    (character)                              \
  )
// All-zero buffer used as the current chunk when the input has nothing to
// return; its address also serves as a marker that is compared against
// `self->chunk`.
static const char empty_chunk[3] = { 0, 0 };
// U+FEFF. When it is the lookahead at byte offset 0, ts_lexer_start() skips it.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;
// Ask the input callback for the chunk of text that begins at the current
// position. When the callback reports zero bytes, the shared empty chunk is
// installed instead.
static void ts_lexer__get_chunk(Lexer *self) {
  const uint32_t offset = self->current_position.bytes;
  self->chunk_start = offset;
  self->chunk = self->input.read(
    self->input.payload,
    offset,
    self->current_position.extent,
    &self->chunk_size
  );
  if (self->chunk_size == 0) self->chunk = empty_chunk;
}
// Signature shared by the character decoders (utf8proc_iterate and
// utf16_iterate): takes a byte buffer and its size, stores the decoded code
// point through the last argument, and returns a size that the lexer uses as
// the number of bytes consumed.
typedef utf8proc_ssize_t (*DecodeFunction)(
const utf8proc_uint8_t *,
utf8proc_ssize_t,
utf8proc_int32_t *
);
// Decode the character at the current position into `data.lookahead` and
// record its encoded size in `lookahead_size`.
//
// An exhausted chunk produces a NUL lookahead of size 1. A decoding failure
// (lookahead of -1) with fewer than 4 bytes left is retried on a freshly
// fetched chunk, since the character may have been cut by the chunk boundary.
// A character that still cannot be decoded is given a size of 1.
static void ts_lexer__get_lookahead(Lexer *self) {
  uint32_t offset = self->current_position.bytes - self->chunk_start;
  const uint8_t *bytes = (const uint8_t *)self->chunk + offset;
  uint32_t remaining = self->chunk_size - offset;

  if (remaining == 0) {
    self->data.lookahead = '\0';
    self->lookahead_size = 1;
    return;
  }

  DecodeFunction decode;
  if (self->input.encoding == TSInputEncodingUTF8) {
    decode = utf8proc_iterate;
  } else {
    decode = utf16_iterate;
  }

  self->lookahead_size = decode(bytes, remaining, &self->data.lookahead);

  bool maybe_split_by_chunk = self->data.lookahead == -1 && remaining < 4;
  if (maybe_split_by_chunk) {
    ts_lexer__get_chunk(self);
    bytes = (const uint8_t *)self->chunk;
    remaining = self->chunk_size;
    self->lookahead_size = decode(bytes, remaining, &self->data.lookahead);
  }

  if (self->data.lookahead == -1) self->lookahead_size = 1;
}
// Consume the current lookahead character and load the next one.
//
// `payload` is the TSLexer embedded at the start of a Lexer. When `skip` is
// true the consumed character is excluded from the token: the token start is
// moved to the new position.
static void ts_lexer__advance(TSLexer *payload, bool skip) {
Lexer *self = (Lexer *)payload;
// Nothing to consume when the lexer is on the empty chunk.
if (self->chunk == empty_chunk)
return;
if (self->lookahead_size) {
self->current_position.bytes += self->lookahead_size;
if (self->data.lookahead == '\n') {
self->current_position.extent.row++;
self->current_position.extent.column = 0;
} else {
// Columns are counted in bytes.
self->current_position.extent.column += self->lookahead_size;
}
}
// On reaching the end of the current included range, jump to the start of
// the next one, or report a NUL lookahead when it was the last range.
TSRange *current_range = &self->included_ranges[self->current_included_range_index];
if (self->current_position.bytes == current_range->end_byte) {
self->current_included_range_index++;
if (self->current_included_range_index == self->included_range_count) {
self->data.lookahead = '\0';
self->lookahead_size = 1;
return;
} else {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
}
}
if (skip) {
LOG_CHARACTER("skip", self->data.lookahead);
self->token_start_position = self->current_position;
} else {
LOG_CHARACTER("consume", self->data.lookahead);
}
// Fetch a new chunk when the position has moved past the current one.
if (self->current_position.bytes >= self->chunk_start + self->chunk_size) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);
}
// Record the current position as the end of the token being lexed.
//
// When the lexer stands exactly at the start of an included range other than
// the first, the token is considered to end at the end of the previous
// included range instead.
static void ts_lexer__mark_end(TSLexer *payload) {
  Lexer *self = (Lexer *)payload;
  TSRange *range = &self->included_ranges[self->current_included_range_index];

  bool at_later_range_start =
    self->current_included_range_index > 0 &&
    self->current_position.bytes == range->start_byte;
  if (!at_later_range_start) {
    self->token_end_position = self->current_position;
    return;
  }

  TSRange *previous = range - 1;
  self->token_end_position = (Length) {
    previous->end_byte,
    previous->end_point,
  };
}
// Count the characters between the start of the current line and the current
// position.
//
// The lexer is rewound to column 0 (re-fetching the chunk if the line starts
// before it) and then advanced character by character until it is back at the
// original byte offset; the number of advances is the result.
static uint32_t ts_lexer__get_column(TSLexer *payload) {
  Lexer *self = (Lexer *)payload;
  const uint32_t goal_byte = self->current_position.bytes;

  self->current_position.bytes -= self->current_position.extent.column;
  self->current_position.extent.column = 0;
  if (self->current_position.bytes < self->chunk_start) {
    ts_lexer__get_chunk(self);
  }

  uint32_t column = 0;
  for (; self->current_position.bytes < goal_byte; column++) {
    ts_lexer__advance(payload, false);
  }
  return column;
}
// True when the current position is the first byte of the current included
// range.
static bool ts_lexer__is_at_included_range_start(TSLexer *payload) {
  const Lexer *self = (const Lexer *)payload;
  size_t index = self->current_included_range_index;
  return self->included_ranges[index].start_byte == self->current_position.bytes;
}
// Initialize a Lexer in place.
//
// The lexer's methods are stored as a struct field so that generated
// parsers can call them without needing to be linked against this library.
void ts_lexer_init(Lexer *self) {
*self = (Lexer) {
.data = {
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column,
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
.lookahead = 0,
.result_symbol = 0,
},
.chunk = NULL,
.chunk_start = 0,
// A byte offset of UINT32_MAX differs from offset 0, so the
// ts_lexer_reset() call below does not take its "already there" shortcut.
.current_position = {UINT32_MAX, {0, 0}},
.logger = {
.payload = NULL,
.log = NULL
},
.current_included_range_index = 0,
};
// Must be NULL before ts_lexer_set_included_ranges() passes it to ts_realloc.
self->included_ranges = NULL;
// NULL selects the default range list.
ts_lexer_set_included_ranges(self, NULL, 0);
ts_lexer_reset(self, length_zero());
}
// Release the lexer's private copy of the included ranges.
void ts_lexer_delete(Lexer *self) {
  TSRange *owned_ranges = self->included_ranges;
  ts_free(owned_ranges);
}
// Install a new input source and forget everything cached from the previous
// one: the current chunk and the decoded lookahead.
void ts_lexer_set_input(Lexer *self, TSInput input) {
  self->input = input;

  self->chunk = NULL;
  self->chunk_size = 0;
  self->chunk_start = 0;

  self->lookahead_size = 0;
  self->data.lookahead = 0;
}
// Move the lexer to `position`, adjusted to the included ranges.
//
// The first included range that ends after the position becomes current; if
// the position lies before that range, it is moved forward to the range's
// start. When no range ends after the position, the lexer is placed at the
// end of the last range on the empty chunk.
static void ts_lexer_goto(Lexer *self, Length position) {
bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
if (included_range->end_byte > position.bytes) {
if (included_range->start_byte > position.bytes) {
position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
};
}
self->current_included_range_index = i;
found_included_range = true;
break;
}
}
if (!found_included_range) {
// NOTE(review): assumes included_range_count > 0; with an empty list the
// index below would wrap around.
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
position = (Length) {
.bytes = last_included_range->end_byte,
.extent = last_included_range->end_point,
};
self->chunk = empty_chunk;
self->chunk_start = position.bytes;
self->chunk_size = 2;
}
self->token_start_position = position;
self->token_end_position = LENGTH_UNDEFINED;
self->current_position = position;
// Drop the cached chunk when the new position falls outside of it.
if (self->chunk && (position.bytes < self->chunk_start ||
position.bytes >= self->chunk_start + self->chunk_size)) {
self->chunk = 0;
self->chunk_start = 0;
self->chunk_size = 0;
}
// Force the lookahead to be decoded again.
self->lookahead_size = 0;
self->data.lookahead = 0;
}
// Reposition the lexer at `position`; a no-op when it is already at that
// byte offset.
void ts_lexer_reset(Lexer *self, Length position) {
  if (position.bytes == self->current_position.bytes) return;
  ts_lexer_goto(self, position);
}
// Prepare the lexer for lexing a new token at the current position.
//
// Resets the token bounds and result symbol, makes sure a chunk and a
// lookahead character are loaded, and skips a byte order mark found at the
// very beginning of the input.
void ts_lexer_start(Lexer *self) {
  self->data.result_symbol = 0;
  self->token_end_position = LENGTH_UNDEFINED;
  self->token_start_position = self->current_position;

  if (self->chunk == NULL) ts_lexer__get_chunk(self);
  if (self->lookahead_size == 0) ts_lexer__get_lookahead(self);

  bool at_leading_bom =
    self->current_position.bytes == 0 &&
    self->data.lookahead == BYTE_ORDER_MARK;
  if (at_leading_bom) ts_lexer__advance((TSLexer *)self, true);
}
// Finish lexing a token.
//
// Marks the token end if that has not happened yet, then raises
// `*lookahead_end_byte` to cover the bytes that were examined: one byte past
// the current position, or two when the lookahead is an invalid character
// (-1), because deciding that a sequence is invalid UTF8 or UTF16 may have
// required looking at the byte after it.
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
  if (length_is_undefined(self->token_end_position)) {
    ts_lexer__mark_end(&self->data);
  }

  uint32_t bytes_examined = self->data.lookahead == -1 ? 2 : 1;
  uint32_t end_byte = self->current_position.bytes + bytes_examined;
  if (*lookahead_end_byte < end_byte) *lookahead_end_byte = end_byte;
}
// Consume characters until the lookahead becomes NUL.
void ts_lexer_advance_to_end(Lexer *self) {
  for (;;) {
    if (self->data.lookahead == 0) break;
    ts_lexer__advance((TSLexer *)self, false);
  }
}
// Public wrapper that marks the token end at the current position.
void ts_lexer_mark_end(Lexer *self) {
  TSLexer *base = &self->data;
  ts_lexer__mark_end(base);
}
// Range list used when the caller supplies no included ranges: a single
// range spanning from byte 0 to UINT32_MAX.
static const TSRange DEFAULT_RANGES[] = {
{
.start_point = {
.row = 0,
.column = 0,
},
.end_point = {
.row = UINT32_MAX,
.column = UINT32_MAX,
},
.start_byte = 0,
.end_byte = UINT32_MAX
}
};
// Replace the lexer's included ranges with a private copy of `ranges`, then
// re-seat the lexer at its current position within the new ranges.
//
// Passing NULL or a count of zero selects the default range list. An empty
// list must never be stored: ts_lexer_goto() indexes
// `included_ranges[included_range_count - 1]` when no range contains the
// position, which would wrap around and read out of bounds.
//
// NOTE(review): the result of ts_realloc is used unchecked, as before;
// presumably ts_realloc does not return NULL on failure -- confirm.
void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count) {
  if (!ranges || count == 0) {
    ranges = DEFAULT_RANGES;
    count = 1;
  }

  // Widen before multiplying so the size is computed in size_t.
  size_t sz = (size_t)count * sizeof(TSRange);
  self->included_ranges = ts_realloc(self->included_ranges, sz);
  memcpy(self->included_ranges, ranges, sz);
  self->included_range_count = count;
  ts_lexer_goto(self, self->current_position);
}
// Expose the lexer's included ranges: returns the array and stores its
// length in `*count`. The array remains owned by the lexer.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
  TSRange *ranges = self->included_ranges;
  *count = self->included_range_count;
  return ranges;
}
#undef LOG

48
src/tree_sitter/lexer.h Normal file
View File

@ -0,0 +1,48 @@
#ifndef TREE_SITTER_LEXER_H_
#define TREE_SITTER_LEXER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./length.h"
#include "./subtree.h"
#include "tree_sitter/api.h"
#include "tree_sitter/parser.h"
// State of the runtime lexer.
typedef struct {
// Interface handed to generated parsers. It is the first member, so a
// TSLexer pointer can be cast back to the enclosing Lexer.
TSLexer data;
// Position of the lookahead character.
Length current_position;
// Start of the token being lexed.
Length token_start_position;
// End of the token being lexed; LENGTH_UNDEFINED until marked.
Length token_end_position;
// Heap-allocated copy of the included ranges, released by ts_lexer_delete().
TSRange * included_ranges;
size_t included_range_count;
// Index of the included range that contains the current position.
size_t current_included_range_index;
// Most recently fetched chunk of input text, with its starting byte offset
// and size.
const char *chunk;
uint32_t chunk_start;
uint32_t chunk_size;
// Encoded size in bytes of the lookahead character; 0 when it has not been
// decoded yet.
uint32_t lookahead_size;
// Source of the text and its encoding.
TSInput input;
// Optional logging callback.
TSLogger logger;
// Scratch buffer in which log messages are formatted.
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
} Lexer;
void ts_lexer_init(Lexer *);
void ts_lexer_delete(Lexer *);
void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
void ts_lexer_start(Lexer *);
void ts_lexer_finish(Lexer *, uint32_t *);
void ts_lexer_advance_to_end(Lexer *);
void ts_lexer_mark_end(Lexer *);
void ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LEXER_H_

20
src/tree_sitter/lib.c Normal file
View File

@ -0,0 +1,20 @@
// The Tree-sitter library can be built by compiling this one source file.
//
// The following directories must be added to the include path:
// - include
// - utf8proc
#define _POSIX_C_SOURCE 200112L
#define UTF8PROC_STATIC
#include "./get_changed_ranges.c"
#include "./language.c"
#include "./lexer.c"
#include "./node.c"
#include "./parser.c"
#include "./stack.c"
#include "./subtree.c"
#include "./tree_cursor.c"
#include "./tree.c"
#include "./utf16.c"
#include "utf8proc.c"

673
src/tree_sitter/node.c Normal file
View File

@ -0,0 +1,673 @@
#include <stdbool.h>
#include "./subtree.h"
#include "./tree.h"
#include "./language.h"
// Cursor over the direct children of a node.
typedef struct {
// Subtree whose children are iterated; a null subtree when the node has no
// children.
Subtree parent;
// Tree that the produced nodes belong to.
const TSTree *tree;
// Running position used to place the next child.
Length position;
// Index of the next child to produce.
uint32_t child_index;
// Index of the next child among the non-extra children; used to index
// `alias_sequence`.
uint32_t structural_child_index;
// Alias sequence of the parent's production, or NULL.
const TSSymbol *alias_sequence;
} NodeChildIterator;
// TSNode - constructors
// Assemble a TSNode. The four context slots hold, in order, the start byte,
// the start row, the start column and the alias symbol.
TSNode ts_node_new(
  const TSTree *tree,
  const Subtree *subtree,
  Length position,
  TSSymbol alias
) {
  TSNode node = {
    .context = {position.bytes, position.extent.row, position.extent.column, alias},
    .id = subtree,
    .tree = tree,
  };
  return node;
}
static inline TSNode ts_node__null(void) {
return ts_node_new(NULL, NULL, length_zero(), 0);
}
// TSNode - accessors
// Returns the node's start byte, which is kept in context slot 0.
uint32_t ts_node_start_byte(TSNode self) {
  uint32_t start_byte = self.context[0];
  return start_byte;
}
// Returns the node's start point, built from context slots 1 and 2
// (in that order).
TSPoint ts_node_start_point(TSNode self) {
  TSPoint start = {self.context[1], self.context[2]};
  return start;
}
// Returns the alias symbol stored in context slot 3 (0 means "no alias").
static inline uint32_t ts_node__alias(const TSNode *self) {
  uint32_t alias = self->context[3];
  return alias;
}
// Returns the Subtree this node refers to; `id` holds a pointer to it.
static inline Subtree ts_node__subtree(TSNode self) {
  const Subtree *subtree = (const Subtree *)self.id;
  return *subtree;
}
// NodeChildIterator
static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
Subtree subtree = ts_node__subtree(*node);
if (ts_subtree_child_count(subtree) == 0) {
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->tree->language,
subtree.ptr->production_id
);
return (NodeChildIterator) {
.tree = node->tree,
.parent = subtree,
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// True once every child of the parent has been visited. The parent pointer
// is dereferenced unconditionally, so callers must check it for NULL first.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  return self->parent.ptr->child_count == self->child_index;
}
// Advances the iterator by one child. On success, stores the child in
// `*result` and returns true; returns false when the parent is null or all
// children have been visited. After a successful call, `self->position` is
// the end position of the returned child.
static inline bool ts_node_child_iterator_next(
  NodeChildIterator *self,
  TSNode *result
) {
  if (!self->parent.ptr) return false;
  if (ts_node_child_iterator_done(self)) return false;

  const Subtree *child = self->parent.ptr->children + self->child_index;

  // Only non-extra children consume a slot in the alias sequence.
  TSSymbol alias = 0;
  if (!ts_subtree_extra(*child)) {
    if (self->alias_sequence) {
      alias = self->alias_sequence[self->structural_child_index];
    }
    self->structural_child_index++;
  }

  // The first child's padding is not added: iteration already started at
  // the parent's start position.
  bool is_first_child = self->child_index == 0;
  if (!is_first_child) {
    self->position = length_add(self->position, ts_subtree_padding(*child));
  }

  *result = ts_node_new(self->tree, child, self->position, alias);

  self->position = length_add(self->position, ts_subtree_size(*child));
  self->child_index++;
  return true;
}
// TSNode - private
// Decides whether a node counts for the public child/sibling APIs.
// With `include_anonymous`, a node is relevant when its subtree is visible
// or it carries an alias. Otherwise an aliased node is relevant when the
// alias symbol is named, and an unaliased node when it is visible and named.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  Subtree subtree = ts_node__subtree(self);

  if (include_anonymous) {
    return ts_subtree_visible(subtree) || ts_node__alias(&self) != 0;
  }

  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_visible(subtree) && ts_subtree_named(subtree);
}
// Returns the subtree's stored visible child count (with `include_anonymous`)
// or named child count (without), or 0 for a subtree without children.
static inline uint32_t ts_node__relevant_child_count(
  TSNode self,
  bool include_anonymous
) {
  Subtree subtree = ts_node__subtree(self);
  if (!(ts_subtree_child_count(subtree) > 0)) return 0;
  if (include_anonymous) return subtree.ptr->visible_child_count;
  return subtree.ptr->named_child_count;
}
// Returns the `child_index`-th relevant child of `self`, or the null node if
// there is none. Irrelevant children are not counted themselves; their own
// relevant children are counted in their place, and the search descends into
// the one that contains the requested index. The returned child is recorded
// in the tree's parent cache with `self` as its parent.
static inline TSNode ts_node__child(
  TSNode self,
  uint32_t child_index,
  bool include_anonymous
) {
  TSNode parent = self;

  for (bool descended = true; descended;) {
    descended = false;

    TSNode child;
    uint32_t seen = 0;
    NodeChildIterator iterator = ts_node_iterate_children(&parent);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (!ts_node__is_relevant(child, include_anonymous)) {
        uint32_t nested_index = child_index - seen;
        uint32_t nested_count = ts_node__relevant_child_count(child, include_anonymous);
        if (nested_index < nested_count) {
          // The target lies inside this irrelevant child: restart there.
          descended = true;
          parent = child;
          child_index = nested_index;
          break;
        }
        seen += nested_count;
        continue;
      }

      if (seen == child_index) {
        ts_tree_set_cached_parent(self.tree, &child, &self);
        return child;
      }
      seen++;
    }
  }

  return ts_node__null();
}
// Reports whether `other` appears among the trailing zero-byte descendants of
// `self`. Children are scanned from last to first; the scan stops at the
// first child that occupies any bytes.
static bool ts_subtree_has_trailing_empty_descendant(
  Subtree self,
  Subtree other
) {
  unsigned remaining = ts_subtree_child_count(self);
  while (remaining > 0) {
    remaining--;
    Subtree child = self.ptr->children[remaining];
    if (ts_subtree_total_bytes(child) > 0) break;
    if (child.ptr == other.ptr) return true;
    if (ts_subtree_has_trailing_empty_descendant(child, other)) return true;
  }
  return false;
}
// Finds the closest relevant node that precedes `self`, starting the search at
// `ts_node_parent(self)`. Returns the null node when there is none.
//
// Each pass of the outer loop scans the children of `node` from the left,
// remembering the last child seen that is either relevant itself or has
// relevant children, and stopping at `self` or at a child judged to contain
// `self`.
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
Subtree self_subtree = ts_node__subtree(self);
bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
uint32_t target_end_byte = ts_node_end_byte(self);
TSNode node = ts_node_parent(self);
// Best candidate remembered from an enclosing level, used as a fallback.
TSNode earlier_node = ts_node__null();
bool earlier_node_is_relevant = false;
while (!ts_node_is_null(node)) {
TSNode earlier_child = ts_node__null();
bool earlier_child_is_relevant = false;
bool found_child_containing_target = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (child.id == self.id) break;
// `iterator.position` is the end of `child` here: a child ending past the
// end of `self` is treated as containing it.
if (iterator.position.bytes > target_end_byte) {
found_child_containing_target = true;
break;
}
// A child ending exactly where `self` ends contains it unless `self` is
// empty, in which case `self` must be one of the child's trailing empty
// descendants.
if (iterator.position.bytes == target_end_byte &&
(!self_is_empty ||
ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
found_child_containing_target = true;
break;
}
if (ts_node__is_relevant(child, include_anonymous)) {
earlier_child = child;
earlier_child_is_relevant = true;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
earlier_child = child;
earlier_child_is_relevant = false;
}
}
if (found_child_containing_target) {
// Descend into the containing child, keeping this level's candidate (if
// any) as the fallback.
if (!ts_node_is_null(earlier_child)) {
earlier_node = earlier_child;
earlier_node_is_relevant = earlier_child_is_relevant;
}
node = child;
} else if (earlier_child_is_relevant) {
return earlier_child;
} else if (!ts_node_is_null(earlier_child)) {
// The candidate is irrelevant but has relevant children: search inside it.
node = earlier_child;
} else if (earlier_node_is_relevant) {
return earlier_node;
} else {
// May be the null node, which ends the loop.
node = earlier_node;
}
}
return ts_node__null();
}
// Finds the closest relevant node that follows `self`, starting the search at
// `ts_node_parent(self)`. Returns the null node when there is none.
//
// Each pass of the outer loop scans the children of `node`, skipping those
// that end before `self` ends, noting a child that contains `self`, and
// stopping at the first later child that is relevant or has relevant
// children.
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
uint32_t target_end_byte = ts_node_end_byte(self);
TSNode node = ts_node_parent(self);
// Best candidate remembered from an enclosing level, used as a fallback.
TSNode later_node = ts_node__null();
bool later_node_is_relevant = false;
while (!ts_node_is_null(node)) {
TSNode later_child = ts_node__null();
bool later_child_is_relevant = false;
TSNode child_containing_target = ts_node__null();
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
// `iterator.position` is the end of `child` here.
if (iterator.position.bytes < target_end_byte) continue;
if (ts_node_start_byte(child) <= ts_node_start_byte(self)) {
// Starts at or before `self` and ends at or after it: unless it is
// `self`'s own subtree, it contains `self`.
if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
child_containing_target = child;
}
} else if (ts_node__is_relevant(child, include_anonymous)) {
later_child = child;
later_child_is_relevant = true;
break;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
later_child = child;
later_child_is_relevant = false;
break;
}
}
if (!ts_node_is_null(child_containing_target)) {
// Descend into the containing child, keeping this level's candidate (if
// any) as the fallback.
if (!ts_node_is_null(later_child)) {
later_node = later_child;
later_node_is_relevant = later_child_is_relevant;
}
node = child_containing_target;
} else if (later_child_is_relevant) {
return later_child;
} else if (!ts_node_is_null(later_child)) {
// The candidate is irrelevant but has relevant children: search inside it.
node = later_child;
} else if (later_node_is_relevant) {
return later_node;
} else {
// May be the null node, which ends the loop.
node = later_node;
}
}
return ts_node__null();
}
// Returns the first relevant node under `self` whose end byte lies beyond
// `goal`, descending through irrelevant children that report a non-zero
// child count. Returns the null node if no such node is found.
static inline TSNode ts_node__first_child_for_byte(
  TSNode self,
  uint32_t goal,
  bool include_anonymous
) {
  TSNode parent = self;

  for (;;) {
    bool descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&parent);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      if (!(ts_node_end_byte(child) > goal)) continue;

      if (ts_node__is_relevant(child, include_anonymous)) return child;

      if (ts_node_child_count(child) > 0) {
        descended = true;
        parent = child;
        break;
      }
    }

    if (!descended) break;
  }

  return ts_node__null();
}
// Returns the deepest relevant descendant of `self` that spans the byte range
// [range_start, range_end], or `self` if no descendant qualifies. Each
// relevant node found on the way down is recorded in the tree's parent cache
// with the previously found relevant node as its parent.
static inline TSNode ts_node__descendant_for_byte_range(
  TSNode self,
  uint32_t range_start,
  uint32_t range_end,
  bool include_anonymous
) {
  TSNode current = self;
  TSNode last_visible_node = self;

  for (bool descended = true; descended;) {
    descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      uint32_t child_end = iterator.position.bytes;

      // The child must reach at least the end of the range and extend
      // strictly past its start.
      if (child_end < range_end || child_end <= range_start) continue;

      // The child must begin at or before the start of the range; children
      // are visited in order, so no later child can qualify either.
      if (range_start < ts_node_start_byte(child)) break;

      current = child;
      if (ts_node__is_relevant(current, include_anonymous)) {
        ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
        last_visible_node = current;
      }
      descended = true;
      break;
    }
  }

  return last_visible_node;
}
// Point-based counterpart of the byte-range lookup: returns the deepest
// relevant descendant of `self` that spans [range_start, range_end], or
// `self` if no descendant qualifies. Each relevant node found on the way
// down is recorded in the tree's parent cache.
static inline TSNode ts_node__descendant_for_point_range(
  TSNode self,
  TSPoint range_start,
  TSPoint range_end,
  bool include_anonymous
) {
  TSNode current = self;
  TSNode last_visible_node = self;

  for (bool descended = true; descended;) {
    descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      TSPoint child_end = iterator.position.extent;

      // The child must reach at least the end of the range and extend
      // strictly past its start.
      if (point_lt(child_end, range_end) || point_lte(child_end, range_start)) continue;

      // The child must begin at or before the start of the range.
      if (point_lt(range_start, ts_node_start_point(child))) break;

      current = child;
      if (ts_node__is_relevant(current, include_anonymous)) {
        ts_tree_set_cached_parent(self.tree, &child, &last_visible_node);
        last_visible_node = current;
      }
      descended = true;
      break;
    }
  }

  return last_visible_node;
}
// TSNode - public
// Returns the node's end byte: its start byte plus the byte size of its
// subtree.
uint32_t ts_node_end_byte(TSNode self) {
  Length size = ts_subtree_size(ts_node__subtree(self));
  return ts_node_start_byte(self) + size.bytes;
}
// Returns the node's end point: its start point advanced by the extent of
// its subtree.
TSPoint ts_node_end_point(TSNode self) {
  Length size = ts_subtree_size(ts_node__subtree(self));
  return point_add(ts_node_start_point(self), size.extent);
}
// Returns the node's alias symbol when it has one, otherwise the symbol of
// its subtree.
TSSymbol ts_node_symbol(TSNode self) {
  uint32_t alias = ts_node__alias(&self);
  if (alias) return alias;
  return ts_subtree_symbol(ts_node__subtree(self));
}
// Returns the language's name for the node's symbol (alias-aware, see
// ts_node_symbol).
const char *ts_node_type(TSNode self) {
  TSSymbol symbol = ts_node_symbol(self);
  return ts_language_symbol_name(self.tree->language, symbol);
}
// Returns the string produced by ts_subtree_string for the node's subtree.
// NOTE(review): ownership of the returned buffer is decided by
// ts_subtree_string, which is not visible here — confirm who frees it.
char *ts_node_string(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_string(subtree, self.tree->language, false);
}
// Two nodes are equal when they belong to the same tree and have the same id.
bool ts_node_eq(TSNode self, TSNode other) {
  if (self.tree != other.tree) return false;
  return self.id == other.id;
}
// A node is null when its id is a null pointer.
bool ts_node_is_null(TSNode self) {
  return !self.id;
}
// Reports the `extra` flag of the node's subtree.
bool ts_node_is_extra(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_extra(subtree);
}
// Reports whether the node is named. For an aliased node this is the `named`
// flag of the alias symbol's metadata; otherwise it is the subtree's flag.
bool ts_node_is_named(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_named(ts_node__subtree(self));
}
// Reports the `missing` flag of the node's subtree.
bool ts_node_is_missing(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_missing(subtree);
}
// Reports the `has_changes` flag of the node's subtree.
bool ts_node_has_changes(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_has_changes(subtree);
}
// A node has an error when the error cost of its subtree is positive.
bool ts_node_has_error(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return 0 < ts_subtree_error_cost(subtree);
}
// Returns the closest relevant ancestor of `self` (anonymous nodes included),
// or the null node when `self` is the root. A cached parent is returned
// directly when available; otherwise the tree is walked down from the root
// towards `self`, and every relevant node met on the way is added to the
// parent cache.
TSNode ts_node_parent(TSNode self) {
  TSNode cached = ts_tree_get_cached_parent(self.tree, &self);
  if (cached.id) return cached;

  TSNode ancestor = ts_tree_root_node(self.tree);
  uint32_t end_byte = ts_node_end_byte(self);
  if (ancestor.id == self.id) return ts_node__null();

  TSNode last_visible_node = ancestor;
  for (bool descended = true; descended;) {
    descended = false;

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&ancestor);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // Stop at a child that starts after `self`, or at `self` itself.
      if (ts_node_start_byte(child) > ts_node_start_byte(self)) break;
      if (child.id == self.id) break;

      // Skip children that end before `self` does.
      if (iterator.position.bytes < end_byte) continue;

      ancestor = child;
      if (ts_node__is_relevant(child, true)) {
        ts_tree_set_cached_parent(self.tree, &ancestor, &last_visible_node);
        last_visible_node = ancestor;
      }
      descended = true;
      break;
    }
  }

  return last_visible_node;
}
// Returns the child at `child_index`, counting anonymous nodes as well as
// named ones; the null node if out of range.
TSNode ts_node_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = true;
  return ts_node__child(self, child_index, include_anonymous);
}
// Returns the child at `child_index`, counting named nodes only; the null
// node if out of range.
TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = false;
  return ts_node__child(self, child_index, include_anonymous);
}
// Returns the child of `self` associated with `field_id`, or the null node
// when `field_id` is 0, `self` has no children, or no mapping matches.
//
// The field map entries for the node's production are narrowed to those with
// the requested field id, then the children are scanned in order. An entry
// marked `inherited` means the field lives inside that (hidden) child, so the
// search continues within it.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
recur:
if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
self.tree->language,
ts_node__subtree(self).ptr->production_id,
&field_map,
&field_map_end
);
if (field_map == field_map_end) return ts_node__null();
// The field mappings are sorted by their field id. Scan all
// the mappings to find the ones for the given field id.
while (field_map->field_id < field_id) {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
while (field_map_end[-1].field_id > field_id) {
field_map_end--;
if (field_map == field_map_end) return ts_node__null();
}
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&self);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (!ts_subtree_extra(ts_node__subtree(child))) {
// The iterator has already counted this child, hence the `- 1`.
uint32_t index = iterator.structural_child_index - 1;
if (index < field_map->child_index) continue;
// Hidden nodes' fields are "inherited" by their visible parent.
if (field_map->inherited) {
// If this is the *last* possible child node for this field,
// then perform a tail call to avoid recursion.
if (field_map + 1 == field_map_end) {
self = child;
goto recur;
}
// Otherwise, descend into this child, but if it doesn't contain
// the field, continue searching subsequent children.
else {
TSNode result = ts_node_child_by_field_id(child, field_id);
if (result.id) return result;
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
else if (ts_node__is_relevant(child, true)) {
return child;
}
// If the field refers to a hidden node, return its first visible
// child.
// NOTE(review): if that hidden node has no visible children this returns
// the null node instead of moving on to later mappings — confirm intended.
else {
return ts_node_child(child, 0);
}
}
}
return ts_node__null();
}
// Looks up the field id for `name` (of length `name_length`) in the node's
// language and returns the matching child via ts_node_child_by_field_id.
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
) {
  const TSLanguage *language = self.tree->language;
  return ts_node_child_by_field_id(
    self,
    ts_language_field_id_for_name(language, name, name_length)
  );
}
// Returns the subtree's stored visible child count, or 0 for a subtree
// without children.
uint32_t ts_node_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (!(ts_subtree_child_count(subtree) > 0)) return 0;
  return subtree.ptr->visible_child_count;
}
// Returns the subtree's stored named child count, or 0 for a subtree without
// children.
uint32_t ts_node_named_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  if (!(ts_subtree_child_count(subtree) > 0)) return 0;
  return subtree.ptr->named_child_count;
}
// Next sibling of `self`, counting anonymous nodes as well as named ones.
TSNode ts_node_next_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__next_sibling(self, include_anonymous);
}
// Next sibling of `self`, counting named nodes only.
TSNode ts_node_next_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__next_sibling(self, include_anonymous);
}
// Previous sibling of `self`, counting anonymous nodes as well as named ones.
TSNode ts_node_prev_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__prev_sibling(self, include_anonymous);
}
// Previous sibling of `self`, counting named nodes only.
TSNode ts_node_prev_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__prev_sibling(self, include_anonymous);
}
// First node under `self` ending after `byte`, anonymous nodes included.
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = true;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// First named node under `self` ending after `byte`.
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = false;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// Deepest descendant of `self` spanning the byte range [start, end],
// anonymous nodes included.
TSNode ts_node_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Deepest named descendant of `self` spanning the byte range [start, end].
TSNode ts_node_named_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Deepest descendant of `self` spanning the point range [start, end],
// anonymous nodes included.
TSNode ts_node_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Deepest named descendant of `self` spanning the point range [start, end].
TSNode ts_node_named_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Updates the start position stored in `self` to account for `edit`.
//  - A node starting at or after the old end of the edit is shifted by the
//    difference between the edit's new and old end.
//  - A node starting strictly inside the edited range is moved to the new end.
//  - A node starting at or before the start of the edit is left where it is.
void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
  uint32_t byte = ts_node_start_byte(*self);
  TSPoint point = ts_node_start_point(*self);

  bool starts_after_edit = byte >= edit->old_end_byte;
  bool starts_inside_edit = !starts_after_edit && byte > edit->start_byte;

  if (starts_after_edit) {
    TSPoint offset_from_old_end = point_sub(point, edit->old_end_point);
    byte = edit->new_end_byte + (byte - edit->old_end_byte);
    point = point_add(edit->new_end_point, offset_from_old_end);
  } else if (starts_inside_edit) {
    byte = edit->new_end_byte;
    point = edit->new_end_point;
  }

  self->context[0] = byte;
  self->context[1] = point.row;
  self->context[2] = point.column;
}

1887
src/tree_sitter/parser.c Normal file

File diff suppressed because it is too large Load Diff

220
src/tree_sitter/parser.h Normal file
View File

@ -0,0 +1,220 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Built-in symbol values: the error symbol is the largest TSSymbol value and
// the end symbol is 0.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
// Byte size of serialization buffers; the lexer's `debug_buffer` is declared
// with this size.
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// Fallback declarations used only when TREE_SITTER_API_H_ is not defined,
// i.e. presumably when the public API header has not been included
// (TODO confirm that api.h declares the same types).
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One field mapping of a production: associates `field_id` with the child at
// structural index `child_index`. When `inherited` is set, the field lookup
// in node.c searches inside that child instead of returning it.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// An (index, length) pair.
// NOTE(review): presumably selects a run of TSFieldMapEntry values within
// TSLanguage.field_map_entries — confirm against language.h.
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
// Identifier of a parse state.
typedef uint16_t TSStateId;
// Per-symbol flags. `named` is what node.c consults to decide whether an
// aliased node counts as a named node.
typedef struct {
bool visible : 1;
bool named : 1;
} TSSymbolMetadata;
// Lexer interface handed to lex functions and external scanners.
// The lexer macros below read `lookahead`, call `advance(lexer, skip)` to
// move on, and on acceptance set `result_symbol` and call `mark_end`.
// NOTE(review): `get_column` and `is_at_included_range_start` are not used in
// this header; their semantics are defined by the implementation.
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(TSLexer *);
};
// Kinds of parse action. The value is stored in the 4-bit `type` field of
// TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. `type` selects which member of `params` is
// meaningful: the SHIFT* macros fill the first struct (state / extra /
// repetition) and REDUCE fills the second (symbol, child_count, plus any
// further designated fields passed to the macro).
typedef struct {
union {
struct {
TSStateId state;
bool extra : 1;
bool repetition : 1;
};
struct {
TSSymbol symbol;
int16_t dynamic_precedence;
uint8_t child_count;
uint8_t production_id;
};
} params;
TSParseActionType type : 4;
} TSParseAction;
// A pair of lex state ids.
// NOTE(review): presumably one entry per parse state, naming the state for
// the generated lex function and for the external scanner — confirm.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Element of the parse action table: either an action, or a header made of a
// `count` and a `reusable` flag.
// NOTE(review): presumably a header entry is followed by `count` action
// entries — confirm against language.h.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable : 1;
};
} TSParseActionEntry;
// Description of a language: tables and callbacks for one grammar.
// NOTE(review): the field roles are not established by this header; the
// grouping comments below are inferred from names — confirm against
// language.h / parser.c before relying on them.
struct TSLanguage {
uint32_t version;
// Counts.
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
// Symbol tables.
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
// Parse tables.
const uint16_t *parse_table;
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
// Alias data.
const TSSymbol *alias_sequences;
uint16_t max_alias_sequence_length;
// Lex functions; both take the TSLexer and a state id and return bool.
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// External scanner callbacks and tables.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
// Field data.
uint32_t field_count;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const char **field_names;
// Small-state parse table data.
uint32_t large_state_count;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
};
/*
* Lexer Macros
*/
// Building blocks for lex functions. They cooperate through shared labels
// and locals, and expect `lexer` (a TSLexer *) and an assignable `state` to
// be in scope at the point of use.
//
// START_LEXER declares `result`, `skip` and `lookahead`, then jumps to
// `start`. The `next_state` label calls `lexer->advance(lexer, skip)` and
// falls through to `start`, which clears `skip` and reloads `lookahead`.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// ADVANCE sets `state` and jumps to `next_state`, so the lexer advances with
// `skip` false.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// SKIP does the same as ADVANCE but sets `skip` first, so the lexer advances
// with `skip` true.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// ACCEPT_TOKEN marks the lex as successful, records the symbol on the lexer
// and calls `mark_end`.
// NOTE(review): this expands to several statements that are not wrapped in
// `do { } while (0)`, so it must not be the unbraced body of an if/else.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// END_STATE returns `result` from the enclosing function.
#define END_STATE() return result;
/*
* Parse Table Macros
*/
// Parse-table indexing helpers.
// SMALL_STATE maps a state id to its offset from LARGE_STATE_COUNT, which
// must be defined at the point of use. STATE and ACTIONS pass their argument
// through unchanged.
// Arguments and expansions are parenthesized so the macros stay correct when
// given a compound expression or used inside a larger expression.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) (id)
#define ACTIONS(id) (id)
// Initializer macros for parse actions. Each expands to a doubly
// brace-enclosed designated initializer that sets `.type` and, where
// relevant, `.params`.
// NOTE(review): the outer braces presumably initialize a TSParseActionEntry
// through its first member `action` — confirm against generated parsers.
//
// SHIFT: shift action that moves to `state_value`.
#define SHIFT(state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = {.state = state_value}, \
} \
}
// SHIFT_REPEAT: shift action to `state_value` with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = { \
.state = state_value, \
.repetition = true \
}, \
} \
}
// RECOVER: recover action; no params are set.
#define RECOVER() \
{ \
{ .type = TSParseActionTypeRecover } \
}
// SHIFT_EXTRA: shift action with the `extra` flag set and no target state.
#define SHIFT_EXTRA() \
{ \
{ \
.type = TSParseActionTypeShift, \
.params = {.extra = true} \
} \
}
// REDUCE: reduce action for `symbol_val` with `child_count_val` children.
// Any further arguments are pasted in as additional designated initializers
// of `params`.
#define REDUCE(symbol_val, child_count_val, ...) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
} \
} \
}
// ACCEPT_INPUT: accept action; no params are set.
#define ACCEPT_INPUT() \
{ \
{ .type = TSParseActionTypeAccept } \
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

53
src/tree_sitter/point.h Normal file
View File

@ -0,0 +1,53 @@
#ifndef TREE_SITTER_POINT_H_
#define TREE_SITTER_POINT_H_
#include "tree_sitter/api.h"
// The largest representable point: both members are UINT32_MAX.
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
static inline TSPoint point__new(unsigned row, unsigned column) {
TSPoint result = {row, column};
return result;
}
// Advances point `a` by extent `b`. If `b` spans at least one row, the rows
// add up and the column comes from `b` alone; otherwise the row is kept and
// the columns add up.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  return b.row > 0
    ? point__new(a.row + b.row, b.column)
    : point__new(a.row, a.column + b.column);
}
// Computes the extent from point `b` to point `a`. If `a` is on a later row,
// the rows are subtracted and `a`'s column is kept; otherwise the row is 0
// and the columns are subtracted.
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  return a.row > b.row
    ? point__new(a.row - b.row, a.column)
    : point__new(0, a.column - b.column);
}
// True when `a` is at or before `b`, comparing rows first, then columns.
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column <= b.column;
}
// True when `a` is strictly before `b`, comparing rows first, then columns.
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column < b.column;
}
// True when both row and column match.
static inline bool point_eq(TSPoint a, TSPoint b) {
  return !(a.row != b.row || a.column != b.column);
}
// Returns the earlier of the two points; `b` when they are equal.
static inline TSPoint point_min(TSPoint a, TSPoint b) {
  bool a_is_earlier = a.row < b.row || (a.row == b.row && a.column < b.column);
  return a_is_earlier ? a : b;
}
// Returns the later of the two points; `b` when they are equal.
static inline TSPoint point_max(TSPoint a, TSPoint b) {
  bool a_is_later = a.row > b.row || (a.row == b.row && a.column > b.column);
  return a_is_later ? a : b;
}
#endif

View File

@ -0,0 +1,34 @@
#ifndef TREE_SITTER_REDUCE_ACTION_H_
#define TREE_SITTER_REDUCE_ACTION_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "tree_sitter/api.h"
// Description of one reduce action. ts_reduce_action_set_add treats two
// actions as the same when their `symbol` and `count` match; the other two
// fields are not compared.
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short production_id;
} ReduceAction;
// Growable array of reduce actions, used as a set.
typedef Array(ReduceAction) ReduceActionSet;
// Appends `new_action` to the set unless an action with the same symbol and
// count is already present.
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
                                            ReduceAction new_action) {
  for (uint32_t i = 0; i < self->size; i++) {
    const ReduceAction *existing = &self->contents[i];
    bool is_duplicate =
      existing->symbol == new_action.symbol &&
      existing->count == new_action.count;
    if (is_duplicate) return;
  }
  array_push(self, new_action);
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_REDUCE_ACTION_H_

View File

@ -0,0 +1,88 @@
#include "./subtree.h"
// One level of the path from the root to the current reusable subtree.
typedef struct {
Subtree tree;
// Index of `tree` among its parent's children (0 for the root entry).
uint32_t child_index;
// Byte offset at which `tree` begins; a first child shares its parent's
// offset.
uint32_t byte_offset;
} StackEntry;
// Cursor over an existing tree. `stack` holds the path of entries down to the
// current subtree (its last element); `last_external_token` is updated as
// subtrees with external tokens are passed during `reusable_node_advance`.
typedef struct {
Array(StackEntry) stack;
Subtree last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new(void) {
return (ReusableNode) {array_new(), NULL_SUBTREE};
}
// Empties the cursor: forgets the last external token and clears the stack.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL_SUBTREE;
  array_clear(&self->stack);
}
// Points the cursor at `tree` as the root: clears all state, then pushes a
// single entry for it at child index 0 and byte offset 0.
static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  reusable_node_clear(self);
  array_push(&self->stack, root_entry);
}
// Returns the subtree the cursor currently points at, or NULL_SUBTREE when
// the stack is empty.
static inline Subtree reusable_node_tree(ReusableNode *self) {
  if (self->stack.size > 0) {
    return self->stack.contents[self->stack.size - 1].tree;
  }
  return NULL_SUBTREE;
}
// Returns the byte offset of the current subtree, or UINT32_MAX when the
// stack is empty.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size > 0) {
    return self->stack.contents[self->stack.size - 1].byte_offset;
  }
  return UINT32_MAX;
}
// Releases the cursor's stack storage via array_delete. `last_external_token`
// is not touched here.
static inline void reusable_node_delete(ReusableNode *self) {
array_delete(&self->stack);
}
// Moves the cursor past the current subtree to the next one in document
// order: the next sibling, or the next sibling of the closest ancestor that
// has one. If no such subtree exists, the stack is left empty.
// If the subtree being left has external tokens, its last one is remembered
// in `last_external_token`.
static inline void reusable_node_advance(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  uint32_t byte_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);
  if (ts_subtree_has_external_tokens(current.tree)) {
    self->last_external_token = ts_subtree_last_external_token(current.tree);
  }

  // Pop entries until a parent with a further child is found.
  Subtree parent;
  uint32_t sibling_index;
  for (;;) {
    StackEntry popped = array_pop(&self->stack);
    sibling_index = popped.child_index + 1;
    if (self->stack.size == 0) return;
    parent = array_back(&self->stack)->tree;
    if (!(ts_subtree_child_count(parent) <= sibling_index)) break;
  }

  StackEntry next_entry = {
    .tree = parent.ptr->children[sibling_index],
    .child_index = sibling_index,
    .byte_offset = byte_offset,
  };
  array_push(&self->stack, next_entry);
}
// Moves the cursor to the first child of the current subtree, which starts
// at the same byte offset. Returns false, leaving the cursor unchanged, when
// the current subtree has no children.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  if (!(ts_subtree_child_count(current.tree) > 0)) return false;

  StackEntry first_child = {
    .tree = current.tree.ptr->children[0],
    .child_index = 0,
    .byte_offset = current.byte_offset,
  };
  array_push(&self->stack, first_child);
  return true;
}
// Descends to the first leaf below the current subtree, then advances past
// it.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool descended;
  do {
    descended = reusable_node_descend(self);
  } while (descended);
  reusable_node_advance(self);
}

846
src/tree_sitter/stack.c Normal file
View File

@ -0,0 +1,846 @@
#include "./alloc.h"
#include "./language.h"
#include "./subtree.h"
#include "./array.h"
#include "./stack.h"
#include "./length.h"
#include <assert.h>
#include <stdio.h>
// Capacity of each StackNode's `links` array; stack_node_add_link drops new
// links once a node holds this many.
#define MAX_LINK_COUNT 8
// Released nodes are kept for reuse until the pool holds this many; beyond
// that they are freed.
#define MAX_NODE_POOL_SIZE 50
// NOTE(review): presumably caps the number of concurrent StackIterators; the
// use site is outside this excerpt — confirm.
#define MAX_ITERATOR_COUNT 64
// Redefines `inline` from this point on so that functions declared with it
// are forced inline: `__forceinline` on Windows, and
// `static inline __attribute__((always_inline))` elsewhere.
// NOTE(review): this shadows a language keyword. Check that it is #undef'd
// before any later code in the same translation unit (e.g. the single-file
// build) uses a plain `static inline`.
#ifdef _WIN32
#define inline __forceinline
#else
#define inline static inline __attribute__((always_inline))
#endif
typedef struct StackNode StackNode;
// Edge from a stack node to one of its predecessors, labelled with the
// subtree that was pushed to get from `node` to the owner of the link.
typedef struct {
StackNode *node;
Subtree subtree;
bool is_pending;
} StackLink;
// Node of the parse stack graph. A node may have several predecessor links
// (up to MAX_LINK_COUNT). `position`, `error_cost`, `node_count` and
// `dynamic_precedence` are accumulated from the predecessor plus the linking
// subtree when the node is created or a link is added.
struct StackNode {
TSStateId state;
Length position;
StackLink links[MAX_LINK_COUNT];
short unsigned int link_count;
// Reference count; the node is recycled or freed when it drops to zero.
uint32_t ref_count;
unsigned error_cost;
unsigned node_count;
int dynamic_precedence;
};
// State of one path being walked backwards through the stack graph: the node
// reached so far and the subtrees collected along the way.
// NOTE(review): `subtree_count` and `is_pending` are maintained by
// stack__iter, which is only partly visible here — confirm their exact
// meaning there.
typedef struct {
StackNode *node;
SubtreeArray subtrees;
uint32_t subtree_count;
bool is_pending;
} StackIterator;
// Pairs a user callback with its opaque payload.
typedef struct {
void *payload;
StackIterateCallback callback;
} StackIterateSession;
// Growable array of stack node pointers; used as the pool of recycled nodes.
typedef Array(StackNode *) StackNodeArray;
// Lifecycle state of a stack version.
typedef enum {
StackStatusActive,
StackStatusPaused,
StackStatusHalted,
} StackStatus;
// One version (head) of the stack: its top node plus per-version bookkeeping.
// stack_head_delete releases `node`, `last_external_token` and `summary`.
typedef struct {
StackNode *node;
Subtree last_external_token;
StackSummary *summary;
unsigned node_count_at_last_error;
TSSymbol lookahead_when_paused;
StackStatus status;
} StackHead;
// The parse stack: a set of heads (versions) over a shared graph of nodes.
// `slices` and `iterators` are scratch arrays that stack__iter clears at the
// start of each traversal; `node_pool` holds recycled nodes.
struct Stack {
Array(StackHead) heads;
StackSliceArray slices;
Array(StackIterator) iterators;
StackNodeArray node_pool;
StackNode *base_node;
SubtreePool *subtree_pool;
};
// Bit flags returned by a StackCallback; stack__iter tests them with `&`, so
// Stop and Pop may be combined.
typedef unsigned StackAction;
enum {
StackActionNone,
StackActionStop = 1,
StackActionPop = 2,
};
// Callback invoked for each iterator step with the caller's payload.
typedef StackAction (*StackCallback)(void *, const StackIterator *);
// Adds one reference to `self`. A NULL node is ignored. The assertions check
// that the node is still live and that the counter did not wrap around.
static void stack_node_retain(StackNode *self) {
  if (self) {
    assert(self->ref_count > 0);
    self->ref_count++;
    assert(self->ref_count != 0);
  }
}
// Drops one reference to `self`. When the count reaches zero, the node's
// link subtrees and predecessor nodes are released too, and the node is
// returned to `pool` (or freed once the pool holds MAX_NODE_POOL_SIZE
// nodes).
//
// Predecessors reached through links 1..n-1 are released recursively; the
// first predecessor is handled by looping, so a long single chain of nodes
// does not deepen the call stack.
static void stack_node_release(StackNode *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
  for (;;) {
    assert(self->ref_count != 0);
    self->ref_count--;
    if (self->ref_count > 0) return;

    StackNode *first_predecessor = NULL;
    if (self->link_count > 0) {
      unsigned i = self->link_count - 1;
      while (i > 0) {
        StackLink extra_link = self->links[i];
        if (extra_link.subtree.ptr) ts_subtree_release(subtree_pool, extra_link.subtree);
        stack_node_release(extra_link.node, pool, subtree_pool);
        i--;
      }

      StackLink first_link = self->links[0];
      if (first_link.subtree.ptr) ts_subtree_release(subtree_pool, first_link.subtree);
      first_predecessor = first_link.node;
    }

    if (pool->size < MAX_NODE_POOL_SIZE) {
      array_push(pool, self);
    } else {
      ts_free(self);
    }

    if (!first_predecessor) return;
    self = first_predecessor;
  }
}
// Creates a stack node in `state` with a reference count of 1, reusing a node
// from `pool` when one is available.
//
// With a `previous_node`, the new node gets a single link to it labelled with
// `subtree`, and inherits its position, error cost, node count and dynamic
// precedence, each increased by the subtree's contribution when `subtree` is
// non-null. Without one, the node starts at position zero with zero cost.
// This function does not itself retain `previous_node` or `subtree`.
static StackNode *stack_node_new(StackNode *previous_node, Subtree subtree,
                                 bool is_pending, TSStateId state, StackNodeArray *pool) {
  StackNode *node;
  if (pool->size > 0) {
    node = array_pop(pool);
  } else {
    node = ts_malloc(sizeof(StackNode));
  }
  *node = (StackNode){.ref_count = 1, .link_count = 0, .state = state};

  if (!previous_node) {
    node->position = length_zero();
    node->error_cost = 0;
    return node;
  }

  node->link_count = 1;
  node->links[0] = (StackLink){
    .node = previous_node,
    .subtree = subtree,
    .is_pending = is_pending,
  };

  node->position = previous_node->position;
  node->error_cost = previous_node->error_cost;
  node->dynamic_precedence = previous_node->dynamic_precedence;
  node->node_count = previous_node->node_count;

  if (subtree.ptr) {
    node->error_cost += ts_subtree_error_cost(subtree);
    node->position = length_add(node->position, ts_subtree_total_size(subtree));
    node->node_count += ts_subtree_node_count(subtree);
    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
  }

  return node;
}
// Decides whether two link subtrees can be treated as interchangeable when
// merging stack links. Identical pointers (including two nulls) always match.
// Otherwise both must be non-null with the same symbol, and either both carry
// an error cost, or they agree on padding bytes, size bytes, child count, the
// `extra` flag and external scanner state.
static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
  if (left.ptr == right.ptr) return true;
  if (!left.ptr || !right.ptr) return false;
  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;

  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) {
    return true;
  }

  return
    ts_subtree_padding(left).bytes == ts_subtree_padding(right).bytes &&
    ts_subtree_size(left).bytes == ts_subtree_size(right).bytes &&
    ts_subtree_child_count(left) == ts_subtree_child_count(right) &&
    ts_subtree_extra(left) == ts_subtree_extra(right) &&
    ts_subtree_external_scanner_state_eq(left, right);
}
// Adds `link` as a further predecessor link of `self`, merging it with an
// existing link where possible.
//
// - A link from `self` to itself is ignored.
// - If an existing link carries an equivalent subtree and points at the same
//   node, the subtree with the higher dynamic precedence is kept.
// - If an existing link carries an equivalent subtree and points at a node
//   with the same state and byte position, the new predecessor's links are
//   merged into the existing predecessor recursively.
// - Otherwise the link is appended, retaining its node and subtree, unless
//   `self` already holds MAX_LINK_COUNT links, in which case it is dropped.
static void stack_node_add_link(StackNode *self, StackLink link, SubtreePool *subtree_pool) {
if (link.node == self) return;
for (int i = 0; i < self->link_count; i++) {
StackLink *existing_link = &self->links[i];
if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
// In general, we preserve ambiguities until they are removed from the stack
// during a pop operation where multiple paths lead to the same node. But in
// the special case where two links directly connect the same pair of nodes,
// we can safely remove the ambiguity ahead of time without changing behavior.
if (existing_link->node == link.node) {
// NOTE(review): unlike the code further down, the subtrees are used here
// without a `.ptr` check, and equivalence also holds for two null
// subtrees — confirm that case cannot reach this branch.
if (
ts_subtree_dynamic_precedence(link.subtree) >
ts_subtree_dynamic_precedence(existing_link->subtree)
) {
ts_subtree_retain(link.subtree);
ts_subtree_release(subtree_pool, existing_link->subtree);
existing_link->subtree = link.subtree;
self->dynamic_precedence =
link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
}
return;
}
// If the previous nodes are mergeable, merge them recursively.
if (existing_link->node->state == link.node->state &&
existing_link->node->position.bytes == link.node->position.bytes) {
for (int j = 0; j < link.node->link_count; j++) {
stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
}
int32_t dynamic_precedence = link.node->dynamic_precedence;
if (link.subtree.ptr) {
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
if (dynamic_precedence > self->dynamic_precedence) {
self->dynamic_precedence = dynamic_precedence;
}
return;
}
}
}
// No mergeable link was found: append, unless the node is already full.
if (self->link_count == MAX_LINK_COUNT) return;
stack_node_retain(link.node);
unsigned node_count = link.node->node_count;
int dynamic_precedence = link.node->dynamic_precedence;
self->links[self->link_count++] = link;
if (link.subtree.ptr) {
ts_subtree_retain(link.subtree);
node_count += ts_subtree_node_count(link.subtree);
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
// Keep the maxima over all paths leading to this node.
if (node_count > self->node_count) self->node_count = node_count;
if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
}
// Releases everything a head owns: its last external token, its summary (if
// any) and its reference to the top node. A head without a node is left
// untouched.
static void stack_head_delete(StackHead *self, StackNodeArray *pool, SubtreePool *subtree_pool) {
  if (!self->node) return;

  if (self->last_external_token.ptr) {
    ts_subtree_release(subtree_pool, self->last_external_token);
  }
  if (self->summary) {
    array_delete(self->summary);
    ts_free(self->summary);
  }
  stack_node_release(self->node, pool, subtree_pool);
}
// Appends a new active head whose top is `node` and returns its version
// index. The head copies `node_count_at_last_error` and `last_external_token`
// from `original_version`, and takes its own references to the node and, if
// present, the token.
static StackVersion ts_stack__add_version(Stack *self, StackVersion original_version,
                                          StackNode *node) {
  // Read from the original head before pushing: the push may move `contents`.
  const StackHead *original = &self->heads.contents[original_version];
  StackHead head = {
    .node = node,
    .node_count_at_last_error = original->node_count_at_last_error,
    .last_external_token = original->last_external_token,
    .status = StackStatusActive,
    .lookahead_when_paused = 0,
  };

  array_push(&self->heads, head);
  stack_node_retain(node);
  if (head.last_external_token.ptr) {
    ts_subtree_retain(head.last_external_token);
  }
  return (StackVersion)(self->heads.size - 1);
}
// Record a popped slice ending at `node`. If an existing slice already refers
// to a version whose head is `node`, the new slice is inserted right after the
// last such slice and shares that version; otherwise a new version is created
// for `node` and the slice is appended.
static void ts_stack__add_slice(Stack *self, StackVersion original_version,
                                StackNode *node, SubtreeArray *subtrees) {
  // Scan from the most recently added slice backwards.
  for (uint32_t next = self->slices.size; next > 0; next--) {
    StackVersion existing_version = self->slices.contents[next - 1].version;
    if (self->heads.contents[existing_version].node != node) continue;
    StackSlice shared_slice = {*subtrees, existing_version};
    array_insert(&self->slices, next, shared_slice);
    return;
  }

  StackVersion new_version = ts_stack__add_version(self, original_version, node);
  StackSlice new_slice = {*subtrees, new_version};
  array_push(&self->slices, new_slice);
}
// Walk the stack graph downward from the head of `version`, invoking
// `callback` once per visited (iterator, node) pair.
//
// The callback's return value is a bit set: StackActionPop records the path
// walked so far as a slice (see ts_stack__add_slice), StackActionStop ends that
// path. A path also ends when it reaches a node with no links.
//
// If `goal_subtree_count` is non-negative, the subtrees along each path are
// collected (and retained) in the iterator, with that much capacity reserved
// up front; if it is negative, no subtrees are collected.
//
// Returns self->slices, which is cleared at the start of every call.
inline StackSliceArray stack__iter(Stack *self, StackVersion version,
StackCallback callback, void *payload,
int goal_subtree_count) {
array_clear(&self->slices);
array_clear(&self->iterators);
StackHead *head = array_get(&self->heads, version);
StackIterator iterator = {
.node = head->node,
.subtrees = array_new(),
.subtree_count = 0,
.is_pending = true,
};
bool include_subtrees = false;
if (goal_subtree_count >= 0) {
include_subtrees = true;
array_reserve(&iterator.subtrees, goal_subtree_count);
}
array_push(&self->iterators, iterator);
while (self->iterators.size > 0) {
// `size` is captured up front, so iterators forked during this pass are
// first visited on the next pass of the outer loop.
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
StackIterator *iterator = &self->iterators.contents[i];
StackNode *node = iterator->node;
StackAction action = callback(payload, iterator);
bool should_pop = action & StackActionPop;
bool should_stop = action & StackActionStop || node->link_count == 0;
if (should_pop) {
SubtreeArray subtrees = iterator->subtrees;
// If the iterator keeps going it still needs its own array, so the
// slice gets a copy; otherwise the slice takes over the iterator's array.
if (!should_stop)
ts_subtree_array_copy(subtrees, &subtrees);
// Subtrees were appended while walking down from the top of the stack;
// reverse them before handing them to the slice.
ts_subtree_array_reverse(&subtrees);
ts_stack__add_slice(
self,
version,
node,
&subtrees
);
}
if (should_stop) {
// When popped, the array now belongs to the slice; otherwise free it here.
if (!should_pop)
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
array_erase(&self->iterators, i);
// Compensate for the erased element so the next iterator is not skipped.
i--, size--;
continue;
}
// Links 1..link_count-1 each fork a copy of the current iterator; link 0 is
// handled last (j == link_count) by advancing the current iterator in
// place, so the forks copy its state before it is modified.
for (uint32_t j = 1; j <= node->link_count; j++) {
StackIterator *next_iterator;
StackLink link;
if (j == node->link_count) {
link = node->links[0];
next_iterator = &self->iterators.contents[i];
} else {
// Stop forking once the iterator limit is reached; that link is not followed.
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
link = node->links[j];
StackIterator current_iterator = self->iterators.contents[i];
array_push(&self->iterators, current_iterator);
next_iterator = array_back(&self->iterators);
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
}
next_iterator->node = link.node;
if (link.subtree.ptr) {
if (include_subtrees) {
array_push(&next_iterator->subtrees, link.subtree);
ts_subtree_retain(link.subtree);
}
// Extra subtrees do not count toward subtree_count and do not affect
// the pending flag.
if (!ts_subtree_extra(link.subtree)) {
next_iterator->subtree_count++;
if (!link.is_pending) {
next_iterator->is_pending = false;
}
}
} else {
// A link without a subtree still counts as one entry and is never pending.
next_iterator->subtree_count++;
next_iterator->is_pending = false;
}
}
}
}
return self->slices;
}
// Allocate a stack that uses `subtree_pool` for subtree releases. The stack
// starts out with a single version whose head is the base node.
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *stack = ts_calloc(1, sizeof(Stack));

  array_init(&stack->heads);
  array_init(&stack->slices);
  array_init(&stack->iterators);
  array_init(&stack->node_pool);

  array_reserve(&stack->heads, 4);
  array_reserve(&stack->slices, 4);
  array_reserve(&stack->iterators, 4);
  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);

  stack->subtree_pool = subtree_pool;
  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);

  // Installs the base node as the only head.
  ts_stack_clear(stack);
  return stack;
}
// Free a stack: its scratch arrays, its base node reference, every head, the
// pooled stack nodes, and finally the stack itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents) {
    array_delete(&self->slices);
  }
  if (self->iterators.contents) {
    array_delete(&self->iterators);
  }

  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);

  for (uint32_t head_index = 0; head_index < self->heads.size; head_index++) {
    StackHead *head = &self->heads.contents[head_index];
    stack_head_delete(head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  if (self->node_pool.contents) {
    // Nodes parked in the pool are plain allocations that nobody references.
    for (uint32_t node_index = 0; node_index < self->node_pool.size; node_index++) {
      ts_free(self->node_pool.contents[node_index]);
    }
    array_delete(&self->node_pool);
  }

  array_delete(&self->heads);
  ts_free(self);
}
// Number of versions (heads) currently held by the stack.
uint32_t ts_stack_version_count(const Stack *self) {
  uint32_t version_count = self->heads.size;
  return version_count;
}
// Parse state stored in the top node of the given version.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
// Position stored in the top node of the given version.
Length ts_stack_position(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
// Last external token recorded on the given version's head (not retained for
// the caller).
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
// Replace the last external token of the given version. The new token is
// retained before the old one is released, so passing the token that is
// already stored is safe.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
  StackHead *head = array_get(&self->heads, version);
  Subtree previous_token = head->last_external_token;

  if (token.ptr) {
    ts_subtree_retain(token);
  }
  if (previous_token.ptr) {
    ts_subtree_release(self->subtree_pool, previous_token);
  }

  head->last_external_token = token;
}
// Error cost of the given version: the cost accumulated on its top node, plus
// one ERROR_COST_PER_RECOVERY if the version is paused or its top node is in
// the error state and its first link carries no subtree.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *top = head->node;

  bool is_paused = head->status == StackStatusPaused;
  bool needs_recovery =
    is_paused ||
    (top->state == ERROR_STATE && !top->links[0].subtree.ptr);

  unsigned cost = top->error_cost;
  if (needs_recovery) {
    cost += ERROR_COST_PER_RECOVERY;
  }
  return cost;
}
// Number of tree nodes added on this version since its last error. If the
// recorded baseline exceeds the current node count, the baseline is first
// lowered to the current count (so the result is then zero).
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned current_count = head->node->node_count;

  if (current_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = current_count;
  }

  return current_count - head->node_count_at_last_error;
}
// Push a new node holding `subtree` and `state` on top of the given version.
// Pushing without a subtree also resets the version's node-count baseline to
// the new node's count.
void ts_stack_push(Stack *self, StackVersion version, Subtree subtree,
                   bool pending, TSStateId state) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *pushed_node =
    stack_node_new(head->node, subtree, pending, state, &self->node_pool);

  head->node = pushed_node;
  if (!subtree.ptr) {
    head->node_count_at_last_error = pushed_node->node_count;
  }
}
inline StackAction iterate_callback(void *payload, const StackIterator *iterator) {
StackIterateSession *session = payload;
session->callback(
session->payload,
iterator->node->state,
iterator->subtree_count
);
return StackActionNone;
}
// Visit every node reachable from the given version, calling `callback` with
// `payload`, the node's state, and the number of subtrees above it.
void ts_stack_iterate(Stack *self, StackVersion version,
                      StackIterateCallback callback, void *payload) {
  StackIterateSession session = {
    .payload = payload,
    .callback = callback,
  };
  // A negative goal means no subtrees are collected while walking.
  stack__iter(self, version, iterate_callback, &session, -1);
}
// stack__iter callback: pop and stop once exactly `*payload` subtrees have
// been walked; keep going otherwise.
inline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
  const unsigned *goal_subtree_count = payload;
  bool reached_goal = iterator->subtree_count == *goal_subtree_count;
  return reached_goal ? (StackActionPop | StackActionStop) : StackActionNone;
}
// Pop `count` entries from the given version, returning one slice per path.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  StackSliceArray slices = stack__iter(self, version, pop_count_callback, &count, count);
  return slices;
}
// stack__iter callback: after the first counted subtree, stop; pop it only if
// the path walked so far is still pending.
inline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtree_count < 1) return StackActionNone;
  if (!iterator->is_pending) return StackActionStop;
  return StackActionPop | StackActionStop;
}
// Pop a pending subtree from the top of the given version, if there is one.
// When something was popped, the first resulting version is renumbered to
// take the place of `version`.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (slices.size == 0) return slices;

  StackSlice *first_slice = &slices.contents[0];
  ts_stack_renumber_version(self, first_slice->version, version);
  first_slice->version = version;
  return slices;
}
// stack__iter callback: once a subtree has been collected, stop. Pop it only
// if it is an error subtree and no earlier path has already popped one
// (`*payload` tracks that across paths).
inline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtrees.size == 0) return StackActionNone;

  bool *found_error = payload;
  if (*found_error || !ts_subtree_is_error(iterator->subtrees.contents[0])) {
    return StackActionStop;
  }

  *found_error = true;
  return StackActionPop | StackActionStop;
}
// Remove an error subtree from the top of the given version. Returns the
// popped subtrees, or an empty array if no link of the top node carries an
// error subtree or nothing was popped.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  StackNode *top = array_get(&self->heads, version)->node;

  bool has_error_link = false;
  for (unsigned link_index = 0; link_index < top->link_count; link_index++) {
    Subtree candidate = top->links[link_index].subtree;
    if (candidate.ptr && ts_subtree_is_error(candidate)) {
      has_error_link = true;
      break;
    }
  }

  if (has_error_link) {
    bool found_error = false;
    StackSliceArray slices = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (slices.size > 0) {
      assert(slices.size == 1);
      ts_stack_renumber_version(self, slices.contents[0].version, version);
      return slices.contents[0].subtrees;
    }
  }

  return (SubtreeArray){.size = 0};
}
// stack__iter callback: pop only when the walk reaches a node with no links.
inline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
  if (iterator->node->link_count == 0) {
    return StackActionPop;
  }
  return StackActionNone;
}
// Pop everything from the given version, down to nodes that have no links.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
  return slices;
}
// Payload passed to summarize_stack_callback while recording a stack summary.
typedef struct {
// Summary being filled in; entries are appended as nodes are visited.
StackSummary *summary;
// Paths whose subtree count exceeds this depth are not followed further.
unsigned max_depth;
} SummarizeStackSession;
inline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
SummarizeStackSession *session = payload;
TSStateId state = iterator->node->state;
unsigned depth = iterator->subtree_count;
if (depth > session->max_depth) return StackActionStop;
for (unsigned i = session->summary->size - 1; i + 1 > 0; i--) {
StackSummaryEntry entry = session->summary->contents[i];
if (entry.depth < depth) break;
if (entry.depth == depth && entry.state == state) return StackActionNone;
}
array_push(session->summary, ((StackSummaryEntry){
.position = iterator->node->position,
.depth = depth,
.state = state,
}));
return StackActionNone;
}
// Compute a summary of the parse states within `max_depth` subtrees of the top
// of the given version and store it on that version's head, where
// ts_stack_get_summary can retrieve it.
//
// A summary previously recorded on the same head is freed before being
// replaced, so recording more than once on a version does not leak the
// earlier allocation.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  SummarizeStackSession session = {
    .summary = ts_malloc(sizeof(StackSummary)),
    .max_depth = max_depth
  };
  array_init(session.summary);
  stack__iter(self, version, summarize_stack_callback, &session, -1);

  StackHead *head = &self->heads.contents[version];
  if (head->summary) {
    array_delete(head->summary);
    ts_free(head->summary);
  }
  head->summary = session.summary;
}
// Summary stored on the given version's head, as a borrowed pointer.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
// Dynamic precedence stored on the top node of the given version.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
// Report whether the given version has consumed any bytes since its last
// error. A version with no error cost counts as advanced. Otherwise the first
// link of each node is followed downward: a subtree with non-zero total bytes
// means "advanced"; an empty, error-free subtree on a node counted after the
// last error is skipped over; anything else ends the search.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *node = head->node;
  if (node->error_cost == 0) return true;

  while (node && node->link_count > 0) {
    Subtree subtree = node->links[0].subtree;
    if (!subtree.ptr) break;
    if (ts_subtree_total_bytes(subtree) > 0) return true;

    bool pushed_after_error = node->node_count > head->node_count_at_last_error;
    if (!pushed_after_error || ts_subtree_error_cost(subtree) != 0) break;

    node = node->links[0].node;
  }

  return false;
}
// Release the given version's head and remove it from the list of heads.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *doomed_head = array_get(&self->heads, version);
  stack_head_delete(doomed_head, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}
// Move version `v1` into slot `v2` (which must be lower), discarding the head
// that was at `v2`. If only the old `v2` head had a summary, the summary is
// carried over to the moved head instead of being freed.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  if (v1 == v2) return;
  assert(v2 < v1);
  assert((uint32_t)v1 < self->heads.size);

  StackHead *source = self->heads.contents + v1;
  StackHead *target = self->heads.contents + v2;

  bool inherit_summary = target->summary && !source->summary;
  if (inherit_summary) {
    source->summary = target->summary;
    target->summary = NULL;
  }

  stack_head_delete(target, &self->node_pool, self->subtree_pool);
  *target = *source;
  array_erase(&self->heads, v1);
}
// Exchange the heads stored at versions `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead *first = &self->heads.contents[v1];
  StackHead *second = &self->heads.contents[v2];
  StackHead saved_first = *first;
  *first = *second;
  *second = saved_first;
}
// Duplicate the given version as a new head at the end of the list. The copy
// takes its own references to the node and last external token, and starts
// without a summary. Returns the new version's index.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  assert(version < self->heads.size);

  StackHead duplicate = self->heads.contents[version];
  duplicate.summary = NULL;
  array_push(&self->heads, duplicate);

  stack_node_retain(duplicate.node);
  if (duplicate.last_external_token.ptr) {
    ts_subtree_retain(duplicate.last_external_token);
  }

  return self->heads.size - 1;
}
// Merge `version2` into `version1` when ts_stack_can_merge allows it: every
// link of version2's top node is added to version1's top node and version2 is
// removed. Returns whether the merge happened.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) return false;

  StackHead *target_head = &self->heads.contents[version1];
  StackNode *target_node = target_head->node;
  StackNode *source_node = self->heads.contents[version2].node;

  for (uint32_t link_index = 0; link_index < source_node->link_count; link_index++) {
    stack_node_add_link(target_node, source_node->links[link_index], self->subtree_pool);
  }

  // Adding links may have raised the node count; keep the error baseline in
  // step with it while in the error state.
  if (target_node->state == ERROR_STATE) {
    target_head->node_count_at_last_error = target_node->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}
// Two versions can merge when both are active and their top nodes agree on
// state, byte position and error cost, and their last external tokens have
// equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  const StackHead *first = &self->heads.contents[version1];
  const StackHead *second = &self->heads.contents[version2];

  if (first->status != StackStatusActive) return false;
  if (second->status != StackStatusActive) return false;
  if (first->node->state != second->node->state) return false;
  if (first->node->position.bytes != second->node->position.bytes) return false;
  if (first->node->error_cost != second->node->error_cost) return false;

  return ts_subtree_external_scanner_state_eq(
    first->last_external_token,
    second->last_external_token
  );
}
// Mark the given version as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}
// Pause the given version, remembering the lookahead symbol it was paused on
// and resetting its node-count baseline to the current node count.
void ts_stack_pause(Stack *self, StackVersion version, TSSymbol lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->node_count_at_last_error = head->node->node_count;
  head->lookahead_when_paused = lookahead;
  head->status = StackStatusPaused;
}
// Whether the given version is in the active state.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
// Whether the given version is in the halted state.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
// Whether the given version is in the paused state.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
// Reactivate a paused version and return the lookahead symbol it was paused
// on. The version must currently be paused.
TSSymbol ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  assert(head->status == StackStatusPaused);

  TSSymbol paused_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = 0;
  head->status = StackStatusActive;
  return paused_lookahead;
}
// Drop every version and reset the stack to a single active head that points
// at the base node.
void ts_stack_clear(Stack *self) {
  // Take the new head's reference first so that releasing the old heads
  // cannot drop the base node's last reference.
  stack_node_retain(self->base_node);

  uint32_t head_count = self->heads.size;
  for (uint32_t index = 0; index < head_count; index++) {
    stack_head_delete(self->heads.contents + index, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  StackHead initial_head = {
    .node = self->base_node,
    .last_external_token = NULL_SUBTREE,
    .status = StackStatusActive,
    .lookahead_when_paused = 0,
  };
  array_push(&self->heads, initial_head);
}
// Write a Graphviz DOT description of the whole stack graph to `f` (stderr if
// `f` is NULL). Every non-halted head, every node reachable from one, and
// every link between nodes is emitted once. Allocation recording is switched
// off for the duration of the call and restored afterwards. Always returns
// true.
//
// Formatting notes:
// - pointers are cast to `void *` as `%p` requires;
// - external scanner state bytes go through `unsigned char` so bytes >= 0x80
//   print as two hex digits rather than a sign-extended value;
// - a subtree's dynamic precedence is signed, so it is printed with `%d`, the
//   same way the node's own dynamic precedence is printed.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
  array_reserve(&self->iterators, 32);
  bool was_recording_allocations = ts_toggle_allocation_recording(false);
  if (!f) f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  // Emit one invisible anchor per live head and seed an iterator at its node.
  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    StackHead *head = &self->heads.contents[i];
    if (head->status == StackStatusHalted) continue;

    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);

    if (head->status == StackStatusPaused) {
      fprintf(f, "color=red ");
    }
    fprintf(f,
      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
      i,
      ts_stack_node_count_since_error(self, i),
      ts_stack_error_cost(self, i)
    );

    if (head->last_external_token.ptr) {
      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
      const char *data = ts_external_scanner_state_data(state);
      fprintf(f, "\nexternal_scanner_state:");
      for (uint32_t j = 0; j < state->length; j++) {
        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
      }
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((StackIterator){.node = head->node }));
  }

  // Advance all iterators one node per pass until every one of them has run
  // into a node that was already printed.
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      StackIterator iterator = self->iterators.contents[i];
      StackNode *node = iterator.node;

      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (visited_nodes.contents[j] == node) {
          node = NULL;
          break;
        }
      }

      if (!node) continue;
      all_iterators_done = false;

      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE) {
        fprintf(f, "label=\"?\"");
      } else if (
        node->link_count == 1 &&
        node->links[0].subtree.ptr &&
        ts_subtree_extra(node->links[0].subtree)
      ) {
        fprintf(f, "shape=point margin=0 label=\"\"");
      } else {
        fprintf(f, "label=\"%d\"", node->state);
      }

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
        node->position.extent.row + 1,
        node->position.extent.column,
        node->node_count,
        node->error_cost,
        node->dynamic_precedence
      );

      for (uint32_t j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending) fprintf(f, "style=dashed ");
        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");

        if (!link.subtree.ptr) {
          fprintf(f, "color=red");
        } else {
          fprintf(f, "label=\"");
          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
          if (quoted) fprintf(f, "'");
          const char *name = ts_language_symbol_name(language, ts_subtree_symbol(link.subtree));
          // Escape characters that would terminate or corrupt the DOT string.
          for (const char *c = name; *c; c++) {
            if (*c == '\"' || *c == '\\') fprintf(f, "\\");
            fprintf(f, "%c", *c);
          }
          if (quoted) fprintf(f, "'");
          fprintf(f, "\"");
          fprintf(
            f,
            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %d\"",
            ts_subtree_error_cost(link.subtree),
            ts_subtree_dynamic_precedence(link.subtree)
          );
        }

        fprintf(f, "];\n");

        // The first link reuses the current iterator slot; every other link
        // gets a fresh iterator appended to the list.
        StackIterator *next_iterator;
        if (j == 0) {
          next_iterator = &self->iterators.contents[i];
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  ts_toggle_allocation_recording(was_recording_allocations);
  return true;
}
#undef inline

135
src/tree_sitter/stack.h Normal file
View File

@ -0,0 +1,135 @@
#ifndef TREE_SITTER_PARSE_STACK_H_
#define TREE_SITTER_PARSE_STACK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "./subtree.h"
#include "./error_costs.h"
#include <stdio.h>
typedef struct Stack Stack;
typedef unsigned StackVersion;
#define STACK_VERSION_NONE ((StackVersion)-1)
// One result of a pop operation: the subtrees that were removed, together
// with the index of the stack version they were removed from.
typedef struct {
SubtreeArray subtrees;
StackVersion version;
} StackSlice;
typedef Array(StackSlice) StackSliceArray;
// One entry of a stack summary: the position and parse state of a stack node,
// and its depth (number of subtrees) below the top of the stack.
typedef struct {
Length position;
unsigned depth;
TSStateId state;
} StackSummaryEntry;
typedef Array(StackSummaryEntry) StackSummary;
// Create a stack.
Stack *ts_stack_new(SubtreePool *);
// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *);
// Get the stack's current number of versions.
uint32_t ts_stack_version_count(const Stack *);
// Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the initial state, 0.
TSStateId ts_stack_state(const Stack *, StackVersion);
// Get the last external token associated with a given version of the stack.
Subtree ts_stack_last_external_token(const Stack *, StackVersion);
// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);
// Push a tree and state onto the given version of the stack.
//
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *, StackVersion, uint32_t count);
// Remove an error at the top of the given version of the stack.
SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *, StackVersion);
int ts_stack_dynamic_precedence(Stack *, StackVersion);
bool ts_stack_has_advanced_since_error(const Stack *, StackVersion);
// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
void ts_stack_record_summary(Stack *, StackVersion, unsigned max_depth);
// Retrieve a summary of all the parse states near the top of the
// given version of the stack.
StackSummary *ts_stack_get_summary(Stack *, StackVersion);
// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *, StackVersion version);
// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *, StackVersion, StackVersion);
// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *, StackVersion, StackVersion);
TSSymbol ts_stack_resume(Stack *, StackVersion);
void ts_stack_pause(Stack *, StackVersion, TSSymbol);
void ts_stack_halt(Stack *, StackVersion);
bool ts_stack_is_active(const Stack *, StackVersion);
bool ts_stack_is_paused(const Stack *, StackVersion);
bool ts_stack_is_halted(const Stack *, StackVersion);
void ts_stack_renumber_version(Stack *, StackVersion, StackVersion);
void ts_stack_swap_versions(Stack *, StackVersion, StackVersion);
StackVersion ts_stack_copy_version(Stack *, StackVersion);
// Remove the given version from the stack.
void ts_stack_remove_version(Stack *, StackVersion);
void ts_stack_clear(Stack *);
bool ts_stack_print_dot_graph(Stack *, const TSLanguage *, FILE *);
typedef void (*StackIterateCallback)(void *, TSStateId, uint32_t);
void ts_stack_iterate(Stack *, StackVersion, StackIterateCallback, void *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSE_STACK_H_

996
src/tree_sitter/subtree.c Normal file
View File

@ -0,0 +1,996 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include "./alloc.h"
#include "./atomic.h"
#include "./subtree.h"
#include "./length.h"
#include "./language.h"
#include "./error_costs.h"
#include <stddef.h>
// A text edit expressed as three positions.
// NOTE(review): presumably `start` is where the edit begins and `old_end` /
// `new_end` are where the edited range ended before and after the change —
// confirm against the code that consumes this type.
typedef struct {
Length start;
Length old_end;
Length new_end;
} Edit;
#ifdef TREE_SITTER_TEST
#define TS_MAX_INLINE_TREE_LENGTH 2
#define TS_MAX_TREE_POOL_SIZE 0
#else
#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
#define TS_MAX_TREE_POOL_SIZE 32
#endif
static const ExternalScannerState empty_state = {.length = 0, .short_data = {0}};
// ExternalScannerState
// Initialize a scanner state with a copy of `length` bytes from `data`.
// States that fit in `short_data` are stored in place; longer ones are copied
// into a freshly allocated `long_data` buffer.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
  self->length = length;

  char *destination;
  if (length > sizeof(self->short_data)) {
    self->long_data = ts_malloc(length);
    destination = self->long_data;
  } else {
    destination = self->short_data;
  }

  memcpy(destination, data, length);
}
// Return a deep copy of a scanner state: heap-stored data is duplicated into
// a new buffer, inline data is copied along with the struct.
ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
  ExternalScannerState copy = *self;
  bool stored_on_heap = self->length > sizeof(self->short_data);
  if (stored_on_heap) {
    copy.long_data = ts_malloc(self->length);
    memcpy(copy.long_data, self->long_data, self->length);
  }
  return copy;
}
// Free the heap buffer of a scanner state, if it has one. Inline states own
// no memory.
void ts_external_scanner_state_delete(ExternalScannerState *self) {
  bool stored_on_heap = self->length > sizeof(self->short_data);
  if (!stored_on_heap) return;
  ts_free(self->long_data);
}
// Pointer to the state's bytes, wherever they are stored (heap or inline).
const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
  bool stored_on_heap = self->length > sizeof(self->short_data);
  return stored_on_heap ? self->long_data : self->short_data;
}
// Two scanner states are equal when they are the same object, or have the
// same length and identical bytes.
bool ts_external_scanner_state_eq(const ExternalScannerState *a, const ExternalScannerState *b) {
  if (a == b) return true;
  if (a->length != b->length) return false;

  const char *bytes_a = ts_external_scanner_state_data(a);
  const char *bytes_b = ts_external_scanner_state_data(b);
  return memcmp(bytes_a, bytes_b, a->length) == 0;
}
// SubtreeArray
// Copy `self` into `dest`, retaining every copied subtree. The source is
// taken by value, so `dest` may point at the caller's own copy of the same
// array. A source with zero capacity is copied without allocating.
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest) {
  dest->size = self.size;
  dest->capacity = self.capacity;

  if (self.capacity == 0) {
    dest->contents = self.contents;
    return;
  }

  Subtree *duplicate = ts_calloc(self.capacity, sizeof(Subtree));
  memcpy(duplicate, self.contents, self.size * sizeof(Subtree));
  for (uint32_t index = 0; index < self.size; index++) {
    ts_subtree_retain(duplicate[index]);
  }
  dest->contents = duplicate;
}
// Release every subtree in the array, then free the array's storage.
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self) {
  uint32_t index = 0;
  while (index < self->size) {
    ts_subtree_release(pool, self->contents[index]);
    index++;
  }
  array_delete(self);
}
// Strip the run of extra subtrees at the end of `self` and return them as a
// new array in their original order. Reference counts are not changed.
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *self) {
  SubtreeArray trailing = array_new();

  uint32_t kept = self->size;
  while (kept > 0) {
    Subtree last = self->contents[kept - 1];
    if (!ts_subtree_extra(last)) break;
    array_push(&trailing, last);
    kept--;
  }
  self->size = kept;

  // Extras were collected back-to-front; restore source order.
  ts_subtree_array_reverse(&trailing);
  return trailing;
}
// Reverse the order of the array's elements in place.
void ts_subtree_array_reverse(SubtreeArray *self) {
  if (self->size < 2) return;

  uint32_t front = 0;
  uint32_t back = self->size - 1;
  while (front < back) {
    Subtree held = self->contents[front];
    self->contents[front] = self->contents[back];
    self->contents[back] = held;
    front++;
    back--;
  }
}
// SubtreePool
SubtreePool ts_subtree_pool_new(uint32_t capacity) {
SubtreePool self = {array_new(), array_new()};
array_reserve(&self.free_trees, capacity);
return self;
}
// Free every pooled tree allocation and the pool's own arrays.
void ts_subtree_pool_delete(SubtreePool *self) {
  if (self->free_trees.contents) {
    unsigned free_count = self->free_trees.size;
    for (unsigned index = 0; index < free_count; index++) {
      ts_free(self->free_trees.contents[index].ptr);
    }
    array_delete(&self->free_trees);
  }

  if (self->tree_stack.contents) {
    array_delete(&self->tree_stack);
  }
}
// Hand out storage for one heap subtree, reusing a pooled allocation when one
// is available and allocating a fresh one otherwise.
static SubtreeHeapData *ts_subtree_pool_allocate(SubtreePool *self) {
  if (self->free_trees.size == 0) {
    return ts_malloc(sizeof(SubtreeHeapData));
  }
  return array_pop(&self->free_trees).ptr;
}
// Return a heap subtree's storage to the pool, or free it outright when the
// pool has no capacity or already holds TS_MAX_TREE_POOL_SIZE entries.
static void ts_subtree_pool_free(SubtreePool *self, SubtreeHeapData *tree) {
  bool pool_has_room =
    self->free_trees.capacity > 0 &&
    self->free_trees.size + 1 <= TS_MAX_TREE_POOL_SIZE;

  if (!pool_has_room) {
    ts_free(tree);
    return;
  }

  array_push(&self->free_trees, (MutableSubtree) {.ptr = tree});
}
// Subtree
// Whether a leaf with the given padding, size and lookahead is small enough
// to be stored inline rather than on the heap.
static inline bool ts_subtree_can_inline(Length padding, Length size, uint32_t lookahead_bytes) {
  if (padding.bytes >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (padding.extent.row >= 16) return false;
  if (padding.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
  if (size.extent.row != 0) return false;
  if (size.extent.column >= TS_MAX_INLINE_TREE_LENGTH) return false;
  return lookahead_bytes < 16;
}
// Create a leaf subtree for `symbol`. The leaf is stored inline when the
// symbol fits in a byte, it has no external tokens, and its dimensions pass
// ts_subtree_can_inline; otherwise it is allocated from `pool` with a
// reference count of 1. Only the end-of-input symbol is marked as extra.
Subtree ts_subtree_new_leaf(
  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
  uint32_t lookahead_bytes, TSStateId parse_state, bool has_external_tokens,
  bool is_keyword, const TSLanguage *language
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  bool extra = symbol == ts_builtin_sym_end;

  bool fits_inline =
    symbol <= UINT8_MAX &&
    !has_external_tokens &&
    ts_subtree_can_inline(padding, size, lookahead_bytes);

  if (!fits_inline) {
    SubtreeHeapData *heap_data = ts_subtree_pool_allocate(pool);
    *heap_data = (SubtreeHeapData) {
      .ref_count = 1,
      .padding = padding,
      .size = size,
      .lookahead_bytes = lookahead_bytes,
      .error_cost = 0,
      .child_count = 0,
      .symbol = symbol,
      .parse_state = parse_state,
      .visible = metadata.visible,
      .named = metadata.named,
      .extra = extra,
      .fragile_left = false,
      .fragile_right = false,
      .has_changes = false,
      .has_external_tokens = has_external_tokens,
      .is_missing = false,
      .is_keyword = is_keyword,
      .first_leaf = {.symbol = 0, .parse_state = 0},
    };
    return (Subtree) {.ptr = heap_data};
  }

  return (Subtree) {{
    .parse_state = parse_state,
    .symbol = symbol,
    .padding_bytes = padding.bytes,
    .padding_rows = padding.extent.row,
    .padding_columns = padding.extent.column,
    .size_bytes = size.bytes,
    .lookahead_bytes = lookahead_bytes,
    .visible = metadata.visible,
    .named = metadata.named,
    .extra = extra,
    .has_changes = false,
    .is_missing = false,
    .is_keyword = is_keyword,
    .is_inline = true,
  }};
}
// Change a subtree's symbol and refresh the `named` / `visible` flags from the
// language's metadata for the new symbol.
//
// For an inline subtree the symbol must fit in a byte. The bound is inclusive
// (`<= UINT8_MAX`) to match ts_subtree_new_leaf, which stores symbols up to
// and including UINT8_MAX inline.
void ts_subtree_set_symbol(
  MutableSubtree *self,
  TSSymbol symbol,
  const TSLanguage *language
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
  if (self->data.is_inline) {
    assert(symbol <= UINT8_MAX);
    self->data.symbol = symbol;
    self->data.named = metadata.named;
    self->data.visible = metadata.visible;
  } else {
    self->ptr->symbol = symbol;
    self->ptr->named = metadata.named;
    self->ptr->visible = metadata.visible;
  }
}
// Create an error leaf covering `size` bytes. The leaf is marked fragile on
// both sides and records the lookahead character that triggered the error.
Subtree ts_subtree_new_error(
  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
) {
  Subtree error_leaf = ts_subtree_new_leaf(
    pool, ts_builtin_sym_error, padding, size, bytes_scanned,
    parse_state, false, false, language
  );

  SubtreeHeapData *heap_data = (SubtreeHeapData *)error_leaf.ptr;
  heap_data->lookahead_char = lookahead_char;
  heap_data->fragile_left = true;
  heap_data->fragile_right = true;

  return error_leaf;
}
// Obtain a mutable version of `self`. Inline and uniquely-referenced subtrees
// are returned as they are. A shared subtree is cloned (children retained,
// or external scanner state copied for a childless tree with external
// tokens), and the caller's reference to the original is released.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self) {
  if (self.data.is_inline) {
    return (MutableSubtree) {self.data};
  }
  if (self.ptr->ref_count == 1) {
    return ts_subtree_to_mut_unsafe(self);
  }

  SubtreeHeapData *clone = ts_subtree_pool_allocate(pool);
  memcpy(clone, self.ptr, sizeof(SubtreeHeapData));

  if (clone->child_count > 0) {
    clone->children = ts_calloc(self.ptr->child_count, sizeof(Subtree));
    memcpy(clone->children, self.ptr->children, clone->child_count * sizeof(Subtree));
    for (uint32_t index = 0; index < clone->child_count; index++) {
      ts_subtree_retain(clone->children[index]);
    }
  } else if (clone->has_external_tokens) {
    clone->external_scanner_state =
      ts_external_scanner_state_copy(&self.ptr->external_scanner_state);
  }

  clone->ref_count = 1;
  ts_subtree_release(pool, self);
  return (MutableSubtree) {.ptr = clone};
}
// Perform up to `count` rotations along the chain of first children starting
// at `self`, then recompute the derived fields of every node that was touched.
//
// `stack` is used as scratch space: the trees rotated here are pushed above
// its current contents and popped again before returning, so its size is
// unchanged on exit.
static void ts_subtree__compress(MutableSubtree self, unsigned count, const TSLanguage *language,
MutableSubtreeArray *stack) {
unsigned initial_stack_size = stack->size;
MutableSubtree tree = self;
TSSymbol symbol = tree.ptr->symbol;
for (unsigned i = 0; i < count; i++) {
// Only rotate through nodes that are unshared, have at least two children,
// and (for the child and grandchild) carry the same symbol as `self`.
if (tree.ptr->ref_count > 1 || tree.ptr->child_count < 2) break;
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
if (
child.data.is_inline ||
child.ptr->child_count < 2 ||
child.ptr->ref_count > 1 ||
child.ptr->symbol != symbol
) break;
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[0]);
if (
grandchild.data.is_inline ||
grandchild.ptr->child_count < 2 ||
grandchild.ptr->ref_count > 1 ||
grandchild.ptr->symbol != symbol
) break;
// Rotate: the grandchild becomes the tree's first child, the child takes
// over the grandchild's last child as its own first child, and the child
// itself becomes the grandchild's last child.
tree.ptr->children[0] = ts_subtree_from_mut(grandchild);
child.ptr->children[0] = grandchild.ptr->children[grandchild.ptr->child_count - 1];
grandchild.ptr->children[grandchild.ptr->child_count - 1] = ts_subtree_from_mut(child);
array_push(stack, tree);
tree = grandchild;
}
// Unwind in reverse order. After a rotation, `child` here is the tree's new
// first child and `grandchild` is that node's last child; set_children is
// re-run on each, innermost first, with their existing child arrays.
while (stack->size > initial_stack_size) {
tree = array_pop(stack);
MutableSubtree child = ts_subtree_to_mut_unsafe(tree.ptr->children[0]);
MutableSubtree grandchild = ts_subtree_to_mut_unsafe(child.ptr->children[child.ptr->child_count - 1]);
ts_subtree_set_children(grandchild, grandchild.ptr->children, grandchild.ptr->child_count, language);
ts_subtree_set_children(child, child.ptr->children, child.ptr->child_count, language);
ts_subtree_set_children(tree, tree.ptr->children, tree.ptr->child_count, language);
}
}
// Rebalance the repetition chains inside `self`.
//
// The tree is traversed iteratively using pool->tree_stack as a work list.
// Only subtrees that have children and a reference count of 1 are visited.
// For a visited tree with a non-zero repeat_depth whose first child has a
// greater repeat_depth than its last child, ts_subtree__compress is applied
// with successively halved counts, starting at half the depth difference.
void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *language) {
array_clear(&pool->tree_stack);
if (ts_subtree_child_count(self) > 0 && self.ptr->ref_count == 1) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(self));
}
while (pool->tree_stack.size > 0) {
MutableSubtree tree = array_pop(&pool->tree_stack);
if (tree.ptr->repeat_depth > 0) {
Subtree child1 = tree.ptr->children[0];
Subtree child2 = tree.ptr->children[tree.ptr->child_count - 1];
if (
ts_subtree_child_count(child1) > 0 &&
ts_subtree_child_count(child2) > 0 &&
child1.ptr->repeat_depth > child2.ptr->repeat_depth
) {
unsigned n = child1.ptr->repeat_depth - child2.ptr->repeat_depth;
// The loop is driven by `i` alone; `n` is decremented but not read again.
for (unsigned i = n / 2; i > 0; i /= 2) {
ts_subtree__compress(tree, i, language, &pool->tree_stack);
n -= i;
}
}
}
// Queue every unshared child that itself has children.
for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
Subtree child = tree.ptr->children[i];
if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
array_push(&pool->tree_stack, ts_subtree_to_mut_unsafe(child));
}
}
}
}
// Repeat depth of `self`; subtrees without children report 0.
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
  if (ts_subtree_child_count(self) == 0) return 0;
  return self.ptr->repeat_depth;
}
// Install `children` (of length `child_count`) as the child array of the heap
// subtree `self` and recompute every field of `self` that is derived from its
// children: padding, size, lookahead_bytes, error_cost, node_count,
// dynamic_precedence, visible/named child counts, has_external_tokens,
// fragility flags, first_leaf and repeat_depth.
//
// `self` must not be an inline subtree. If `self` already has children stored
// in a different array, that array is released with ts_free. The `children`
// pointer itself is stored in `self` (no copy is made).
void ts_subtree_set_children(
MutableSubtree self, Subtree *children, uint32_t child_count, const TSLanguage *language
) {
assert(!self.data.is_inline);
// Release the previous child array unless the caller is re-installing it.
if (self.ptr->child_count > 0 && children != self.ptr->children) {
ts_free(self.ptr->children);
}
self.ptr->child_count = child_count;
self.ptr->children = children;
// Reset all aggregates before re-accumulating them from the children.
self.ptr->named_child_count = 0;
self.ptr->visible_child_count = 0;
self.ptr->error_cost = 0;
self.ptr->repeat_depth = 0;
self.ptr->node_count = 1;
self.ptr->has_external_tokens = false;
self.ptr->dynamic_precedence = 0;
// Index into the alias sequence; advanced only for non-extra children.
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
uint32_t lookahead_end_byte = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
// The first child supplies the parent's padding and initial size; each
// later child extends the size by its own padding plus size.
if (i == 0) {
self.ptr->padding = ts_subtree_padding(child);
self.ptr->size = ts_subtree_size(child);
} else {
self.ptr->size = length_add(self.ptr->size, ts_subtree_total_size(child));
}
// Track the furthest byte any child looked ahead to, measured from the
// start of the parent's padding.
uint32_t child_lookahead_end_byte =
self.ptr->padding.bytes +
self.ptr->size.bytes +
ts_subtree_lookahead_bytes(child);
if (child_lookahead_end_byte > lookahead_end_byte) lookahead_end_byte = child_lookahead_end_byte;
// Children with the error_repeat symbol do not contribute their error cost.
if (ts_subtree_symbol(child) != ts_builtin_sym_error_repeat) {
self.ptr->error_cost += ts_subtree_error_cost(child);
}
self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
self.ptr->node_count += ts_subtree_node_count(child);
// An aliased non-extra child counts as visible, and as named when the alias
// symbol is named; otherwise the child's own visibility is used, and an
// invisible child with children contributes its own counts.
if (alias_sequence && alias_sequence[non_extra_index] != 0 && !ts_subtree_extra(child)) {
self.ptr->visible_child_count++;
if (ts_language_symbol_metadata(language, alias_sequence[non_extra_index]).named) {
self.ptr->named_child_count++;
}
} else if (ts_subtree_visible(child)) {
self.ptr->visible_child_count++;
if (ts_subtree_named(child)) self.ptr->named_child_count++;
} else if (ts_subtree_child_count(child) > 0) {
self.ptr->visible_child_count += child.ptr->visible_child_count;
self.ptr->named_child_count += child.ptr->named_child_count;
}
if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
// Any error child makes the parent fragile on both sides and clears its
// parse state.
if (ts_subtree_is_error(child)) {
self.ptr->fragile_left = self.ptr->fragile_right = true;
self.ptr->parse_state = TS_TREE_STATE_NONE;
}
if (!ts_subtree_extra(child)) non_extra_index++;
}
// Convert the absolute lookahead end into a count relative to the end of
// this subtree's own content.
self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
// Error and error_repeat nodes pay a fixed recovery cost plus costs
// proportional to the skipped bytes, rows and visible trees.
if (self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error_repeat) {
self.ptr->error_cost +=
ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * self.ptr->size.extent.row;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
uint32_t grandchild_count = ts_subtree_child_count(child);
if (ts_subtree_extra(child)) continue;
if (ts_subtree_is_error(child) && grandchild_count == 0) continue;
if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
}
}
}
if (self.ptr->child_count > 0) {
Subtree first_child = self.ptr->children[0];
Subtree last_child = self.ptr->children[self.ptr->child_count - 1];
self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
// Fragility propagates from the outermost children (flags are only set
// here, never cleared).
if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
// An invisible, unnamed node with at least two children whose first child
// has the same symbol gets a repeat depth one greater than the deeper of
// its first and last children.
if (
self.ptr->child_count >= 2 &&
!self.ptr->visible &&
!self.ptr->named &&
ts_subtree_symbol(first_child) == self.ptr->symbol
) {
if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
} else {
self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
}
}
}
}
// Allocate a heap subtree for `symbol` from `pool`, initialise it with a
// reference count of 1 and the symbol's visibility metadata, then attach
// `children` through ts_subtree_set_children (which stores the array's
// `contents` pointer in the new node).
//
// Nodes for the built-in error and error_repeat symbols start out fragile on
// both sides.
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
                                   SubtreeArray *children, unsigned production_id,
                                   const TSLanguage *language) {
  bool is_error_symbol =
    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
  TSSymbolMetadata symbol_info = ts_language_symbol_metadata(language, symbol);

  SubtreeHeapData *node_data = ts_subtree_pool_allocate(pool);
  SubtreeHeapData initial = {
    .ref_count = 1,
    .symbol = symbol,
    .production_id = production_id,
    .visible = symbol_info.visible,
    .named = symbol_info.named,
    .has_changes = false,
    .fragile_left = is_error_symbol,
    .fragile_right = is_error_symbol,
    .is_keyword = false,
    .node_count = 0,
    .first_leaf = {.symbol = 0, .parse_state = 0},
  };
  *node_data = initial;

  MutableSubtree node = {.ptr = node_data};
  ts_subtree_set_children(node, children->contents, children->size, language);
  return node;
}
// Build an internal node with the built-in error symbol (production id 0)
// over `children`, and set its `extra` flag to the given value.
Subtree ts_subtree_new_error_node(SubtreePool *pool, SubtreeArray *children,
                                  bool extra, const TSLanguage *language) {
  MutableSubtree error_node =
    ts_subtree_new_node(pool, ts_builtin_sym_error, children, 0, language);
  error_node.ptr->extra = extra;
  return ts_subtree_from_mut(error_node);
}
// Create a zero-size leaf for `symbol` with the given padding and flag it as
// missing. The flag is written to whichever representation (inline or heap)
// ts_subtree_new_leaf produced.
Subtree ts_subtree_new_missing_leaf(SubtreePool *pool, TSSymbol symbol, Length padding,
                                    const TSLanguage *language) {
  Subtree leaf = ts_subtree_new_leaf(
    pool, symbol, padding, length_zero(), 0,
    0, false, false, language
  );

  if (!leaf.data.is_inline) {
    SubtreeHeapData *heap_leaf = (SubtreeHeapData *)leaf.ptr;
    heap_leaf->is_missing = true;
    return leaf;
  }

  leaf.data.is_missing = true;
  return leaf;
}
// Take an additional reference to a heap subtree. Inline subtrees carry no
// reference count, so they are left untouched.
void ts_subtree_retain(Subtree self) {
  if (!self.data.is_inline) {
    assert(self.ptr->ref_count > 0);
    atomic_inc((volatile uint32_t *)&self.ptr->ref_count);
    // Guards against the counter wrapping around to zero.
    assert(self.ptr->ref_count != 0);
  }
}
// Drop one reference to `self`. When the count reaches zero the subtree is
// returned to `pool`, and the same is done (iteratively, using
// `pool->tree_stack` as a worklist) for every child whose count also drops
// to zero. Inline subtrees are ignored.
void ts_subtree_release(SubtreePool *pool, Subtree self) {
  if (self.data.is_inline) return;

  MutableSubtreeArray *doomed = &pool->tree_stack;
  array_clear(doomed);

  assert(self.ptr->ref_count > 0);
  if (atomic_dec((volatile uint32_t *)&self.ptr->ref_count) == 0) {
    array_push(doomed, ts_subtree_to_mut_unsafe(self));
  }

  while (doomed->size > 0) {
    MutableSubtree dead = array_pop(doomed);

    if (dead.ptr->child_count == 0) {
      // Leaves may own external scanner state that needs its own cleanup.
      if (dead.ptr->has_external_tokens) {
        ts_external_scanner_state_delete(&dead.ptr->external_scanner_state);
      }
    } else {
      for (uint32_t index = 0; index < dead.ptr->child_count; index++) {
        Subtree descendant = dead.ptr->children[index];
        if (!descendant.data.is_inline) {
          assert(descendant.ptr->ref_count > 0);
          if (atomic_dec((volatile uint32_t *)&descendant.ptr->ref_count) == 0) {
            array_push(doomed, ts_subtree_to_mut_unsafe(descendant));
          }
        }
      }
      ts_free(dead.ptr->children);
    }

    ts_subtree_pool_free(pool, dead.ptr);
  }
}
// Structural equality of two subtrees.
//
// If either side is inline, the two values are compared bytewise. Two null
// subtrees are equal; a null and a non-null subtree are not. Heap subtrees
// are compared by symbol, visibility, namedness, padding bytes and size
// bytes; error nodes are then compared by lookahead character only, and all
// other nodes by child counts and, recursively, by each child.
bool ts_subtree_eq(Subtree self, Subtree other) {
  if (self.data.is_inline || other.data.is_inline) {
    return memcmp(&self, &other, sizeof(SubtreeInlineData)) == 0;
  }

  const SubtreeHeapData *lhs = self.ptr;
  const SubtreeHeapData *rhs = other.ptr;
  if (!lhs || !rhs) return !lhs && !rhs;

  if (
    lhs->symbol != rhs->symbol ||
    lhs->visible != rhs->visible ||
    lhs->named != rhs->named ||
    lhs->padding.bytes != rhs->padding.bytes ||
    lhs->size.bytes != rhs->size.bytes
  ) return false;

  if (lhs->symbol == ts_builtin_sym_error) {
    return lhs->lookahead_char == rhs->lookahead_char;
  }

  if (lhs->child_count != rhs->child_count) return false;
  if (lhs->child_count == 0) return true;

  if (lhs->visible_child_count != rhs->visible_child_count) return false;
  if (lhs->named_child_count != rhs->named_child_count) return false;

  for (uint32_t index = 0; index < lhs->child_count; index++) {
    if (!ts_subtree_eq(lhs->children[index], rhs->children[index])) return false;
  }
  return true;
}
// Three-way ordering of two subtrees: first by symbol, then by child count,
// then by the first pair of corresponding children that differ. Returns -1,
// 0 or 1.
int ts_subtree_compare(Subtree left, Subtree right) {
  TSSymbol left_symbol = ts_subtree_symbol(left);
  TSSymbol right_symbol = ts_subtree_symbol(right);
  if (left_symbol < right_symbol) return -1;
  if (right_symbol < left_symbol) return 1;

  uint32_t left_count = ts_subtree_child_count(left);
  uint32_t right_count = ts_subtree_child_count(right);
  if (left_count < right_count) return -1;
  if (right_count < left_count) return 1;

  for (uint32_t index = 0; index < left_count; index++) {
    int order = ts_subtree_compare(left.ptr->children[index], right.ptr->children[index]);
    if (order == -1 || order == 1) return order;
  }
  return 0;
}
// Set the `has_changes` flag on `self`, in whichever representation
// (inline or heap) it currently uses.
static inline void ts_subtree_set_has_changes(MutableSubtree *self) {
  if (!self->data.is_inline) {
    self->ptr->has_changes = true;
    return;
  }
  self->data.has_changes = true;
}
// Apply a text edit to the subtree `self` and return the edited root.
//
// The tree is processed iteratively with an explicit stack of
// (subtree slot, edit) pairs. Each visited subtree is made mutable through
// ts_subtree_make_mut, has its padding and size adjusted, is flagged with
// has_changes, and is written back into its slot; the edit is then translated
// into each affected child's coordinate space and queued.
//
// An inline subtree whose adjusted padding/size no longer fits the inline
// representation (per ts_subtree_can_inline) is replaced by a heap leaf
// allocated from `pool`.
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool) {
typedef struct {
Subtree *tree;
Edit edit;
} StackEntry;
Array(StackEntry) stack = array_new();
array_push(&stack, ((StackEntry) {
.tree = &self,
.edit = (Edit) {
.start = {edit->start_byte, edit->start_point},
.old_end = {edit->old_end_byte, edit->old_end_point},
.new_end = {edit->new_end_byte, edit->new_end_point},
},
}));
while (stack.size) {
StackEntry entry = array_pop(&stack);
// NOTE: this local `edit` shadows the function parameter of the same name.
Edit edit = entry.edit;
bool is_noop = edit.old_end.bytes == edit.start.bytes && edit.new_end.bytes == edit.start.bytes;
bool is_pure_insertion = edit.old_end.bytes == edit.start.bytes;
Length size = ts_subtree_size(*entry.tree);
Length padding = ts_subtree_padding(*entry.tree);
uint32_t lookahead_bytes = ts_subtree_lookahead_bytes(*entry.tree);
uint32_t end_byte = padding.bytes + size.bytes + lookahead_bytes;
// Skip subtrees that end (including their lookahead) before the edit starts.
if (edit.start.bytes > end_byte || (is_noop && edit.start.bytes == end_byte)) continue;
// If the edit is entirely within the space before this subtree, then shift this
// subtree over according to the edit without changing its size.
if (edit.old_end.bytes <= padding.bytes) {
padding = length_add(edit.new_end, length_sub(padding, edit.old_end));
}
// If the edit starts in the space before this subtree and extends into this subtree,
// shrink the subtree's content to compensate for the change in the space before it.
else if (edit.start.bytes < padding.bytes) {
size = length_sub(size, length_sub(edit.old_end, padding));
padding = edit.new_end;
}
// If the edit is a pure insertion right at the start of the subtree,
// shift the subtree over according to the insertion.
else if (edit.start.bytes == padding.bytes && is_pure_insertion) {
padding = edit.new_end;
}
// If the edit is within this subtree, resize the subtree to reflect the edit.
else {
uint32_t total_bytes = padding.bytes + size.bytes;
if (edit.start.bytes < total_bytes ||
(edit.start.bytes == total_bytes && is_pure_insertion)) {
size = length_add(
length_sub(edit.new_end, padding),
length_sub(size, length_sub(edit.old_end, padding))
);
}
}
MutableSubtree result = ts_subtree_make_mut(pool, *entry.tree);
if (result.data.is_inline) {
// Keep the inline form when the new dimensions still fit; otherwise
// promote the leaf to a heap allocation carrying the same attributes.
if (ts_subtree_can_inline(padding, size, lookahead_bytes)) {
result.data.padding_bytes = padding.bytes;
result.data.padding_rows = padding.extent.row;
result.data.padding_columns = padding.extent.column;
result.data.size_bytes = size.bytes;
} else {
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
data->ref_count = 1;
data->padding = padding;
data->size = size;
data->lookahead_bytes = lookahead_bytes;
data->error_cost = 0;
data->child_count = 0;
data->symbol = result.data.symbol;
data->parse_state = result.data.parse_state;
data->visible = result.data.visible;
data->named = result.data.named;
data->extra = result.data.extra;
data->fragile_left = false;
data->fragile_right = false;
data->has_changes = false;
data->has_external_tokens = false;
data->is_missing = result.data.is_missing;
data->is_keyword = result.data.is_keyword;
result.ptr = data;
}
} else {
result.ptr->padding = padding;
result.ptr->size = size;
}
ts_subtree_set_has_changes(&result);
*entry.tree = ts_subtree_from_mut(result);
// `child_left` is assigned at the top of every loop iteration before it is read.
Length child_left, child_right = length_zero();
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.tree); i < n; i++) {
Subtree *child = &result.ptr->children[i];
Length child_size = ts_subtree_total_size(*child);
child_left = child_right;
child_right = length_add(child_left, child_size);
// If this child ends before the edit, it is not affected.
if (child_right.bytes + ts_subtree_lookahead_bytes(*child) < edit.start.bytes) continue;
// If this child starts after the edit, then we're done processing children.
if (child_left.bytes > edit.old_end.bytes ||
(child_left.bytes == edit.old_end.bytes && child_size.bytes > 0 && i > 0)) break;
// Transform edit into the child's coordinate space.
Edit child_edit = {
.start = length_sub(edit.start, child_left),
.old_end = length_sub(edit.old_end, child_left),
.new_end = length_sub(edit.new_end, child_left),
};
// Clamp child_edit to the child's bounds.
if (edit.start.bytes < child_left.bytes) child_edit.start = length_zero();
if (edit.old_end.bytes < child_left.bytes) child_edit.old_end = length_zero();
if (edit.new_end.bytes < child_left.bytes) child_edit.new_end = length_zero();
if (edit.old_end.bytes > child_right.bytes) child_edit.old_end = child_size;
// Interpret all inserted text as applying to the *first* child that touches the edit.
// Subsequent children never have any text inserted into them; they are only
// shrunk to compensate for the edit.
if (child_right.bytes > edit.start.bytes ||
(child_right.bytes == edit.start.bytes && is_pure_insertion)) {
edit.new_end = edit.start;
}
// Children that occur before the edit are not reshaped by the edit.
else {
child_edit.old_end = child_edit.start;
child_edit.new_end = child_edit.start;
}
// Queue processing of this child's subtree.
array_push(&stack, ((StackEntry) {
.tree = child,
.edit = child_edit,
}));
}
}
array_delete(&stack);
return self;
}
// Descend from `tree` to the last leaf flagged as having external tokens.
// At each level the rightmost child carrying the flag is followed. Returns
// NULL_SUBTREE when `tree` itself has no external tokens.
Subtree ts_subtree_last_external_token(Subtree tree) {
  if (!ts_subtree_has_external_tokens(tree)) return NULL_SUBTREE;

  while (tree.ptr->child_count > 0) {
    uint32_t index = tree.ptr->child_count;
    while (index > 0) {
      Subtree candidate = tree.ptr->children[--index];
      if (ts_subtree_has_external_tokens(candidate)) {
        tree = candidate;
        break;
      }
    }
  }
  return tree;
}
// Format the character code `c` into `s` (capacity `n`) for display and
// return snprintf's result: 0 becomes EOF, -1 becomes INVALID, newline, tab
// and carriage return become quoted escapes, printable ASCII is quoted, and
// anything else is printed as a decimal number.
static size_t ts_subtree__write_char_to_string(char *s, size_t n, int32_t c) {
  switch (c) {
    case 0:
      return snprintf(s, n, "EOF");
    case -1:
      return snprintf(s, n, "INVALID");
    case '\n':
      return snprintf(s, n, "'\\n'");
    case '\t':
      return snprintf(s, n, "'\\t'");
    case '\r':
      return snprintf(s, n, "'\\r'");
    default:
      break;
  }

  // The range check keeps isprint() within the values it is defined for.
  int is_printable_ascii = c > 0 && c < 128 && isprint(c);
  if (is_printable_ascii) {
    return snprintf(s, n, "'%c'", c);
  }
  return snprintf(s, n, "%d", c);
}
// Write `string` to `f`, replacing each double quote with \" and each
// newline with the two characters \n. All other bytes are copied verbatim.
static void ts_subtree__write_dot_string(FILE *f, const char *string) {
  const char *cursor = string;
  while (*cursor != '\0') {
    switch (*cursor) {
      case '"':
        fputs("\\\"", f);
        break;
      case '\n':
        fputs("\\n", f);
        break;
      default:
        fputc(*cursor, f);
        break;
    }
    cursor++;
  }
}
// Sentinel field name passed for the root of a tree being stringified. It is
// recognised by pointer identity (not by string contents), so the pointer
// itself is const to guarantee it can never be re-pointed.
static const char *const ROOT_FIELD = "__ROOT__";
// Recursively render `self` as an S-expression into `string` and return the
// number of characters the rendering occupies (excluding the terminator).
//
// The function supports a measuring pass: when `limit` is 0, `writer` points
// at `string` itself, so every snprintf targets the same address with size 0
// and writes nothing, while `cursor` still advances by the length that would
// have been written. When `limit` is non-zero, `writer` points at `cursor`
// and output is appended there.
//
// NOTE(review): `limit` is passed unchanged to every snprintf and recursive
// call rather than being reduced as `cursor` advances, so it does not bound
// the total output; correctness relies on the caller sizing the buffer from a
// prior measuring pass.
//
// `alias_symbol` / `alias_is_named` describe an alias applied by the parent
// (0 for none). `field_name` is the field label to print before the node, or
// ROOT_FIELD (compared by pointer) for the root, or NULL.
static size_t ts_subtree__write_to_string(
Subtree self, char *string, size_t limit,
const TSLanguage *language, bool include_all,
TSSymbol alias_symbol, bool alias_is_named, const char *field_name
) {
if (!self.ptr) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool is_root = field_name == ROOT_FIELD;
// A node is printed when everything is requested, when it is a missing
// node, or when it is named (by its alias if it has one, otherwise by being
// both visible and named itself).
bool is_visible =
include_all ||
ts_subtree_missing(self) ||
(
alias_symbol
? alias_is_named
: ts_subtree_visible(self) && ts_subtree_named(self)
);
if (is_visible) {
if (!is_root) {
cursor += snprintf(*writer, limit, " ");
if (field_name) {
cursor += snprintf(*writer, limit, "%s: ", field_name);
}
}
// Non-empty error leaves are shown as the unexpected character.
if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
} else {
TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
const char *symbol_name = ts_language_symbol_name(language, symbol);
if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING ");
// Anonymous symbols are quoted to distinguish them from named ones.
if (alias_is_named || ts_subtree_named(self)) {
cursor += snprintf(*writer, limit, "%s", symbol_name);
} else {
cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
}
} else {
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
} else if (is_root) {
// An invisible root is still printed, as a quoted, self-closed symbol.
TSSymbol symbol = ts_subtree_symbol(self);
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(\"%s\")", symbol_name);
}
if (ts_subtree_child_count(self)) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
language,
self.ptr->production_id,
&field_map,
&field_map_end
);
// Counts non-extra children only; used to index aliases and field entries.
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
if (ts_subtree_extra(child)) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, include_all,
0, false, NULL
);
} else {
// NOTE: these two locals shadow the function parameters of the same name.
TSSymbol alias_symbol = alias_sequence
? alias_sequence[structural_child_index]
: 0;
bool alias_is_named = alias_symbol
? ts_language_symbol_metadata(language, alias_symbol).named
: false;
// An invisible node passes its own field name down to its children
// unless the child has a non-inherited field entry of its own.
const char *child_field_name = is_visible ? NULL : field_name;
// NOTE: this `i` shadows the child index of the enclosing loop.
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (!i->inherited && i->child_index == structural_child_index) {
child_field_name = language->field_names[i->field_id];
break;
}
}
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, include_all,
alias_symbol, alias_is_named, child_field_name
);
structural_child_index++;
}
}
}
if (is_visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
// Render `self` as an S-expression string.
//
// The tree is traversed twice: once with a zero-size buffer to measure the
// output, and once to fill a buffer of exactly that size (plus terminator).
//
// Returns a NUL-terminated string allocated with malloc and owned by the
// caller, or NULL if the allocation fails.
char *ts_subtree_string(
  Subtree self,
  const TSLanguage *language,
  bool include_all
) {
  char scratch_string[1];
  size_t size = ts_subtree__write_to_string(
    self, scratch_string, 0,
    language, include_all,
    0, false, ROOT_FIELD
  ) + 1;

  char *result = malloc(size * sizeof *result);
  // Never hand a NULL buffer with a non-zero size to the writing pass.
  if (!result) return NULL;

  ts_subtree__write_to_string(
    self, result, size,
    language, include_all,
    0, false, ROOT_FIELD
  );
  return result;
}
// Emit Graphviz DOT statements for the subtree at `self` and, recursively,
// for all of its children.
//
// Each node is named after the address of its Subtree slot and labelled with
// its symbol name (or `alias_symbol` if non-zero). `start_offset` is the byte
// offset at which this subtree begins; it is used for the tooltip's range
// and advanced by each child's total byte size.
void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
                                 const TSLanguage *language, TSSymbol alias_symbol,
                                 FILE *f) {
  TSSymbol subtree_symbol = ts_subtree_symbol(*self);
  TSSymbol symbol = alias_symbol ? alias_symbol : subtree_symbol;
  uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
  // %p requires a `void *` argument, hence the explicit casts below.
  fprintf(f, "tree_%p [label=\"", (void *)self);
  ts_subtree__write_dot_string(f, ts_language_symbol_name(language, symbol));
  fprintf(f, "\"");

  if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
  if (ts_subtree_extra(*self)) fprintf(f, ", fontcolor=gray");

  fprintf(f, ", tooltip=\""
    "range: %u - %u\n"
    "state: %d\n"
    "error-cost: %u\n"
    "has-changes: %u\n"
    "repeat-depth: %u\n"
    "lookahead-bytes: %u",
    start_offset, end_offset,
    ts_subtree_parse_state(*self),
    ts_subtree_error_cost(*self),
    ts_subtree_has_changes(*self),
    ts_subtree_repeat_depth(*self),
    ts_subtree_lookahead_bytes(*self)
  );

  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0) {
    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
  }

  fprintf(f, "\"]\n");

  uint32_t child_start_offset = start_offset;
  // Index of this production's first entry in the flat alias table. An offset
  // of zero (production id 0) means no aliases are looked up.
  uint32_t child_info_offset =
    language->max_alias_sequence_length *
    ts_subtree_production_id(*self);
  for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
    const Subtree *child = &self->ptr->children[i];
    TSSymbol child_alias_symbol = 0;
    if (!ts_subtree_extra(*child) && child_info_offset) {
      child_alias_symbol = language->alias_sequences[child_info_offset];
      child_info_offset++;
    }
    ts_subtree__print_dot_graph(child, child_start_offset, language, child_alias_symbol, f);
    fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", (void *)self, (void *)child, i);
    child_start_offset += ts_subtree_total_bytes(*child);
  }
}
// Write a complete Graphviz `digraph` describing `self` to `f`: a header,
// the recursively generated nodes and edges starting at byte offset 0, and
// the closing brace.
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f) {
  fputs("digraph tree {\n", f);
  fputs("edge [arrowhead=none]\n", f);

  const Subtree *root = &self;
  ts_subtree__print_dot_graph(root, 0, language, 0, f);

  fputs("}\n", f);
}
// Compare the external scanner states carried by two subtrees. A subtree
// that is null, has no external tokens, or has children is treated as
// carrying the shared empty state.
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other) {
  bool self_has_state =
    self.ptr && ts_subtree_has_external_tokens(self) && !self.ptr->child_count;
  bool other_has_state =
    other.ptr && ts_subtree_has_external_tokens(other) && !other.ptr->child_count;

  const ExternalScannerState *lhs =
    self_has_state ? &self.ptr->external_scanner_state : &empty_state;
  const ExternalScannerState *rhs =
    other_has_state ? &other.ptr->external_scanner_state : &empty_state;

  return ts_external_scanner_state_eq(lhs, rhs);
}

281
src/tree_sitter/subtree.h Normal file
View File

@ -0,0 +1,281 @@
#ifndef TREE_SITTER_SUBTREE_H_
#define TREE_SITTER_SUBTREE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include "./length.h"
#include "./array.h"
#include "./error_costs.h"
#include "tree_sitter/api.h"
#include "tree_sitter/parser.h"
// Sentinel parse state stored in subtrees that have no usable parse state
// (for example, ts_subtree_set_children assigns it to nodes containing an
// error child).
static const TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
// A Subtree value whose pointer member is NULL.
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
// Forward declarations; the unions are defined further down in this header.
typedef union Subtree Subtree;
typedef union MutableSubtree MutableSubtree;
// Serialized state of an external scanner, `length` bytes long.
// NOTE(review): presumably `short_data` holds the bytes in place when they
// fit in 24 bytes and `long_data` points to separate storage otherwise —
// confirm against ts_external_scanner_state_init.
typedef struct {
union {
char *long_data;
char short_data[24];
};
uint32_t length;
} ExternalScannerState;
// Compact leaf representation stored directly inside the Subtree union
// rather than behind a pointer. `is_inline` selects which union member the
// accessors read (see SUBTREE_GET and the ts_subtree_* inline functions).
// NOTE(review): this presumably relies on `is_inline` overlapping a bit that
// is always zero in a valid heap pointer — confirm for supported platforms.
//
// The narrow fields limit what can be stored inline: the symbol and the byte
// counts are 8 bits wide, and padding rows and lookahead bytes are 4 bits.
// ts_subtree_size reports an inline subtree's extent as row 0 with
// `size_bytes` columns.
typedef struct {
bool is_inline : 1;
bool visible : 1;
bool named : 1;
bool extra : 1;
bool has_changes : 1;
bool is_missing : 1;
bool is_keyword : 1;
uint8_t symbol;
uint8_t padding_bytes;
uint8_t size_bytes;
uint8_t padding_columns;
uint8_t padding_rows : 4;
uint8_t lookahead_bytes : 4;
uint16_t parse_state;
} SubtreeInlineData;
// Heap-allocated, reference-counted subtree data. Used for every subtree
// that is not stored inline; the trailing union holds the fields specific to
// each kind of subtree.
typedef struct {
// Modified through atomic_inc / atomic_dec in ts_subtree_retain / _release.
volatile uint32_t ref_count;
Length padding;
Length size;
uint32_t lookahead_bytes;
uint32_t error_cost;
uint32_t child_count;
TSSymbol symbol;
TSStateId parse_state;
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool is_missing : 1;
bool is_keyword : 1;
union {
// Non-terminal subtrees (`child_count > 0`)
struct {
Subtree *children;
uint32_t visible_child_count;
uint32_t named_child_count;
uint32_t node_count;
uint32_t repeat_depth;
int32_t dynamic_precedence;
uint16_t production_id;
// Symbol and parse state of the leftmost leaf beneath this node.
struct {
TSSymbol symbol;
TSStateId parse_state;
} first_leaf;
};
// External terminal subtrees (`child_count == 0 && has_external_tokens`)
ExternalScannerState external_scanner_state;
// Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
int32_t lookahead_char;
};
} SubtreeHeapData;
// Read-only handle to a subtree: either inline leaf data or a pointer to
// const heap data. Check `data.is_inline` to tell which member is active.
union Subtree {
SubtreeInlineData data;
const SubtreeHeapData *ptr;
};
// Same layout as Subtree, but the heap data may be modified through `ptr`.
// Convert with ts_subtree_from_mut / ts_subtree_to_mut_unsafe.
union MutableSubtree {
SubtreeInlineData data;
SubtreeHeapData *ptr;
};
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;
// Allocation pool for subtrees.
// `tree_stack` is scratch space used as a worklist by ts_subtree_balance and
// ts_subtree_release (both clear it on entry).
// NOTE(review): `free_trees` presumably caches released heap nodes for reuse
// — confirm against ts_subtree_pool_allocate / ts_subtree_pool_free.
typedef struct {
MutableSubtreeArray free_trees;
MutableSubtreeArray tree_stack;
} SubtreePool;
// External scanner state helpers.
void ts_external_scanner_state_init(ExternalScannerState *, const char *, unsigned);
const char *ts_external_scanner_state_data(const ExternalScannerState *);
// SubtreeArray helpers.
void ts_subtree_array_copy(SubtreeArray, SubtreeArray *);
void ts_subtree_array_delete(SubtreePool *, SubtreeArray *);
SubtreeArray ts_subtree_array_remove_trailing_extras(SubtreeArray *);
void ts_subtree_array_reverse(SubtreeArray *);
// Pool lifecycle.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *);
// Subtree constructors.
Subtree ts_subtree_new_leaf(
SubtreePool *, TSSymbol, Length, Length, uint32_t,
TSStateId, bool, bool, const TSLanguage *
);
Subtree ts_subtree_new_error(
SubtreePool *, int32_t, Length, Length, uint32_t, TSStateId, const TSLanguage *
);
MutableSubtree ts_subtree_new_node(SubtreePool *, TSSymbol, SubtreeArray *, unsigned, const TSLanguage *);
Subtree ts_subtree_new_error_node(SubtreePool *, SubtreeArray *, bool, const TSLanguage *);
Subtree ts_subtree_new_missing_leaf(SubtreePool *, TSSymbol, Length, const TSLanguage *);
MutableSubtree ts_subtree_make_mut(SubtreePool *, Subtree);
// Reference counting.
void ts_subtree_retain(Subtree);
void ts_subtree_release(SubtreePool *, Subtree);
// Comparison.
bool ts_subtree_eq(Subtree, Subtree);
int ts_subtree_compare(Subtree, Subtree);
// Mutation.
void ts_subtree_set_symbol(MutableSubtree *, TSSymbol, const TSLanguage *);
void ts_subtree_set_children(MutableSubtree, Subtree *, uint32_t, const TSLanguage *);
void ts_subtree_balance(Subtree, SubtreePool *, const TSLanguage *);
Subtree ts_subtree_edit(Subtree, const TSInputEdit *edit, SubtreePool *);
// Debug output. ts_subtree_string returns a malloc-allocated string.
char *ts_subtree_string(Subtree, const TSLanguage *, bool include_all);
void ts_subtree_print_dot_graph(Subtree, const TSLanguage *, FILE *);
// External token helpers.
Subtree ts_subtree_last_external_token(Subtree);
bool ts_subtree_external_scanner_state_eq(Subtree, Subtree);
// Accessors for fields that exist under the same name in both the inline and
// the heap representation. The helper macro reads field `name` from whichever
// representation `self` uses, and is undefined again after the accessors.
#define SUBTREE_GET(self, name) \
  ((self).data.is_inline ? (self).data.name : (self).ptr->name)

static inline TSSymbol ts_subtree_symbol(Subtree self) {
  return SUBTREE_GET(self, symbol);
}

static inline bool ts_subtree_visible(Subtree self) {
  return SUBTREE_GET(self, visible);
}

static inline bool ts_subtree_named(Subtree self) {
  return SUBTREE_GET(self, named);
}

static inline bool ts_subtree_extra(Subtree self) {
  return SUBTREE_GET(self, extra);
}

static inline bool ts_subtree_has_changes(Subtree self) {
  return SUBTREE_GET(self, has_changes);
}

static inline bool ts_subtree_missing(Subtree self) {
  return SUBTREE_GET(self, is_missing);
}

static inline bool ts_subtree_is_keyword(Subtree self) {
  return SUBTREE_GET(self, is_keyword);
}

static inline TSStateId ts_subtree_parse_state(Subtree self) {
  return SUBTREE_GET(self, parse_state);
}

static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) {
  return SUBTREE_GET(self, lookahead_bytes);
}

#undef SUBTREE_GET
// Set the `extra` flag on `self`, in whichever representation it uses.
static inline void ts_subtree_set_extra(MutableSubtree *self) {
  if (!self->data.is_inline) {
    self->ptr->extra = true;
    return;
  }
  self->data.extra = true;
}
// Symbol of the first leaf of `self`: the subtree's own symbol for leaves,
// or the cached `first_leaf.symbol` for nodes with children.
static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->child_count > 0
      ? self.ptr->first_leaf.symbol
      : self.ptr->symbol;
  }
  return self.data.symbol;
}
// Parse state of the first leaf of `self`: the subtree's own parse state for
// leaves, or the cached `first_leaf.parse_state` for nodes with children.
static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->child_count > 0
      ? self.ptr->first_leaf.parse_state
      : self.ptr->parse_state;
  }
  return self.data.parse_state;
}
// Padding that precedes the subtree's content. For inline subtrees the
// Length is assembled from the packed byte, row and column fields.
static inline Length ts_subtree_padding(Subtree self) {
  if (!self.data.is_inline) return self.ptr->padding;

  Length inline_padding = {
    self.data.padding_bytes,
    {self.data.padding_rows, self.data.padding_columns}
  };
  return inline_padding;
}
// Size of the subtree's content, excluding padding. An inline subtree
// reports row 0 and a column count equal to its byte count.
static inline Length ts_subtree_size(Subtree self) {
  if (!self.data.is_inline) return self.ptr->size;

  Length inline_size = {self.data.size_bytes, {0, self.data.size_bytes}};
  return inline_size;
}
// Padding plus content size of the subtree.
static inline Length ts_subtree_total_size(Subtree self) {
  Length padding = ts_subtree_padding(self);
  Length size = ts_subtree_size(self);
  return length_add(padding, size);
}
// Byte count of the subtree's padding plus content.
static inline uint32_t ts_subtree_total_bytes(Subtree self) {
  Length total = ts_subtree_total_size(self);
  return total.bytes;
}
// Number of direct children; inline subtrees never have any.
static inline uint32_t ts_subtree_child_count(Subtree self) {
  if (self.data.is_inline) return 0;
  return self.ptr->child_count;
}
// Node count of the subtree; any leaf (inline or heap) counts as 1.
static inline uint32_t ts_subtree_node_count(Subtree self) {
  if (self.data.is_inline) return 1;
  if (self.ptr->child_count == 0) return 1;
  return self.ptr->node_count;
}
// Cached count of visible children, or 0 for subtrees without children.
static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
  return ts_subtree_child_count(self) > 0 ? self.ptr->visible_child_count : 0;
}
// Error cost of the subtree. Missing subtrees have a fixed cost; inline
// subtrees that are not missing cost 0; other heap subtrees report their
// stored cost.
static inline uint32_t ts_subtree_error_cost(Subtree self) {
  if (!ts_subtree_missing(self)) {
    return self.data.is_inline ? 0 : self.ptr->error_cost;
  }
  return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
}
// Accumulated dynamic precedence; leaves (inline or heap) report 0.
static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
  if (self.data.is_inline || self.ptr->child_count == 0) return 0;
  return self.ptr->dynamic_precedence;
}
// Production id of a node with children, or 0 for leaves.
static inline uint16_t ts_subtree_production_id(Subtree self) {
  return ts_subtree_child_count(self) > 0 ? self.ptr->production_id : 0;
}
// Whether the subtree is fragile on its left edge; never true for inline
// subtrees.
static inline bool ts_subtree_fragile_left(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_left;
}
// Whether the subtree is fragile on its right edge; never true for inline
// subtrees.
static inline bool ts_subtree_fragile_right(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_right;
}
// Whether the subtree is flagged as containing external tokens; never true
// for inline subtrees.
static inline bool ts_subtree_has_external_tokens(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->has_external_tokens;
}
// Whether the subtree is fragile on either edge; never true for inline
// subtrees.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_left || self.ptr->fragile_right;
}
// Whether the subtree's symbol is the built-in error symbol.
static inline bool ts_subtree_is_error(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_error;
}
// Whether the subtree's symbol is the built-in end-of-input symbol.
static inline bool ts_subtree_is_eof(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_end;
}
// View a MutableSubtree as a read-only Subtree by copying the `data` member
// across; no reference count is touched.
static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
  Subtree readonly_view;
  readonly_view.data = self.data;
  return readonly_view;
}
// View a Subtree as a MutableSubtree by copying the `data` member across.
// No ownership check or copy is performed, so the caller is responsible for
// ensuring that mutating the subtree is safe.
static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
  MutableSubtree mutable_view;
  mutable_view.data = self.data;
  return mutable_view;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_SUBTREE_H_

149
src/tree_sitter/tree.c Normal file
View File

@ -0,0 +1,149 @@
#include "tree_sitter/api.h"
#include "./array.h"
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./tree_cursor.h"
#include "./tree.h"
// Number of entries in each tree's parent cache, which is used as a ring
// buffer by ts_tree_get_cached_parent / ts_tree_set_cached_parent.
static const unsigned PARENT_CACHE_CAPACITY = 32;
// Allocate a TSTree wrapping `root` for `language`, with its own copy of the
// `included_range_count` entries of `included_ranges`. The parent cache
// starts out unallocated and empty.
//
// `root` is stored as-is; this function does not retain it.
TSTree *ts_tree_new(
  Subtree root, const TSLanguage *language,
  const TSRange *included_ranges, unsigned included_range_count
) {
  TSTree *result = ts_malloc(sizeof(TSTree));
  result->root = root;
  result->language = language;
  result->parent_cache = NULL;
  result->parent_cache_start = 0;
  result->parent_cache_size = 0;
  result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
  // memcpy requires valid pointers even for a zero-length copy, so skip it
  // when there is nothing to copy (either pointer may then be NULL).
  if (included_range_count > 0) {
    memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
  }
  result->included_range_count = included_range_count;
  return result;
}
TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
}
// Release the tree's reference to its root subtree and free the tree along
// with its included ranges and parent cache. Passing NULL is a no-op.
void ts_tree_delete(TSTree *self) {
  if (self == NULL) return;

  // A temporary pool is needed only to satisfy ts_subtree_release.
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  ts_subtree_release(&scratch_pool, self->root);
  ts_subtree_pool_delete(&scratch_pool);

  ts_free(self->included_ranges);
  if (self->parent_cache != NULL) {
    ts_free(self->parent_cache);
  }
  ts_free(self);
}
TSNode ts_tree_root_node(const TSTree *self) {
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
}
// Return the language this tree was created with.
const TSLanguage *ts_tree_language(const TSTree *self) {
  const TSLanguage *language = self->language;
  return language;
}
// Apply `edit` to the tree: shift the included ranges that end at or after
// the edit's old end, edit the root subtree, and invalidate the parent cache.
//
// A range end equal to UINT32_MAX is left unchanged. If shifting a start or
// end wraps around below the edit's new end, it is set to UINT32_MAX /
// POINT_MAX.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
  for (unsigned index = 0; index < self->included_range_count; index++) {
    TSRange *included = &self->included_ranges[index];

    // Ranges that end before the edit's old end are not adjusted.
    if (included->end_byte < edit->old_end_byte) continue;

    if (included->end_byte != UINT32_MAX) {
      uint32_t bytes_past_edit = included->end_byte - edit->old_end_byte;
      included->end_byte = edit->new_end_byte + bytes_past_edit;
      included->end_point = point_add(
        edit->new_end_point,
        point_sub(included->end_point, edit->old_end_point)
      );
      if (included->end_byte < edit->new_end_byte) {
        included->end_byte = UINT32_MAX;
        included->end_point = POINT_MAX;
      }
    }

    if (included->start_byte >= edit->old_end_byte) {
      uint32_t bytes_past_edit = included->start_byte - edit->old_end_byte;
      included->start_byte = edit->new_end_byte + bytes_past_edit;
      included->start_point = point_add(
        edit->new_end_point,
        point_sub(included->start_point, edit->old_end_point)
      );
      if (included->start_byte < edit->new_end_byte) {
        included->start_byte = UINT32_MAX;
        included->start_point = POINT_MAX;
      }
    }
  }

  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  self->root = ts_subtree_edit(self->root, edit, &scratch_pool);
  // Cached parents refer to the pre-edit tree, so drop them all.
  self->parent_cache_start = 0;
  self->parent_cache_size = 0;
  ts_subtree_pool_delete(&scratch_pool);
}
// Compute the ranges that differ between `self` and `other`, taking into
// account differences between their included ranges. The number of ranges is
// stored in `*count` and the array produced by ts_subtree_get_changed_ranges
// is returned.
// NOTE(review): the caller presumably owns the returned array — confirm
// against ts_subtree_get_changed_ranges.
TSRange *ts_tree_get_changed_ranges(const TSTree *self, const TSTree *other, uint32_t *count) {
  TSRange *result;
  TreeCursor cursor1 = {NULL, array_new()};
  TreeCursor cursor2 = {NULL, array_new()};
  // Each cursor walks its own tree: cursor1 over `self`, cursor2 over `other`.
  ts_tree_cursor_init(&cursor1, ts_tree_root_node(self));
  ts_tree_cursor_init(&cursor2, ts_tree_root_node(other));

  TSRangeArray included_range_differences = array_new();
  ts_range_array_get_changed_ranges(
    self->included_ranges, self->included_range_count,
    other->included_ranges, other->included_range_count,
    &included_range_differences
  );

  *count = ts_subtree_get_changed_ranges(
    &self->root, &other->root, &cursor1, &cursor2,
    self->language, &included_range_differences, &result
  );

  array_delete(&included_range_differences);
  array_delete(&cursor1.stack);
  array_delete(&cursor2.stack);
  return result;
}
void ts_tree_print_dot_graph(const TSTree *self, FILE *file) {
ts_subtree_print_dot_graph(self->root, self->language, file);
}
// Looks `node` up in the tree's parent cache.
//
// Returns the cached parent rebuilt as a TSNode when an entry whose child
// matches `node->id` exists; otherwise returns a node built from NULL tree,
// NULL subtree, a zero length and alias 0.
TSNode ts_tree_get_cached_parent(const TSTree *self, const TSNode *node) {
  const uint32_t entry_count = self->parent_cache_size;
  uint32_t slot = self->parent_cache_start;

  // Walk the occupied slots of the cache, wrapping around at its capacity.
  for (uint32_t visited = 0; visited < entry_count; visited++, slot++) {
    ParentCacheEntry *candidate = &self->parent_cache[slot % PARENT_CACHE_CAPACITY];
    if (candidate->child != node->id) continue;
    return ts_node_new(self, candidate->parent, candidate->position, candidate->alias_symbol);
  }

  return ts_node_new(NULL, NULL, length_zero(), 0);
}
void ts_tree_set_cached_parent(const TSTree *_self, const TSNode *node, const TSNode *parent) {
TSTree *self = (TSTree *)_self;
if (!self->parent_cache) {
self->parent_cache = ts_calloc(PARENT_CACHE_CAPACITY, sizeof(ParentCacheEntry));
}
uint32_t index = (self->parent_cache_start + self->parent_cache_size) % PARENT_CACHE_CAPACITY;
self->parent_cache[index] = (ParentCacheEntry) {
.child = node->id,
.parent = (const Subtree *)parent->id,
.position = {
parent->context[0],
{parent->context[1], parent->context[2]}
},
.alias_symbol = parent->context[3],
};
if (self->parent_cache_size == PARENT_CACHE_CAPACITY) {
self->parent_cache_start++;
} else {
self->parent_cache_size++;
}
}

34
src/tree_sitter/tree.h Normal file
View File

@ -0,0 +1,34 @@
#ifndef TREE_SITTER_TREE_H_
#define TREE_SITTER_TREE_H_
#ifdef __cplusplus
extern "C" {
#endif
// One entry of a tree's parent cache: maps a child node to the data needed to
// rebuild its parent as a TSNode.
typedef struct {
// Identity of the child; compared against TSNode.id on lookup.
const Subtree *child;
// Subtree of the cached parent.
const Subtree *parent;
// Position handed to ts_node_new when the parent node is rebuilt.
Length position;
// Alias symbol handed to ts_node_new when the parent node is rebuilt.
TSSymbol alias_symbol;
} ParentCacheEntry;
// A syntax tree: a root subtree together with its language, the included
// ranges, and a cache of child -> parent lookups.
struct TSTree {
// Root of the subtree structure.
Subtree root;
// Language returned by ts_tree_language.
const TSLanguage *language;
// Parent cache storage; NULL until ts_tree_set_cached_parent first allocates
// it. Indexed modulo PARENT_CACHE_CAPACITY.
ParentCacheEntry *parent_cache;
// Index (before the modulo) of the oldest occupied cache slot.
uint32_t parent_cache_start;
// Number of occupied cache slots.
uint32_t parent_cache_size;
// Array of included ranges, with `included_range_count` elements.
TSRange *included_ranges;
unsigned included_range_count;
};
// Creates a tree from a root subtree, a language, and an array of included
// ranges with its element count.
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *, unsigned);
// Builds a TSNode from a tree, a subtree, a position and an alias symbol.
TSNode ts_node_new(const TSTree *, const Subtree *, Length, TSSymbol);
// Returns the cached parent of the given node, or a node built with a NULL
// tree and NULL subtree when the cache has no entry for it.
TSNode ts_tree_get_cached_parent(const TSTree *, const TSNode *);
// Stores (child node, parent node) in the tree's parent cache. The tree is
// modified even though it is passed as const.
void ts_tree_set_cached_parent(const TSTree *, const TSNode *, const TSNode *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_TREE_H_

View File

@ -0,0 +1,302 @@
#include "tree_sitter/api.h"
#include "./alloc.h"
#include "./tree_cursor.h"
#include "./language.h"
#include "./tree.h"
// State for iterating over the direct children of one subtree.
// NOTE: the field order matters; ts_tree_cursor_iterate_children initializes
// this struct positionally in one of its return paths.
typedef struct {
// Subtree whose children are iterated; its `ptr` is NULL when there is
// nothing to iterate.
Subtree parent;
// Tree the cursor belongs to.
const TSTree *tree;
// Position assigned to the next child that will be yielded.
Length position;
// Index of the next child within `parent`'s children.
uint32_t child_index;
// Index of the next child counting only non-extra children; used to index
// `alias_sequence`.
uint32_t structural_child_index;
// Alias sequence of the parent's production, or NULL.
const TSSymbol *alias_sequence;
} CursorChildIterator;
// CursorChildIterator
static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
if (ts_subtree_child_count(*last_entry->subtree) == 0) {
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->ptr->production_id
);
return (CursorChildIterator) {
.tree = self->tree,
.parent = *last_entry->subtree,
.position = last_entry->position,
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// Yields the next child of the iterator's parent.
//
// On success, fills `*result` with the child's cursor entry, sets `*visible`
// to whether the child is visible (either by itself or through an alias in
// the parent's alias sequence), advances the iterator and returns true.
// Returns false, leaving the outputs untouched, when the iterator has no
// parent or all children have been yielded.
static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
                                                      TreeCursorEntry *result,
                                                      bool *visible) {
  if (self->parent.ptr == NULL) return false;
  if (self->child_index == self->parent.ptr->child_count) return false;

  const Subtree *current = &self->parent.ptr->children[self->child_index];

  result->subtree = current;
  result->position = self->position;
  result->child_index = self->child_index;
  result->structural_child_index = self->structural_child_index;

  *visible = ts_subtree_visible(*current);

  // Only non-extra children occupy a slot in the alias sequence.
  if (self->alias_sequence != NULL && !ts_subtree_extra(*current)) {
    if (self->alias_sequence[self->structural_child_index] != 0) {
      *visible = true;
    }
    self->structural_child_index++;
  }

  // Move past this child, and past the padding of the following child if
  // there is one, so that `position` is where that child starts.
  self->position = length_add(self->position, ts_subtree_size(*current));
  self->child_index++;
  if (self->child_index < self->parent.ptr->child_count) {
    Subtree following = self->parent.ptr->children[self->child_index];
    self->position = length_add(self->position, ts_subtree_padding(following));
  }

  return true;
}
// TSTreeCursor - lifecycle
// Creates a new cursor positioned on `node`. The public TSTreeCursor is
// zero-initialized and then set up through its internal TreeCursor view.
TSTreeCursor ts_tree_cursor_new(TSNode node) {
  TSTreeCursor cursor = {NULL, NULL, {0, 0}};
  TreeCursor *internal = (TreeCursor *)&cursor;
  ts_tree_cursor_init(internal, node);
  return cursor;
}
// Re-initializes an existing cursor so that it is positioned on `node`.
void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
  TreeCursor *internal = (TreeCursor *)_self;
  ts_tree_cursor_init(internal, node);
}
// Points the cursor at `node`: adopts the node's tree, empties the stack and
// pushes a single entry describing the node at its start position.
void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
  self->tree = node.tree;
  array_clear(&self->stack);

  Length start = {
    ts_node_start_byte(node),
    ts_node_start_point(node)
  };
  TreeCursorEntry root_entry = {
    .subtree = (const Subtree *)node.id,
    .position = start,
    .child_index = 0,
    .structural_child_index = 0,
  };
  array_push(&self->stack, root_entry);
}
// Releases the storage held by the cursor's stack.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *internal = (TreeCursor *)_self;
  array_delete(&internal->stack);
}
// TSTreeCursor - walking the tree
// Moves the cursor to the first visible child of the current node.
//
// Invisible children that themselves contain visible children are descended
// into. Returns true when a visible child was reached. Returns false when no
// visible child was found at the level where the search ended; entries pushed
// for invisible nodes during the descent are not removed in that case.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;

  for (;;) {
    bool descended = false;
    bool is_visible;
    TreeCursorEntry child;
    CursorChildIterator children = ts_tree_cursor_iterate_children(cursor);

    while (!descended && ts_tree_cursor_child_iterator_next(&children, &child, &is_visible)) {
      if (is_visible) {
        array_push(&cursor->stack, child);
        return true;
      }

      // Invisible node with visible descendants: step into it and restart the
      // search among its children.
      if (ts_subtree_visible_child_count(*child.subtree) > 0) {
        array_push(&cursor->stack, child);
        descended = true;
      }
    }

    if (!descended) return false;
  }
}
// Moves the cursor to the first visible child of the current node whose end
// byte is greater than `goal_byte`.
//
// Returns the index of that child, counted over visible children (visible
// descendants of skipped invisible children are included in the count).
// Returns -1, with the stack restored to its original size, when no such
// child is found.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *_self, uint32_t goal_byte) {
TreeCursor *self = (TreeCursor *)_self;
// Remembered so the stack can be restored when the search fails.
uint32_t initial_size = self->stack.size;
uint32_t visible_child_index = 0;
bool did_descend;
do {
did_descend = false;
bool visible;
TreeCursorEntry entry;
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
// A child qualifies once it ends after the goal byte.
uint32_t end_byte = entry.position.bytes + ts_subtree_size(*entry.subtree).bytes;
bool at_goal = end_byte > goal_byte;
uint32_t visible_child_count = ts_subtree_visible_child_count(*entry.subtree);
if (at_goal) {
if (visible) {
array_push(&self->stack, entry);
return visible_child_index;
}
// Invisible but containing visible children: descend and keep searching
// inside it.
if (visible_child_count > 0) {
array_push(&self->stack, entry);
did_descend = true;
break;
}
} else if (visible) {
// Skipped visible child: counts as one.
visible_child_index++;
} else {
// Skipped invisible child: counts as many as it has visible children.
visible_child_index += visible_child_count;
}
}
} while (did_descend);
// The search descended into at least one invisible node without finding a
// match there: fall back to the next sibling of the deepest entry reached.
if (self->stack.size > initial_size &&
ts_tree_cursor_goto_next_sibling((TSTreeCursor *)self)) {
return visible_child_index;
}
// Nothing found: undo any descent.
self->stack.size = initial_size;
return -1;
}
// Moves the cursor to the next visible sibling of the current node.
//
// Returns true when the cursor moved. Returns false, with the stack size
// restored to its original value, when there is no such sibling.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
TreeCursor *self = (TreeCursor *)_self;
// Remembered so the cursor can be restored when no sibling exists.
uint32_t initial_size = self->stack.size;
while (self->stack.size > 1) {
// Pop the current entry and resume iterating its parent's children from
// exactly where that entry was.
TreeCursorEntry entry = array_pop(&self->stack);
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
iterator.child_index = entry.child_index;
iterator.structural_child_index = entry.structural_child_index;
iterator.position = entry.position;
bool visible = false;
// Re-yield the popped entry itself; this advances the iterator past it and
// reports whether it is visible.
ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible);
// The popped entry is visible and is not the entry the cursor started on
// (the stack is already shorter than initial_size - 1), so stop the search
// here instead of looking at that entry's siblings.
if (visible && self->stack.size + 1 < initial_size) break;
while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) {
if (visible) {
array_push(&self->stack, entry);
return true;
}
// Invisible sibling containing visible children: enter it and move to its
// first visible child.
if (ts_subtree_visible_child_count(*entry.subtree)) {
array_push(&self->stack, entry);
ts_tree_cursor_goto_first_child(_self);
return true;
}
}
}
// No sibling found: put the cursor back where it started.
self->stack.size = initial_size;
return false;
}
// Moves the cursor to the closest ancestor that is visible, either by itself
// or through an alias assigned by its own parent's production.
//
// Returns true and truncates the stack to that ancestor when one exists;
// returns false and leaves the stack unchanged otherwise.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;

  // `depth` is the stack size that would result from stopping at the
  // candidate ancestor, which lives at index `depth - 1`.
  for (unsigned depth = cursor->stack.size - 1; depth > 0; depth--) {
    unsigned ancestor_index = depth - 1;
    TreeCursorEntry *ancestor = &cursor->stack.contents[ancestor_index];

    // The bottom entry of the stack has no parent entry and so no alias.
    bool aliased = false;
    if (ancestor_index > 0) {
      TreeCursorEntry *grandparent = &cursor->stack.contents[ancestor_index - 1];
      const TSSymbol *aliases = ts_language_alias_sequence(
        cursor->tree->language,
        grandparent->subtree->ptr->production_id
      );
      aliased = aliases && aliases[ancestor->structural_child_index];
    }

    if (!ts_subtree_visible(*ancestor->subtree) && !aliased) continue;

    cursor->stack.size = depth;
    return true;
  }

  return false;
}
// Builds the TSNode for the cursor's current position.
//
// The node's alias symbol is taken from the parent entry's alias sequence
// when the cursor is below the bottom stack entry, the sequence exists, and
// the current subtree is not an extra; otherwise it is 0.
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  TreeCursorEntry *current = array_back(&cursor->stack);

  TSSymbol alias = 0;
  if (cursor->stack.size > 1) {
    TreeCursorEntry *parent = &cursor->stack.contents[cursor->stack.size - 2];
    const TSSymbol *aliases = ts_language_alias_sequence(
      cursor->tree->language,
      parent->subtree->ptr->production_id
    );
    bool has_slot = aliases && !ts_subtree_extra(*current->subtree);
    if (has_slot) {
      alias = aliases[current->structural_child_index];
    }
  }

  return ts_node_new(cursor->tree, current->subtree, current->position, alias);
}
// Returns the field id associated with the cursor's current node, or 0 when
// none is found.
//
// The lookup starts at the current node and continues upward through its
// invisible, non-aliased ancestors. At each level the parent's field map is
// searched for a non-inherited entry whose child index matches the entry's
// structural child index.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  const unsigned last = cursor->stack.size - 1;

  for (unsigned depth = last; depth > 0; depth--) {
    TreeCursorEntry *node_entry = &cursor->stack.contents[depth];
    TreeCursorEntry *parent_entry = &cursor->stack.contents[depth - 1];

    // Above the current node, the walk ends at the first ancestor that is
    // visible or aliased.
    if (depth < last) {
      if (ts_subtree_visible(*node_entry->subtree)) break;

      const TSSymbol *aliases = ts_language_alias_sequence(
        cursor->tree->language,
        parent_entry->subtree->ptr->production_id
      );
      if (aliases != NULL && aliases[node_entry->structural_child_index] != 0) break;
    }

    // Search the parent's field map for this child's slot.
    const TSFieldMapEntry *mapping, *mapping_end;
    ts_language_field_map(
      cursor->tree->language,
      parent_entry->subtree->ptr->production_id,
      &mapping, &mapping_end
    );
    for (; mapping < mapping_end; mapping++) {
      if (mapping->inherited) continue;
      if (mapping->child_index == node_entry->structural_child_index) {
        return mapping->field_id;
      }
    }
  }

  return 0;
}
// Returns the name of the field associated with the cursor's current node,
// looked up in the language's field name table, or NULL when the current
// field id is 0.
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
  TSFieldId field_id = ts_tree_cursor_current_field_id(_self);
  if (!field_id) return NULL;

  const TreeCursor *cursor = (const TreeCursor *)_self;
  return cursor->tree->language->field_names[field_id];
}
// Returns a new cursor that refers to the same tree as `_cursor` and holds a
// copy of all of its stack entries.
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
  TSTreeCursor duplicate = {NULL, NULL, {0, 0}};
  const TreeCursor *source = (const TreeCursor *)_cursor;
  TreeCursor *target = (TreeCursor *)&duplicate;

  target->tree = source->tree;
  array_push_all(&target->stack, &source->stack);
  return duplicate;
}

View File

@ -0,0 +1,20 @@
#ifndef TREE_SITTER_TREE_CURSOR_H_
#define TREE_SITTER_TREE_CURSOR_H_
#include "./subtree.h"
// One level of a tree cursor's stack: a subtree together with where it sits
// inside its parent.
typedef struct {
// The subtree at this level.
const Subtree *subtree;
// Position recorded for this subtree when the entry was created.
Length position;
// Index of this subtree among its parent's children.
uint32_t child_index;
// Index of this subtree counting only the parent's non-extra children; used
// to index the parent's alias sequence and field map.
uint32_t structural_child_index;
} TreeCursorEntry;
// Internal representation of a tree cursor. Public TSTreeCursor pointers are
// cast to this type by the cursor functions.
typedef struct {
// Tree being traversed.
const TSTree *tree;
// Path of entries from the cursor's starting node (bottom) to the current
// node (top).
Array(TreeCursorEntry) stack;
} TreeCursor;
// Resets the cursor's stack so that it contains a single entry for the given
// node, and adopts that node's tree.
void ts_tree_cursor_init(TreeCursor *, TSNode);
#endif // TREE_SITTER_TREE_CURSOR_H_

33
src/tree_sitter/utf16.c Normal file
View File

@ -0,0 +1,33 @@
#include "./utf16.h"
#include <string.h>
// Reads one code point from a UTF-16 string stored in host byte order.
//
// Parameters:
//   string     - pointer to the UTF-16 data, addressed as bytes; it does not
//                need to be 2-byte aligned.
//   length     - number of bytes available at `string`.
//   code_point - receives the decoded code point, or -1 when the input is
//                too short or is not a valid surrogate sequence.
//
// Returns the number of bytes consumed: 0 when fewer than two bytes are
// available, 4 for a valid surrogate pair, and 2 otherwise (including for an
// unpaired surrogate, so that callers can skip over it).
utf8proc_ssize_t utf16_iterate(
  const utf8proc_uint8_t *string,
  utf8proc_ssize_t length,
  utf8proc_int32_t *code_point
) {
  if (length < 2) {
    *code_point = -1;
    return 0;
  }

  // Load through memcpy rather than a uint16_t pointer cast: the byte buffer
  // carries no alignment guarantee and must not be aliased as uint16_t.
  uint16_t unit;
  memcpy(&unit, string, sizeof unit);

  // Anything outside the surrogate range is a complete code point.
  if (unit < 0xd800 || unit >= 0xe000) {
    *code_point = unit;
    return 2;
  }

  // A high surrogate must be followed by a low surrogate.
  if (unit < 0xdc00 && length >= 4) {
    uint16_t next_unit;
    memcpy(&next_unit, string + sizeof unit, sizeof next_unit);
    if (next_unit >= 0xdc00 && next_unit < 0xe000) {
      *code_point = 0x10000 + ((unit - 0xd800) << 10) + (next_unit - 0xdc00);
      return 4;
    }
  }

  // Unpaired or out-of-order surrogate.
  *code_point = -1;
  return 2;
}

21
src/tree_sitter/utf16.h Normal file
View File

@ -0,0 +1,21 @@
#ifndef TREE_SITTER_UTF16_H_
#define TREE_SITTER_UTF16_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdint.h>
#include <stdlib.h>
#include "utf8proc.h"
// Analogous to utf8proc's utf8proc_iterate function. Reads one code point from
// the given UTF16 string and stores it in the location pointed to by `code_point`.
// Returns the number of bytes in `string` that were read.
//
// The arguments are, in order: the string (as bytes), the number of bytes
// available, and the output code point. When fewer than two bytes are
// available, `code_point` is set to -1 and 0 is returned. For an unpaired
// surrogate, `code_point` is set to -1 and 2 is returned.
utf8proc_ssize_t utf16_iterate(const utf8proc_uint8_t *, utf8proc_ssize_t, utf8proc_int32_t *);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_UTF16_H_