1
0
Fork 0

nvim config updates

This commit is contained in:
Arthur K. 2026-01-30 15:47:24 +03:00
parent 559939e2f4
commit adc494721a
Signed by: wzray
GPG key ID: B97F30FDC4636357
20 changed files with 1108 additions and 30 deletions

View file

@ -0,0 +1,491 @@
// Tree-sitter grammar for Dockerfiles.
//
// Instruction keywords are matched case-insensitively with per-letter
// character classes and aliased to their upper-case spelling, so queries can
// refer to them as "FROM", "RUN", and so on.
module.exports = grammar({
name: "dockerfile",
// Whitespace, line continuations and comments may appear between tokens.
extras: ($) => [/\s+/, $.line_continuation, $.comment],
// Tokens produced by the external scanner, listed in the same order as the
// scanner's TokenType enum.
externals: ($) => [
$.heredoc_marker,
$.heredoc_line,
$.heredoc_end,
$.heredoc_nl,
$.error_sentinel,
],
rules: {
// A file is a sequence of instructions, each terminated by a newline.
source_file: ($) => repeat(seq($._instruction, "\n")),
// Hidden rule listing every instruction the grammar recognizes.
_instruction: ($) =>
choice(
$.from_instruction,
$.run_instruction,
$.cmd_instruction,
$.label_instruction,
$.expose_instruction,
$.env_instruction,
$.add_instruction,
$.copy_instruction,
$.entrypoint_instruction,
$.volume_instruction,
$.user_instruction,
$.workdir_instruction,
$.arg_instruction,
$.onbuild_instruction,
$.stopsignal_instruction,
$.healthcheck_instruction,
$.shell_instruction,
$.maintainer_instruction,
$.cross_build_instruction,
),
// FROM [--param=value] image[:tag][@digest] [AS alias]
from_instruction: ($) =>
seq(
alias(/[fF][rR][oO][mM]/, "FROM"),
optional($.param),
$.image_spec,
optional(seq(alias(/[aA][sS]/, "AS"), field("as", $.image_alias)))
),
// RUN [--param=value | --mount=...]... (JSON array | shell command),
// optionally followed by the bodies of any heredocs opened on that line.
run_instruction: ($) =>
seq(
alias(/[rR][uU][nN]/, "RUN"),
repeat(
choice(
$.param,
$.mount_param
)
),
choice($.json_string_array, $.shell_command),
repeat($.heredoc_block)
),
// CMD (JSON array | shell command)
cmd_instruction: ($) =>
seq(
alias(/[cC][mM][dD]/, "CMD"),
choice($.json_string_array, $.shell_command)
),
// LABEL key=value [key=value]...
label_instruction: ($) =>
seq(alias(/[lL][aA][bB][eE][lL]/, "LABEL"), repeat1($.label_pair)),
// EXPOSE port[/proto] | $expansion, one or more times.
expose_instruction: ($) =>
seq(
alias(/[eE][xX][pP][oO][sS][eE]/, "EXPOSE"),
repeat1(choice($.expose_port, $.expansion))
),
// ENV accepts either one or more `key=value` pairs or a single
// whitespace-separated `key value` pair; both surface as env_pair nodes.
env_instruction: ($) =>
seq(
alias(/[eE][nN][vV]/, "ENV"),
choice(repeat1($.env_pair), alias($._spaced_env_pair, $.env_pair))
),
// ADD [--param=value]... <src>... <dest>, where each source may be a heredoc
// marker; heredoc bodies follow the instruction line.
add_instruction: ($) =>
seq(
alias(/[aA][dD][dD]/, "ADD"),
repeat($.param),
repeat1(
seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
),
alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
),
// COPY has the same shape as ADD.
copy_instruction: ($) =>
seq(
alias(/[cC][oO][pP][yY]/, "COPY"),
repeat($.param),
repeat1(
seq(alias($.path_with_heredoc, $.path), $._non_newline_whitespace)
),
alias($.path_with_heredoc, $.path),
repeat($.heredoc_block)
),
// ENTRYPOINT (JSON array | shell command)
entrypoint_instruction: ($) =>
seq(
alias(/[eE][nN][tT][rR][yY][pP][oO][iI][nN][tT]/, "ENTRYPOINT"),
choice($.json_string_array, $.shell_command)
),
// VOLUME (JSON array | path [path]...)
volume_instruction: ($) =>
seq(
alias(/[vV][oO][lL][uU][mM][eE]/, "VOLUME"),
choice(
$.json_string_array,
seq($.path, repeat(seq($._non_newline_whitespace, $.path)))
)
),
// USER user[:group]; both parts are exposed as unquoted_string nodes.
user_instruction: ($) =>
seq(
alias(/[uU][sS][eE][rR]/, "USER"),
field("user", alias($._user_name_or_group, $.unquoted_string)),
optional(
seq(
token.immediate(":"),
field("group",
alias($._immediate_user_name_or_group, $.unquoted_string))
)
)
),
// A name (letter followed by letters, digits, '-' or '_'), a numeric id, or
// an expansion, optionally continued by adjacent fragments.
_user_name_or_group: ($) =>
seq(
choice(/([a-zA-Z][-A-Za-z0-9_]*|[0-9]+)/, $.expansion),
repeat($._immediate_user_name_or_group_fragment)
),
// same as _user_name_or_group but sticks to previous token
_immediate_user_name_or_group: ($) =>
repeat1($._immediate_user_name_or_group_fragment),
_immediate_user_name_or_group_fragment: ($) =>
choice(
token.immediate(/([a-zA-Z][-a-zA-Z0-9_]*|[0-9]+)/),
$._immediate_expansion
),
// WORKDIR path
workdir_instruction: ($) =>
seq(alias(/[wW][oO][rR][kK][dD][iI][rR]/, "WORKDIR"), $.path),
// ARG name[=default]; the '=' must directly follow the name.
arg_instruction: ($) =>
seq(
alias(/[aA][rR][gG]/, "ARG"),
field("name", alias(/[a-zA-Z0-9_]+/, $.unquoted_string)),
optional(
seq(
token.immediate("="),
field("default",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
)
)
),
// ONBUILD wraps any other instruction.
onbuild_instruction: ($) =>
seq(alias(/[oO][nN][bB][uU][iI][lL][dD]/, "ONBUILD"), $._instruction),
// STOPSIGNAL value
stopsignal_instruction: ($) =>
seq(
alias(/[sS][tT][oO][pP][sS][iI][gG][nN][aA][lL]/, "STOPSIGNAL"),
$._stopsignal_value
),
// Upper-case letters/digits and/or expansions, concatenated without spaces.
_stopsignal_value: ($) =>
seq(
choice(/[A-Z0-9]+/, $.expansion),
repeat(choice(token.immediate(/[A-Z0-9]+/), $._immediate_expansion))
),
// HEALTHCHECK NONE | HEALTHCHECK [--param=value]... CMD ...
// Note: "NONE" is a plain string literal here, so unlike the instruction
// keywords it is only matched in upper case.
healthcheck_instruction: ($) =>
seq(
alias(/[hH][eE][aA][lL][tT][hH][cC][hH][eE][cC][kK]/, "HEALTHCHECK"),
choice("NONE", seq(repeat($.param), $.cmd_instruction))
),
// SHELL only accepts the JSON array form.
shell_instruction: ($) =>
seq(alias(/[sS][hH][eE][lL][lL]/, "SHELL"), $.json_string_array),
// MAINTAINER: the rest of the line is consumed as one anonymous token.
maintainer_instruction: () =>
seq(
alias(/[mM][aA][iI][nN][tT][aA][iI][nN][eE][rR]/, "MAINTAINER"),
/.*/
),
// Any keyword starting with CROSS_BUILD (plus optional letters/underscores),
// followed by the rest of the line as one anonymous token.
cross_build_instruction: () =>
seq(
alias(
/[cC][rR][oO][sS][sS]_[bB][uU][iI][lL][dD][a-zA-Z_]*/,
"CROSS_BUILD"
),
/.*/
),
heredoc_block: ($) =>
seq(
// A heredoc block starts with a line break after the instruction it
// belongs to. The heredoc_nl token is a special token that only matches
// \n if there's at least one open heredoc to avoid conflicts.
// We also alias this token to hide it from the output like all other
// whitespace.
alias($.heredoc_nl, "_heredoc_nl"),
repeat(seq($.heredoc_line, "\n")),
$.heredoc_end
),
// A path: a restricted first character (or an expansion) followed by any
// run of non-whitespace text and expansions.
path: ($) =>
seq(
choice(
/[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^<]/, // cannot start with a '<<' to avoid conflicts with heredocs (a single < is fine, though)
$.expansion
),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
),
// Like `path`, but a heredoc marker is also accepted in place of a path
// (used by ADD and COPY, where it is aliased back to `path`).
path_with_heredoc: ($) =>
choice(
$.heredoc_marker,
seq(
choice(
/[^-\s\$<]/, // cannot start with a '-' to avoid conflicts with params
/<[^-\s\$<]/,
$.expansion
),
repeat(choice(token.immediate(/[^\s\$]+/), $._immediate_expansion))
)
),
// $VAR or ${...}
expansion: $ =>
seq("$", $._expansion_body),
// we have 2 rules b/c aliases don't work as expected on seq() directly
_immediate_expansion: $ => alias($._imm_expansion, $.expansion),
_imm_expansion: $ =>
seq(token.immediate("$"), $._expansion_body),
// Either a bare variable name or a braced body; everything between the
// braces is exposed as a single `variable` node.
_expansion_body: $ =>
choice(
$.variable,
seq(
token.immediate("{"),
alias(token.immediate(/[^\}]+/), $.variable),
token.immediate("}")
)
),
variable: () => token.immediate(/[a-zA-Z_][a-zA-Z0-9_]*/),
// key=[value]; the value is optional.
env_pair: ($) =>
seq(
field("name", $._env_key),
token.immediate("="),
optional(
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
)
),
// `key value` form of ENV, separated by whitespace instead of '='.
_spaced_env_pair: ($) =>
seq(
field("name", $._env_key),
token.immediate(/\s+/),
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
),
_env_key: ($) =>
alias(/[a-zA-Z_][a-zA-Z0-9_]*/, $.unquoted_string),
// A port or port range with an optional /tcp or /udp suffix.
expose_port: () => seq(/\d+(-\d+)?/, optional(choice("/tcp", "/udp"))),
// key=value, where the key may be bare or quoted.
label_pair: ($) =>
seq(
field("key", choice(
alias(/[-a-zA-Z0-9\._]+/, $.unquoted_string),
$.double_quoted_string,
$.single_quoted_string
)),
token.immediate("="),
field("value",
choice(
$.double_quoted_string,
$.single_quoted_string,
$.unquoted_string
))
),
// name[:tag][@digest]
image_spec: ($) =>
seq(
field("name", $.image_name),
seq(
field("tag", optional($.image_tag)),
field("digest", optional($.image_digest))
)
),
// Image name: may not start with '@', ':', '$', '-' or whitespace, and runs
// until the next '@', ':', '$' or whitespace.
image_name: ($) =>
seq(
choice(/[^@:\s\$-]/, $.expansion),
repeat(choice(token.immediate(/[^@:\s\$]+/), $._immediate_expansion))
),
image_tag: ($) =>
seq(
token.immediate(":"),
repeat1(choice(token.immediate(/[^@\s\$]+/), $._immediate_expansion))
),
image_digest: ($) =>
seq(
token.immediate("@"),
repeat1(choice(token.immediate(/[a-zA-Z0-9:]+/), $._immediate_expansion))
),
// Generic parsing of options passed right after an instruction name.
param: () =>
seq(
"--",
field("name", token.immediate(/[a-z][-a-z]*/)),
token.immediate("="),
field("value", token.immediate(/[^\s]+/))
),
// Specific parsing of the --mount option e.g.
//
// --mount=type=cache,target=/root/.cache/go-build
//
mount_param: ($) => seq(
"--",
field("name", token.immediate("mount")),
token.immediate("="),
field(
"value",
seq(
$.mount_param_param,
repeat(
seq(token.immediate(","), $.mount_param_param)
)
)
)
),
// A single key=value entry inside a --mount value.
mount_param_param: () => seq(
token.immediate(/[^\s=,]+/),
token.immediate("="),
token.immediate(/[^\s=,]+/)
),
// Stage alias used after AS in a FROM instruction.
image_alias: ($) => seq(
choice(/[-a-zA-Z0-9_]+/, $.expansion),
repeat(choice(token.immediate(/[-a-zA-Z0-9_]+/), $._immediate_expansion))
),
// One or more shell fragments joined by explicit line continuations.
shell_command: ($) =>
seq(
$.shell_fragment,
repeat(
seq(
alias($.required_line_continuation, $.line_continuation),
$.shell_fragment
)
)
),
shell_fragment: ($) => repeat1(
choice(
// A shell fragment is broken into the same tokens as other
// constructs because the lexer prefers the longer tokens
// when it has a choice. The example below shows the tokenization
// of the --mount parameter.
//
// RUN --mount=foo=bar,baz=42 ls --all
// ^^ ^ ^ ^ ^
// ^^^^^ ^^^ ^^^ ^^^ ^^
// |--------param-------|
// |--shell_command--|
//
seq($.heredoc_marker, /[ \t]*/),
/[,=-]/,
/[^\\\[\n#\s,=-][^\\\n<]*/,
/\\[^\n,=-]/,
/<[^<]/,
)
),
// Backslash, optional spaces/tabs, newline (also listed in `extras`).
line_continuation: () => /\\[ \t]*\n/,
// Strict form used inside shell_command: backslash immediately followed by
// a newline.
required_line_continuation: () => "\\\n",
// [ "a", "b", ... ] - possibly empty.
json_string_array: ($) =>
seq(
"[",
optional(
seq($.json_string, repeat(seq(",", $.json_string)))
),
"]"
),
// Note that JSON strings are different from the other double-quoted
// strings. They don't support $-expansions.
// Convenient reference: https://www.json.org/
json_string: ($) => seq(
'"',
repeat(
choice(
token.immediate(/[^"\\]+/),
alias($.json_escape_sequence, $.escape_sequence)
)
),
'"'
),
json_escape_sequence: () => token.immediate(
/\\(?:["\\/bfnrt]|u[0-9A-Fa-f]{4})/
),
// Double-quoted string on a single line: plain text, the \\ and \" escapes,
// a lone backslash, and $-expansions.
double_quoted_string: ($) =>
seq(
'"',
repeat(
choice(
token.immediate(/[^"\n\\\$]+/),
alias($.double_quoted_escape_sequence, $.escape_sequence),
"\\",
$._immediate_expansion
)
),
'"'
),
// same as double_quoted_string but without $-expansions:
single_quoted_string: ($) =>
seq(
"'",
repeat(
choice(
token.immediate(/[^'\n\\]+/),
alias($.single_quoted_escape_sequence, $.escape_sequence),
"\\",
)
),
"'"
),
// Unquoted value: runs of characters other than whitespace, quotes,
// backslash and '$', plus escaped spaces and expansions. Every piece is an
// immediate token, so the string must directly follow the previous token.
unquoted_string: ($) =>
repeat1(
choice(
token.immediate(/[^\s\n\"'\\\$]+/),
token.immediate("\\ "),
$._immediate_expansion
)
),
// Escapes recognized inside double quotes: \\ and \"
double_quoted_escape_sequence: () => token.immediate(
choice(
"\\\\",
"\\\""
)
),
// Escapes recognized inside single quotes: \\ and \'
single_quoted_escape_sequence: () => token.immediate(
choice(
"\\\\",
"\\'"
)
),
// Spaces/tabs used as an explicit separator between ADD/COPY/VOLUME paths.
_non_newline_whitespace: () => token.immediate(/[\t ]+/),
// '#' through the end of the line.
comment: () => /#.*/,
},
});

View file

@ -0,0 +1,58 @@
; Highlight captures for the Dockerfile grammar.

; Instruction keywords (as aliased by the grammar) and heredoc delimiters.
[
"FROM"
"AS"
"RUN"
"CMD"
"LABEL"
"EXPOSE"
"ENV"
"ADD"
"COPY"
"ENTRYPOINT"
"VOLUME"
"USER"
"WORKDIR"
"ARG"
"ONBUILD"
"STOPSIGNAL"
"HEALTHCHECK"
"SHELL"
"MAINTAINER"
"CROSS_BUILD"
(heredoc_marker)
(heredoc_end)
] @keyword
; Any ":" or "@" token.
[
":"
"@"
] @operator
(comment) @comment
; The ":" / "@" separators of an image tag and digest.
; NOTE(review): presumably meant to take precedence over the generic @operator
; capture above - depends on the editor's pattern priority rules.
(image_spec
(image_tag
":" @punctuation.special)
(image_digest
"@" @punctuation.special))
; String-like nodes, including whole heredoc bodies.
[
(double_quoted_string)
(single_quoted_string)
(json_string)
(heredoc_block)
] @string
; Expansion punctuation; the expansion node itself is captured as @none.
(expansion
[
"$"
"{"
"}"
] @punctuation.special
) @none
; Variables whose text starts with an upper-case letter and contains only
; upper-case letters, digits and underscores.
((variable) @constant
(#match? @constant "^[A-Z][A-Z_0-9]*$"))

View file

@ -0,0 +1,11 @@
; Language injections for the Dockerfile grammar.

; Comments are handed to the "comment" language.
((comment) @injection.content
(#set! injection.language "comment"))
; Shell-form commands are parsed as bash, including their child nodes.
((shell_command) @injection.content
(#set! injection.language "bash")
(#set! injection.include-children))
; Heredoc bodies attached to RUN are parsed as bash as well.
((run_instruction
(heredoc_block) @injection.content)
(#set! injection.language "bash")
(#set! injection.include-children))

View file

@ -0,0 +1,343 @@
// https://github.com/faergeek/tree-sitter-dockerfile/tree/make-language-injections-easier
// https://github.com/camdencheek/tree-sitter-dockerfile
//
// The MIT License (MIT)
//
// Copyright (c) 2021 Camden Cheek
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <wctype.h>
#include "tree_sitter/parser.h"
// Maximum number of open heredoc delimiters tracked at once (size of
// scanner_state.heredocs).
#define MAX_HEREDOCS 10
// Size in bytes of the stack buffer scan_marker uses to record one delimiter.
#define DEL_SPACE 512
// Persistent state of the external scanner.
typedef struct {
// True while heredoc content is being scanned (set by scan_content).
bool in_heredoc;
// True when the first pending heredoc was opened with "<<-".
bool stripping_heredoc;
// Number of used entries in `heredocs`.
unsigned heredoc_count;
// Pending heredoc delimiters in the order they were opened. Each entry is a
// heap-allocated string whose first character is '-' (stripping) or ' '
// (non-stripping), followed by the delimiter text.
char *heredocs[MAX_HEREDOCS];
} scanner_state;
// External token kinds, used as indices into valid_symbols and as values for
// lexer->result_symbol. The order mirrors the `externals` list in the grammar.
enum TokenType {
HEREDOC_MARKER,
HEREDOC_LINE,
HEREDOC_END,
HEREDOC_NL,
ERROR_SENTINEL,
};
/**
 * Allocates a zero-initialized scanner state.
 *
 * Returns the new state, or NULL if the allocation fails. calloc is used
 * instead of malloc + memset so that an allocation failure is reported to the
 * caller rather than dereferenced.
 */
void *tree_sitter_dockerfile_external_scanner_create(void) {
  return calloc(1, sizeof(scanner_state));
}
/**
 * Releases a scanner state together with every delimiter string it owns.
 * A NULL payload is ignored.
 */
void tree_sitter_dockerfile_external_scanner_destroy(void *payload) {
  scanner_state *scanner = payload;
  if (scanner == NULL) {
    return;
  }

  // Walk every slot, not just the first heredoc_count ones; unused slots are
  // NULL and free(NULL) is a no-op.
  for (unsigned slot = 0; slot < MAX_HEREDOCS; slot++) {
    free(scanner->heredocs[slot]);
  }
  free(scanner);
}
/**
 * Writes the scanner state into `buffer` and returns the number of bytes used.
 *
 * Layout: [in_heredoc][stripping_heredoc] followed by each pending delimiter
 * as a NUL-terminated string, followed by one extra NUL byte that marks the
 * end of the list. Delimiters that would not fit into
 * TREE_SITTER_SERIALIZATION_BUFFER_SIZE are dropped.
 */
unsigned tree_sitter_dockerfile_external_scanner_serialize(void *payload,
                                                           char *buffer) {
  const scanner_state *scanner = payload;

  buffer[0] = scanner->in_heredoc;
  buffer[1] = scanner->stripping_heredoc;
  unsigned written = 2;

  for (unsigned slot = 0; slot < scanner->heredoc_count; slot++) {
    const char *delimiter = scanner->heredocs[slot];
    // Size including the string's own terminating NUL.
    unsigned size = strlen(delimiter) + 1;

    // Keep one more byte in reserve for the end-of-list marker below.
    if (written + size + 1 > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
      break;
    }

    memcpy(buffer + written, delimiter, size);
    written += size;
  }

  // End-of-list marker: deserialization stops at the first empty string.
  buffer[written] = 0;
  return written + 1;
}
/**
 * Restores the scanner state from a buffer written by the serialize function.
 *
 * payload: scanner state to overwrite; delimiters it currently owns are freed.
 * buffer:  serialized bytes: [in_heredoc][stripping_heredoc], then
 *          NUL-terminated delimiter strings, then an empty string.
 * length:  number of valid bytes in `buffer`; 0 resets the state.
 *
 * Reads never go past `length`: a truncated or unterminated buffer simply
 * yields fewer delimiters. If a delimiter cannot be allocated, restoring stops
 * at that point instead of copying into a NULL pointer.
 */
void tree_sitter_dockerfile_external_scanner_deserialize(void *payload,
                                                         const char *buffer,
                                                         unsigned length) {
  scanner_state *state = payload;

  // Free all current heredocs to avoid leaking memory when we overwrite the
  // array below.
  for (unsigned i = 0; i < state->heredoc_count; i++) {
    free(state->heredocs[i]);
    state->heredocs[i] = NULL;
  }
  state->in_heredoc = false;
  state->stripping_heredoc = false;
  state->heredoc_count = 0;

  // The two flag bytes are the minimum a serialized state can contain.
  if (length < 2) {
    return;
  }

  unsigned pos = 0;
  state->in_heredoc = buffer[pos++];
  state->stripping_heredoc = buffer[pos++];

  unsigned heredoc_count = 0;
  while (heredoc_count < MAX_HEREDOCS && pos < length) {
    // Find the end of the next string without looking beyond the buffer.
    const char *terminator = memchr(&buffer[pos], '\0', length - pos);
    if (terminator == NULL) {
      break;
    }

    unsigned len = (unsigned)(terminator - &buffer[pos]);
    // An empty string is the end-of-list marker.
    if (len == 0) {
      break;
    }

    // Account for the terminating null byte when copying.
    len++;
    char *heredoc = malloc(len);
    if (heredoc == NULL) {
      break;
    }
    memcpy(heredoc, &buffer[pos], len);
    state->heredocs[heredoc_count++] = heredoc;
    pos += len;
  }
  state->heredoc_count = heredoc_count;
}
/**
 * Skips whitespace on the current line. Stops at a newline, at a NUL
 * lookahead, or at the first non-whitespace character. Skipped characters are
 * advanced with skip=true.
 */
static void skip_whitespace(TSLexer *lexer) {
  for (;;) {
    int32_t next = lexer->lookahead;
    if (next == '\0' || next == '\n' || !iswspace(next)) {
      return;
    }
    lexer->advance(lexer, true);
  }
}
/**
 * Tries to scan a heredoc marker: `<<`, an optional `-`, then a delimiter that
 * is either quoted (single or double quotes) or runs up to the next
 * whitespace character.
 *
 * On success the token is reported as HEREDOC_MARKER and, when possible, the
 * delimiter is appended to state->heredocs so that its body can be recognized
 * later. The marker is still reported but NOT tracked when the delimiter does
 * not fit into DEL_SPACE, when MAX_HEREDOCS delimiters are already pending, or
 * when the copy cannot be allocated.
 *
 * Returns false if the input at the current position is not a heredoc marker.
 */
static bool scan_marker(scanner_state *state, TSLexer *lexer) {
  skip_whitespace(lexer);

  if (lexer->lookahead != '<')
    return false;
  lexer->advance(lexer, false);
  if (lexer->lookahead != '<')
    return false;
  lexer->advance(lexer, false);

  // "<<-" requests stripping of leading whitespace from the body lines.
  bool stripping = false;
  if (lexer->lookahead == '-') {
    stripping = true;
    lexer->advance(lexer, false);
  }

  int32_t quote = 0;
  if (lexer->lookahead == '"' || lexer->lookahead == '\'') {
    quote = lexer->lookahead;
    lexer->advance(lexer, false);
  }

  // Reserve a reasonable amount of space for the heredoc delimiter string.
  // Most heredocs (like EOF, EOT, EOS, FILE, etc.) are pretty short so we'll
  // usually only need a few bytes. We're also limited to less than 1024 bytes
  // by tree-sitter since our state has to fit in
  // TREE_SITTER_SERIALIZATION_BUFFER_SIZE.
  char delimiter[DEL_SPACE];

  // We start recording the actual string at position 1 since we store whether
  // it's a stripping heredoc in the first position (with either a dash or a
  // space). del_idx == 0 means "delimiter too long, recording abandoned".
  unsigned del_idx = 1;

  while (lexer->lookahead != '\0' &&
         (quote ? lexer->lookahead != quote : !iswspace(lexer->lookahead))) {
    // A backslash escapes the next character; only that character is recorded.
    if (lexer->lookahead == '\\') {
      lexer->advance(lexer, false);
      if (lexer->lookahead == '\0') {
        return false;
      }
    }

    if (del_idx > 0) {
      delimiter[del_idx++] = lexer->lookahead;
    }
    lexer->advance(lexer, false);

    // If we run out of space, stop recording the delimiter but keep
    // advancing the lexer to ensure that we at least parse the marker
    // correctly. Reserve two bytes: one for the strip indicator and
    // one for the terminating null byte.
    if (del_idx >= DEL_SPACE - 2) {
      del_idx = 0;
    }
  }

  if (quote) {
    // A quoted delimiter must be closed by the same quote character.
    if (lexer->lookahead != quote) {
      return false;
    }
    lexer->advance(lexer, false);
  }

  // From here on the marker itself is valid; what remains is bookkeeping.
  lexer->result_symbol = HEREDOC_MARKER;

  // Nothing to record: the delimiter was too long or the table is full.
  if (del_idx == 0 || state->heredoc_count >= MAX_HEREDOCS) {
    return true;
  }

  delimiter[0] = stripping ? '-' : ' ';
  delimiter[del_idx] = '\0';

  // We copy the delimiter string to the heap here since we can't store our
  // stack-allocated string in our state (which is stored on the heap).
  char *del_copy = malloc(del_idx + 1);
  if (del_copy == NULL) {
    // Out of memory: report the marker but leave it untracked.
    return true;
  }
  memcpy(del_copy, delimiter, del_idx + 1);

  // The first pending heredoc determines the active stripping mode.
  if (state->heredoc_count == 0) {
    state->stripping_heredoc = stripping;
  }
  state->heredocs[state->heredoc_count++] = del_copy;

  return true;
}
/**
 * Scans one line inside a heredoc body.
 *
 * Produces HEREDOC_END when the line consists of the first pending delimiter
 * (after leading whitespace has been skipped for "<<-" heredocs), otherwise
 * HEREDOC_LINE covering the text up to, but not including, the newline.
 *
 * The delimiter only counts when it is followed by the end of the line or the
 * end of input. Without that check a body line that merely starts with the
 * delimiter (e.g. "EOF2" for delimiter "EOF") would close the heredoc early.
 *
 * Returns false when no heredoc is pending or when the token that would be
 * produced is not in valid_symbols.
 */
static bool scan_content(scanner_state *state, TSLexer *lexer,
                         const bool *valid_symbols) {
  if (state->heredoc_count == 0) {
    state->in_heredoc = false;
    return false;
  }
  state->in_heredoc = true;

  if (state->stripping_heredoc) {
    skip_whitespace(lexer);
  }

  if (valid_symbols[HEREDOC_END]) {
    const char *delimiter = state->heredocs[0];
    // Index 0 holds the strip indicator; the delimiter text starts at 1.
    unsigned delim_idx = 1;

    // Look for the current heredoc delimiter.
    while (delimiter[delim_idx] != '\0' && lexer->lookahead != '\0' &&
           lexer->lookahead == delimiter[delim_idx]) {
      lexer->advance(lexer, false);
      delim_idx++;
    }

    // The delimiter must be the whole line, not just a prefix of it. '\r' is
    // accepted so that CRLF line endings still terminate the heredoc.
    bool at_line_end = lexer->lookahead == '\n' || lexer->lookahead == '\r' ||
                       lexer->eof(lexer);

    // Check if the entire string matched.
    if (delimiter[delim_idx] == '\0' && at_line_end) {
      lexer->result_symbol = HEREDOC_END;

      // Shift the first heredoc off the list.
      free(state->heredocs[0]);
      for (unsigned i = 1; i < state->heredoc_count; i++) {
        state->heredocs[i - 1] = state->heredocs[i];
      }
      state->heredocs[state->heredoc_count - 1] = NULL;
      state->heredoc_count--;

      if (state->heredoc_count > 0) {
        // The next pending heredoc decides the stripping mode.
        state->stripping_heredoc = state->heredocs[0][0] == '-';
      } else {
        state->in_heredoc = false;
      }
      return true;
    }
    // Otherwise fall through: the characters consumed so far become part of
    // an ordinary heredoc line.
  }

  if (!valid_symbols[HEREDOC_LINE])
    return false;

  lexer->result_symbol = HEREDOC_LINE;

  // Consume the rest of the line; the newline itself is left for the grammar.
  for (;;) {
    switch (lexer->lookahead) {
    case '\0':
      if (lexer->eof(lexer)) {
        state->in_heredoc = false;
        return true;
      }
      // A literal NUL byte inside the file is ordinary content.
      lexer->advance(lexer, false);
      break;
    case '\n':
      return true;
    default:
      lexer->advance(lexer, false);
    }
  }
}
/**
 * Entry point of the external scanner: dispatches to the marker or content
 * scanner depending on which external tokens are valid at this position.
 *
 * Returns true when a token was recognized (lexer->result_symbol is set),
 * false otherwise.
 */
bool tree_sitter_dockerfile_external_scanner_scan(void *payload, TSLexer *lexer,
                                                  const bool *valid_symbols) {
  scanner_state *scanner = payload;

  // When the sentinel token is marked valid, fall back on our own record of
  // whether we are inside a heredoc to decide what to scan.
  if (valid_symbols[ERROR_SENTINEL]) {
    return scanner->in_heredoc ? scan_content(scanner, lexer, valid_symbols)
                               : scan_marker(scanner, lexer);
  }

  // HEREDOC_NL only matches a linebreak if there are open heredocs. This is
  // necessary to avoid a conflict in the grammar since a normal line break
  // could either be the start of a heredoc or the end of an instruction.
  bool heredoc_pending = scanner->heredoc_count > 0;
  if (valid_symbols[HEREDOC_NL] && heredoc_pending &&
      lexer->lookahead == '\n') {
    lexer->result_symbol = HEREDOC_NL;
    lexer->advance(lexer, false);
    return true;
  }

  if (valid_symbols[HEREDOC_MARKER]) {
    return scan_marker(scanner, lexer);
  }

  bool wants_content = valid_symbols[HEREDOC_LINE] || valid_symbols[HEREDOC_END];
  if (wants_content) {
    return scan_content(scanner, lexer, valid_symbols);
  }

  return false;
}