diff options
Diffstat (limited to 'src/3rdparty/xkbcommon/src/compose/parser.c')
-rw-r--r-- | src/3rdparty/xkbcommon/src/compose/parser.c | 737 |
1 files changed, 0 insertions, 737 deletions
diff --git a/src/3rdparty/xkbcommon/src/compose/parser.c b/src/3rdparty/xkbcommon/src/compose/parser.c deleted file mode 100644 index 439d404f06..0000000000 --- a/src/3rdparty/xkbcommon/src/compose/parser.c +++ /dev/null @@ -1,737 +0,0 @@ -/* - * Copyright © 2013 Ran Benita <ran234@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/****************************************************************** - - Copyright 1992 by Oki Technosystems Laboratory, Inc. - Copyright 1992 by Fuji Xerox Co., Ltd. - -Permission to use, copy, modify, distribute, and sell this software -and its documentation for any purpose is hereby granted without fee, -provided that the above copyright notice appear in all copies and -that both that copyright notice and this permission notice appear -in supporting documentation, and that the name of Oki Technosystems -Laboratory and Fuji Xerox not be used in advertising or publicity -pertaining to distribution of the software without specific, written -prior permission. -Oki Technosystems Laboratory and Fuji Xerox make no representations -about the suitability of this software for any purpose. It is provided -"as is" without express or implied warranty. - -OKI TECHNOSYSTEMS LABORATORY AND FUJI XEROX DISCLAIM ALL WARRANTIES -WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OKI TECHNOSYSTEMS -LABORATORY AND FUJI XEROX BE LIABLE FOR ANY SPECIAL, INDIRECT OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. - - Author: Yasuhiro Kawai Oki Technosystems Laboratory - Author: Kazunori Nishihara Fuji Xerox - -******************************************************************/ - -#include <errno.h> - -#include "utils.h" -#include "scanner-utils.h" -#include "table.h" -#include "paths.h" -#include "utf8.h" -#include "parser.h" - -#define MAX_LHS_LEN 10 -#define MAX_INCLUDE_DEPTH 5 - -/* - * Grammar adapted from libX11/modules/im/ximcp/imLcPrs.c. - * See also the XCompose(5) manpage. - * - * FILE ::= { [PRODUCTION] [COMMENT] "\n" | INCLUDE } - * INCLUDE ::= "include" '"' INCLUDE_STRING '"' - * PRODUCTION ::= LHS ":" RHS [ COMMENT ] - * COMMENT ::= "#" {<any character except null or newline>} - * LHS ::= EVENT { EVENT } - * EVENT ::= [MODIFIER_LIST] "<" keysym ">" - * MODIFIER_LIST ::= (["!"] {MODIFIER} ) | "None" - * MODIFIER ::= ["~"] MODIFIER_NAME - * MODIFIER_NAME ::= ("Ctrl"|"Lock"|"Caps"|"Shift"|"Alt"|"Meta") - * RHS ::= ( STRING | keysym | STRING keysym ) - * STRING ::= '"' { CHAR } '"' - * CHAR ::= GRAPHIC_CHAR | ESCAPED_CHAR - * GRAPHIC_CHAR ::= locale (codeset) dependent code - * ESCAPED_CHAR ::= ('\\' | '\"' | OCTAL | HEX ) - * OCTAL ::= '\' OCTAL_CHAR [OCTAL_CHAR [OCTAL_CHAR]] - * OCTAL_CHAR ::= (0|1|2|3|4|5|6|7) - * HEX ::= '\' (x|X) HEX_CHAR [HEX_CHAR]] - * HEX_CHAR ::= (0|1|2|3|4|5|6|7|8|9|A|B|C|D|E|F|a|b|c|d|e|f) - * - * INCLUDE_STRING is a filesystem path, with the following %-expansions: - * %% - '%'. - * %H - The user's home directory (the $HOME environment variable). - * %L - The name of the locale specific Compose file (e.g., - * "/usr/share/X11/locale/<localename>/Compose"). - * %S - The name of the system directory for Compose files (e.g., - * "/usr/share/X11/locale"). - */ - -enum rules_token { - TOK_END_OF_FILE = 0, - TOK_END_OF_LINE, - TOK_INCLUDE, - TOK_INCLUDE_STRING, - TOK_LHS_KEYSYM, - TOK_COLON, - TOK_BANG, - TOK_TILDE, - TOK_STRING, - TOK_IDENT, - TOK_ERROR -}; - -/* Values returned with some tokens, like yylval. */ -union lvalue { - struct { - /* Still \0-terminated. */ - const char *str; - size_t len; - } string; -}; - -static enum rules_token -lex(struct scanner *s, union lvalue *val) -{ -skip_more_whitespace_and_comments: - /* Skip spaces. */ - while (is_space(peek(s))) - if (next(s) == '\n') - return TOK_END_OF_LINE; - - /* Skip comments. */ - if (chr(s, '#')) { - skip_to_eol(s); - goto skip_more_whitespace_and_comments; - } - - /* See if we're done. */ - if (eof(s)) return TOK_END_OF_FILE; - - /* New token. */ - s->token_line = s->line; - s->token_column = s->column; - s->buf_pos = 0; - - /* LHS Keysym. */ - if (chr(s, '<')) { - while (peek(s) != '>' && !eol(s) && !eof(s)) - buf_append(s, next(s)); - if (!chr(s, '>')) { - scanner_err(s, "unterminated keysym literal"); - return TOK_ERROR; - } - if (!buf_append(s, '\0')) { - scanner_err(s, "keysym literal is too long"); - return TOK_ERROR; - } - val->string.str = s->buf; - val->string.len = s->buf_pos; - return TOK_LHS_KEYSYM; - } - - /* Colon. */ - if (chr(s, ':')) - return TOK_COLON; - if (chr(s, '!')) - return TOK_BANG; - if (chr(s, '~')) - return TOK_TILDE; - - /* String literal. */ - if (chr(s, '\"')) { - while (!eof(s) && !eol(s) && peek(s) != '\"') { - if (chr(s, '\\')) { - uint8_t o; - if (chr(s, '\\')) { - buf_append(s, '\\'); - } - else if (chr(s, '"')) { - buf_append(s, '"'); - } - else if (chr(s, 'x') || chr(s, 'X')) { - if (hex(s, &o)) - buf_append(s, (char) o); - else - scanner_warn(s, "illegal hexadecimal escape sequence in string literal"); - } - else if (oct(s, &o)) { - buf_append(s, (char) o); - } - else { - scanner_warn(s, "unknown escape sequence (%c) in string literal", peek(s)); - /* Ignore. */ - } - } else { - buf_append(s, next(s)); - } - } - if (!chr(s, '\"')) { - scanner_err(s, "unterminated string literal"); - return TOK_ERROR; - } - if (!buf_append(s, '\0')) { - scanner_err(s, "string literal is too long"); - return TOK_ERROR; - } - if (!is_valid_utf8(s->buf, s->buf_pos - 1)) { - scanner_err(s, "string literal is not a valid UTF-8 string"); - return TOK_ERROR; - } - val->string.str = s->buf; - val->string.len = s->buf_pos; - return TOK_STRING; - } - - /* Identifier or include. */ - if (is_alpha(peek(s)) || peek(s) == '_') { - s->buf_pos = 0; - while (is_alnum(peek(s)) || peek(s) == '_') - buf_append(s, next(s)); - if (!buf_append(s, '\0')) { - scanner_err(s, "identifier is too long"); - return TOK_ERROR; - } - - if (streq(s->buf, "include")) - return TOK_INCLUDE; - - val->string.str = s->buf; - val->string.len = s->buf_pos; - return TOK_IDENT; - } - - /* Discard rest of line. */ - skip_to_eol(s); - - scanner_err(s, "unrecognized token"); - return TOK_ERROR; -} - -static enum rules_token -lex_include_string(struct scanner *s, struct xkb_compose_table *table, - union lvalue *val_out) -{ - while (is_space(peek(s))) - if (next(s) == '\n') - return TOK_END_OF_LINE; - - s->token_line = s->line; - s->token_column = s->column; - s->buf_pos = 0; - - if (!chr(s, '\"')) { - scanner_err(s, "include statement must be followed by a path"); - return TOK_ERROR; - } - - while (!eof(s) && !eol(s) && peek(s) != '\"') { - if (chr(s, '%')) { - if (chr(s, '%')) { - buf_append(s, '%'); - } - else if (chr(s, 'H')) { - const char *home = secure_getenv("HOME"); - if (!home) { - scanner_err(s, "%%H was used in an include statement, but the HOME environment variable is not set"); - return TOK_ERROR; - } - if (!buf_appends(s, home)) { - scanner_err(s, "include path after expanding %%H is too long"); - return TOK_ERROR; - } - } - else if (chr(s, 'L')) { - char *path = get_locale_compose_file_path(table->locale); - if (!path) { - scanner_err(s, "failed to expand %%L to the locale Compose file"); - return TOK_ERROR; - } - if (!buf_appends(s, path)) { - free(path); - scanner_err(s, "include path after expanding %%L is too long"); - return TOK_ERROR; - } - free(path); - } - else if (chr(s, 'S')) { - const char *xlocaledir = get_xlocaledir_path(); - if (!buf_appends(s, xlocaledir)) { - scanner_err(s, "include path after expanding %%S is too long"); - return TOK_ERROR; - } - } - else { - scanner_err(s, "unknown %% format (%c) in include statement", peek(s)); - return TOK_ERROR; - } - } else { - buf_append(s, next(s)); - } - } - if (!chr(s, '\"')) { - scanner_err(s, "unterminated include statement"); - return TOK_ERROR; - } - if (!buf_append(s, '\0')) { - scanner_err(s, "include path is too long"); - return TOK_ERROR; - } - val_out->string.str = s->buf; - val_out->string.len = s->buf_pos; - return TOK_INCLUDE_STRING; -} - -struct production { - xkb_keysym_t lhs[MAX_LHS_LEN]; - unsigned int len; - xkb_keysym_t keysym; - char string[256]; - /* At least one of these is true. */ - bool has_keysym; - bool has_string; - - /* The matching is as follows: (active_mods & modmask) == mods. */ - xkb_mod_mask_t modmask; - xkb_mod_mask_t mods; -}; - -static uint32_t -add_node(struct xkb_compose_table *table, xkb_keysym_t keysym) -{ - struct compose_node new = { - .keysym = keysym, - .next = 0, - .is_leaf = true, - }; - darray_append(table->nodes, new); - return darray_size(table->nodes) - 1; -} - -static void -add_production(struct xkb_compose_table *table, struct scanner *s, - const struct production *production) -{ - unsigned lhs_pos; - uint32_t curr; - struct compose_node *node; - - curr = 0; - node = &darray_item(table->nodes, curr); - - /* - * Insert the sequence to the trie, creating new nodes as needed. - * - * TODO: This can be sped up a bit by first trying the path that the - * previous production took, and only then doing the linear search - * through the trie levels. This will work because sequences in the - * Compose files are often clustered by a common prefix; especially - * in the 1st and 2nd keysyms, which is where the largest variation - * (thus, longest search) is. - */ - for (lhs_pos = 0; lhs_pos < production->len; lhs_pos++) { - while (production->lhs[lhs_pos] != node->keysym) { - if (node->next == 0) { - uint32_t next = add_node(table, production->lhs[lhs_pos]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->next = next; - } - - curr = node->next; - node = &darray_item(table->nodes, curr); - } - - if (lhs_pos + 1 == production->len) - break; - - if (node->is_leaf) { - if (node->u.leaf.utf8 != 0 || - node->u.leaf.keysym != XKB_KEY_NoSymbol) { - scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding"); - node->u.leaf.utf8 = 0; - node->u.leaf.keysym = XKB_KEY_NoSymbol; - } - - { - uint32_t successor = add_node(table, production->lhs[lhs_pos + 1]); - /* Refetch since add_node could have realloc()ed. */ - node = &darray_item(table->nodes, curr); - node->is_leaf = false; - node->u.successor = successor; - } - } - - curr = node->u.successor; - node = &darray_item(table->nodes, curr); - } - - if (!node->is_leaf) { - scanner_warn(s, "this compose sequence is a prefix of another; skipping line"); - return; - } - - if (node->u.leaf.utf8 != 0 || node->u.leaf.keysym != XKB_KEY_NoSymbol) { - bool same_string = - (node->u.leaf.utf8 == 0 && !production->has_string) || - ( - node->u.leaf.utf8 != 0 && production->has_string && - streq(&darray_item(table->utf8, node->u.leaf.utf8), - production->string) - ); - bool same_keysym = - (node->u.leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) || - ( - node->u.leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym && - node->u.leaf.keysym == production->keysym - ); - if (same_string && same_keysym) { - scanner_warn(s, "this compose sequence is a duplicate of another; skipping line"); - return; - } - scanner_warn(s, "this compose sequence already exists; overriding"); - } - - if (production->has_string) { - node->u.leaf.utf8 = darray_size(table->utf8); - darray_append_items(table->utf8, production->string, - strlen(production->string) + 1); - } - if (production->has_keysym) { - node->u.leaf.keysym = production->keysym; - } -} - -/* Should match resolve_modifier(). */ -#define ALL_MODS_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 3)) - -static xkb_mod_index_t -resolve_modifier(const char *name) -{ - static const struct { - const char *name; - xkb_mod_index_t mod; - } mods[] = { - { "Shift", 0 }, - { "Ctrl", 2 }, - { "Alt", 3 }, - { "Meta", 3 }, - { "Lock", 1 }, - { "Caps", 1 }, - }; - - for (unsigned i = 0; i < ARRAY_SIZE(mods); i++) - if (streq(name, mods[i].name)) - return mods[i].mod; - - return XKB_MOD_INVALID; -} - -static bool -parse(struct xkb_compose_table *table, struct scanner *s, - unsigned include_depth); - -static bool -do_include(struct xkb_compose_table *table, struct scanner *s, - const char *path, unsigned include_depth) -{ - FILE *file; - bool ok; - char *string; - size_t size; - struct scanner new_s; - - if (include_depth >= MAX_INCLUDE_DEPTH) { - scanner_err(s, "maximum include depth (%d) exceeded; maybe there is an include loop?", - MAX_INCLUDE_DEPTH); - return false; - } - - file = fopen(path, "r"); - if (!file) { - scanner_err(s, "failed to open included Compose file \"%s\": %s", - path, strerror(errno)); - return false; - } - - ok = map_file(file, &string, &size); - if (!ok) { - scanner_err(s, "failed to read included Compose file \"%s\": %s", - path, strerror(errno)); - goto err_file; - } - - scanner_init(&new_s, table->ctx, string, size, path, s->priv); - - ok = parse(table, &new_s, include_depth + 1); - if (!ok) - goto err_unmap; - -err_unmap: - unmap_file(string, size); -err_file: - fclose(file); - return ok; -} - -static bool -parse(struct xkb_compose_table *table, struct scanner *s, - unsigned include_depth) -{ - enum rules_token tok; - union lvalue val; - xkb_keysym_t keysym; - struct production production; - enum { MAX_ERRORS = 10 }; - int num_errors = 0; - -initial: - production.len = 0; - production.has_keysym = false; - production.has_string = false; - production.mods = 0; - production.modmask = 0; - - /* fallthrough */ - -initial_eol: - switch (tok = lex(s, &val)) { - case TOK_END_OF_LINE: - goto initial_eol; - case TOK_END_OF_FILE: - goto finished; - case TOK_INCLUDE: - goto include; - default: - goto lhs_tok; - } - -include: - switch (tok = lex_include_string(s, table, &val)) { - case TOK_INCLUDE_STRING: - goto include_eol; - default: - goto unexpected; - } - -include_eol: - switch (tok = lex(s, &val)) { - case TOK_END_OF_LINE: - if (!do_include(table, s, val.string.str, include_depth)) - goto fail; - goto initial; - default: - goto unexpected; - } - -lhs: - tok = lex(s, &val); -lhs_tok: - switch (tok) { - case TOK_COLON: - if (production.len <= 0) { - scanner_warn(s, "expected at least one keysym on left-hand side; skipping line"); - goto skip; - } - goto rhs; - case TOK_IDENT: - if (streq(val.string.str, "None")) { - production.mods = 0; - production.modmask = ALL_MODS_MASK; - goto lhs_keysym; - } - goto lhs_mod_list_tok; - case TOK_TILDE: - goto lhs_mod_list_tok; - case TOK_BANG: - production.modmask = ALL_MODS_MASK; - goto lhs_mod_list; - default: - goto lhs_keysym_tok; - } - -lhs_keysym: - tok = lex(s, &val); -lhs_keysym_tok: - switch (tok) { - case TOK_LHS_KEYSYM: - keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS); - if (keysym == XKB_KEY_NoSymbol) { - scanner_err(s, "unrecognized keysym \"%s\" on left-hand side", - val.string.str); - goto error; - } - if (production.len + 1 > MAX_LHS_LEN) { - scanner_warn(s, "too many keysyms (%d) on left-hand side; skipping line", - MAX_LHS_LEN + 1); - goto skip; - } - production.lhs[production.len++] = keysym; - production.mods = 0; - production.modmask = 0; - goto lhs; - default: - goto unexpected; - } - -lhs_mod_list: - tok = lex(s, &val); -lhs_mod_list_tok: { - bool tilde = false; - xkb_mod_index_t mod; - - if (tok != TOK_TILDE && tok != TOK_IDENT) - goto lhs_keysym_tok; - - if (tok == TOK_TILDE) { - tilde = true; - tok = lex(s, &val); - } - - if (tok != TOK_IDENT) - goto unexpected; - - mod = resolve_modifier(val.string.str); - if (mod == XKB_MOD_INVALID) { - scanner_err(s, "unrecognized modifier \"%s\"", - val.string.str); - goto error; - } - - production.modmask |= 1 << mod; - if (tilde) - production.mods &= ~(1 << mod); - else - production.mods |= 1 << mod; - - goto lhs_mod_list; - } - -rhs: - switch (tok = lex(s, &val)) { - case TOK_STRING: - if (production.has_string) { - scanner_warn(s, "right-hand side can have at most one string; skipping line"); - goto skip; - } - if (val.string.len <= 0) { - scanner_warn(s, "right-hand side string must not be empty; skipping line"); - goto skip; - } - if (val.string.len >= sizeof(production.string)) { - scanner_warn(s, "right-hand side string is too long; skipping line"); - goto skip; - } - strcpy(production.string, val.string.str); - production.has_string = true; - goto rhs; - case TOK_IDENT: - keysym = xkb_keysym_from_name(val.string.str, XKB_KEYSYM_NO_FLAGS); - if (keysym == XKB_KEY_NoSymbol) { - scanner_err(s, "unrecognized keysym \"%s\" on right-hand side", - val.string.str); - goto error; - } - if (production.has_keysym) { - scanner_warn(s, "right-hand side can have at most one keysym; skipping line"); - goto skip; - } - production.keysym = keysym; - production.has_keysym = true; - /* fallthrough */ - case TOK_END_OF_LINE: - if (!production.has_string && !production.has_keysym) { - scanner_warn(s, "right-hand side must have at least one of string or keysym; skipping line"); - goto skip; - } - add_production(table, s, &production); - goto initial; - default: - goto unexpected; - } - -unexpected: - if (tok != TOK_ERROR) - scanner_err(s, "unexpected token"); -error: - num_errors++; - if (num_errors <= MAX_ERRORS) - goto skip; - - scanner_err(s, "too many errors"); - goto fail; - -fail: - scanner_err(s, "failed to parse file"); - return false; - -skip: - while (tok != TOK_END_OF_LINE && tok != TOK_END_OF_FILE) - tok = lex(s, &val); - goto initial; - -finished: - return true; -} - -bool -parse_string(struct xkb_compose_table *table, const char *string, size_t len, - const char *file_name) -{ - struct scanner s; - scanner_init(&s, table->ctx, string, len, file_name, NULL); - if (!parse(table, &s, 0)) - return false; - /* Maybe the allocator can use the excess space. */ - darray_shrink(table->nodes); - darray_shrink(table->utf8); - return true; -} - -bool -parse_file(struct xkb_compose_table *table, FILE *file, const char *file_name) -{ - bool ok; - char *string; - size_t size; - - ok = map_file(file, &string, &size); - if (!ok) { - log_err(table->ctx, "Couldn't read Compose file %s: %s\n", - file_name, strerror(errno)); - return false; - } - - ok = parse_string(table, string, size, file_name); - unmap_file(string, size); - return ok; -} |