Diffstat (limited to 'chromium/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py')
-rw-r--r--  chromium/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py  277
1 file changed, 277 insertions, 0 deletions
diff --git a/chromium/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py b/chromium/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py
new file mode 100644
index 00000000000..34419acd53c
--- /dev/null
+++ b/chromium/mojo/public/tools/bindings/pylib/mojom/parse/lexer.py
@@ -0,0 +1,277 @@
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import imp
+import os.path
+import sys
+
+# Disable lint check for finding modules:
+# pylint: disable=F0401
+
+def _GetDirAbove(dirname):
+  """Returns the directory above this file that contains |dirname| (|dirname|
+  must itself be a directory above this file)."""
+ path = os.path.abspath(__file__)
+ while True:
+ path, tail = os.path.split(path)
+ assert tail
+ if tail == dirname:
+ return path
+
+try:
+ imp.find_module("ply")
+except ImportError:
+ sys.path.append(os.path.join(_GetDirAbove("mojo"), "third_party"))
+from ply.lex import TOKEN
+
+from ..error import Error
+
+
+# Disable lint check for exceptions deriving from Exception:
+# pylint: disable=W0710
+class LexError(Error):
+ """Class for errors from the lexer."""
+
+ def __init__(self, filename, message, lineno):
+ Error.__init__(self, filename, message, lineno=lineno)
+
+
+# We have methods which look like they could be functions:
+# pylint: disable=R0201
+class Lexer(object):
+
+ def __init__(self, filename):
+ self.filename = filename
+
+ ######################-- PRIVATE --######################
+
+ ##
+ ## Internal auxiliary methods
+ ##
+ def _error(self, msg, token):
+ raise LexError(self.filename, msg, token.lineno)
+
+ ##
+ ## Reserved keywords
+ ##
+ keywords = (
+ 'HANDLE',
+
+ 'IMPORT',
+ 'MODULE',
+ 'STRUCT',
+ 'INTERFACE',
+ 'ENUM',
+ 'CONST',
+ 'TRUE',
+ 'FALSE',
+ 'DEFAULT',
+ )
+
+ keyword_map = {}
+ for keyword in keywords:
+ keyword_map[keyword.lower()] = keyword
+
+ ##
+ ## All the tokens recognized by the lexer
+ ##
+ tokens = keywords + (
+ # Identifiers
+ 'NAME',
+
+ # Constants
+ 'ORDINAL',
+ 'INT_CONST_DEC', 'INT_CONST_HEX',
+ 'FLOAT_CONST',
+ 'CHAR_CONST',
+
+ # String literals
+ 'STRING_LITERAL',
+
+ # Operators
+ 'MINUS',
+ 'PLUS',
+ 'AMP',
+
+ # Assignment
+ 'EQUALS',
+
+ # Request / response
+ 'RESPONSE',
+
+ # Delimiters
+ 'LPAREN', 'RPAREN', # ( )
+ 'LBRACKET', 'RBRACKET', # [ ]
+ 'LBRACE', 'RBRACE', # { }
+ 'LANGLE', 'RANGLE', # < >
+ 'SEMI', # ;
+ 'COMMA', 'DOT' # , .
+ )
+
+ ##
+ ## Regexes for use in tokens
+ ##
+
+ # valid C identifiers (K&R2: A.2.3)
+ identifier = r'[a-zA-Z_][0-9a-zA-Z_]*'
+
+ hex_prefix = '0[xX]'
+ hex_digits = '[0-9a-fA-F]+'
+
+ # integer constants (K&R2: A.2.5.1)
+ decimal_constant = '0|([1-9][0-9]*)'
+ hex_constant = hex_prefix+hex_digits
+ # Don't allow octal constants (even invalid octal).
+ octal_constant_disallowed = '0[0-9]+'
+
+ # character constants (K&R2: A.2.5.2)
+ # Note: a-zA-Z and '.-~^_!=&;,' are allowed as escape chars to support #line
+  # directives with Windows paths as filenames (..\..\dir\file).
+  # For the same reason, decimal_escape allows all digit sequences. We want to
+  # parse all correct code, even if that means sometimes accepting incorrect
+  # code as well.
+ #
+ simple_escape = r"""([a-zA-Z._~!=&\^\-\\?'"])"""
+ decimal_escape = r"""(\d+)"""
+ hex_escape = r"""(x[0-9a-fA-F]+)"""
+ bad_escape = r"""([\\][^a-zA-Z._~^!=&\^\-\\?'"x0-7])"""
+
+ escape_sequence = \
+ r"""(\\("""+simple_escape+'|'+decimal_escape+'|'+hex_escape+'))'
+ cconst_char = r"""([^'\\\n]|"""+escape_sequence+')'
+ char_const = "'"+cconst_char+"'"
+ unmatched_quote = "('"+cconst_char+"*\\n)|('"+cconst_char+"*$)"
+ bad_char_const = \
+ r"""('"""+cconst_char+"""[^'\n]+')|('')|('"""+ \
+ bad_escape+r"""[^'\n]*')"""
+
+ # string literals (K&R2: A.2.6)
+ string_char = r"""([^"\\\n]|"""+escape_sequence+')'
+ string_literal = '"'+string_char+'*"'
+ bad_string_literal = '"'+string_char+'*'+bad_escape+string_char+'*"'
+
+ # floating constants (K&R2: A.2.5.3)
+ exponent_part = r"""([eE][-+]?[0-9]+)"""
+ fractional_constant = r"""([0-9]*\.[0-9]+)|([0-9]+\.)"""
+ floating_constant = \
+ '(((('+fractional_constant+')'+ \
+ exponent_part+'?)|([0-9]+'+exponent_part+')))'
+
+ # Ordinals
+ ordinal = r'@[0-9]+'
+ missing_ordinal_value = r'@'
+ # Don't allow ordinal values in octal (even invalid octal, like 09) or
+ # hexadecimal.
+ octal_or_hex_ordinal_disallowed = r'@((0[0-9]+)|('+hex_prefix+hex_digits+'))'
+
+ ##
+ ## Rules for the normal state
+ ##
+ t_ignore = ' \t\r'
+
+ # Newlines
+ def t_NEWLINE(self, t):
+ r'\n+'
+ t.lexer.lineno += len(t.value)
+
+ # Operators
+ t_MINUS = r'-'
+ t_PLUS = r'\+'
+ t_AMP = r'&'
+
+ # =
+ t_EQUALS = r'='
+
+ # =>
+ t_RESPONSE = r'=>'
+
+ # Delimiters
+ t_LPAREN = r'\('
+ t_RPAREN = r'\)'
+ t_LBRACKET = r'\['
+ t_RBRACKET = r'\]'
+ t_LBRACE = r'\{'
+ t_RBRACE = r'\}'
+ t_LANGLE = r'<'
+ t_RANGLE = r'>'
+ t_COMMA = r','
+ t_DOT = r'\.'
+ t_SEMI = r';'
+
+ t_STRING_LITERAL = string_literal
+
+  # The following floating and integer constants are defined as
+  # functions to impose a strict matching order (rules given as plain
+  # strings are ordered by decreasing regex length, which would, e.g.,
+  # try the decimal rule before the disallowed-octal rule).
+  #
+ @TOKEN(floating_constant)
+ def t_FLOAT_CONST(self, t):
+ return t
+
+ @TOKEN(hex_constant)
+ def t_INT_CONST_HEX(self, t):
+ return t
+
+ @TOKEN(octal_constant_disallowed)
+ def t_OCTAL_CONSTANT_DISALLOWED(self, t):
+ msg = "Octal values not allowed"
+ self._error(msg, t)
+
+ @TOKEN(decimal_constant)
+ def t_INT_CONST_DEC(self, t):
+ return t
+
+ # Must come before bad_char_const, to prevent it from
+ # catching valid char constants as invalid
+ #
+ @TOKEN(char_const)
+ def t_CHAR_CONST(self, t):
+ return t
+
+ @TOKEN(unmatched_quote)
+ def t_UNMATCHED_QUOTE(self, t):
+ msg = "Unmatched '"
+ self._error(msg, t)
+
+ @TOKEN(bad_char_const)
+ def t_BAD_CHAR_CONST(self, t):
+ msg = "Invalid char constant %s" % t.value
+ self._error(msg, t)
+
+  # Unmatched string literals fall through to t_error below.
+
+ @TOKEN(bad_string_literal)
+ def t_BAD_STRING_LITERAL(self, t):
+ msg = "String contains invalid escape code"
+ self._error(msg, t)
+
+ # Handle ordinal-related tokens in the right order:
+ @TOKEN(octal_or_hex_ordinal_disallowed)
+ def t_OCTAL_OR_HEX_ORDINAL_DISALLOWED(self, t):
+ msg = "Octal and hexadecimal ordinal values not allowed"
+ self._error(msg, t)
+
+ @TOKEN(ordinal)
+ def t_ORDINAL(self, t):
+ return t
+
+ @TOKEN(missing_ordinal_value)
+ def t_BAD_ORDINAL(self, t):
+ msg = "Missing ordinal value"
+ self._error(msg, t)
+
+ @TOKEN(identifier)
+ def t_NAME(self, t):
+ t.type = self.keyword_map.get(t.value, "NAME")
+ return t
+
+ # Ignore C and C++ style comments
+ def t_COMMENT(self, t):
+ r'(/\*(.|\n)*?\*/)|(//.*(\n[ \t]*//.*)*)'
+ t.lexer.lineno += t.value.count("\n")
+
+ def t_error(self, t):
+ msg = "Illegal character %s" % repr(t.value[0])
+ self._error(msg, t)
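
A minimal usage sketch (not part of the change above), assuming ply is importable: PLY builds the working lexer from an instance of the Lexer class via its object mode, lex.lex(object=...), which is presumably how the accompanying mojom parser drives it. The filename and the mojom snippet below are made up for illustration.

from ply import lex

mojom_lexer = Lexer("example.mojom")     # hypothetical filename; only used in error messages
ply_lexer = lex.lex(object=mojom_lexer)  # PLY collects |tokens| and the t_* rules from the instance

ply_lexer.input("interface Frobinator { Frobinate() => (bool result); };")
for tok in iter(ply_lexer.token, None):  # token() returns None at end of input
  print("%s %s" % (tok.type, tok.value))

Keywords such as "interface" come out with their keyword token types (INTERFACE) rather than NAME, because t_NAME remaps them through keyword_map; malformed input (octal literals, bad ordinals, unmatched quotes) raises LexError with the filename and line number.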