summaryrefslogtreecommitdiffstats
path: root/include/clang/Basic/CharInfo.h
diff options
context:
space:
mode:
authorJordan Rose <jordan_rose@apple.com>2013-02-08 22:30:22 +0000
committerJordan Rose <jordan_rose@apple.com>2013-02-08 22:30:22 +0000
commit9893902eceba7f01dd1521349d33866f77254d78 (patch)
tree663def75b1c867719f47927560893f869a761668 /include/clang/Basic/CharInfo.h
parente22cef5cb2e460bae88563cfc5fcf98d742d6215 (diff)
Pull Lexer's CharInfo table out for general use throughout Clang.
Rewriting the same predicates over and over again is bad for code size and code maintenance. Using the functions in <ctype.h> is generally unsafe unless they are specified to be locale-independent (i.e. only isdigit and isxdigit). The next commit will try to clean up uses of <ctype.h> functions within Clang. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@174765 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'include/clang/Basic/CharInfo.h')
-rw-r--r--include/clang/Basic/CharInfo.h162
1 files changed, 162 insertions, 0 deletions
diff --git a/include/clang/Basic/CharInfo.h b/include/clang/Basic/CharInfo.h
new file mode 100644
index 0000000000..f9b7b7311d
--- /dev/null
+++ b/include/clang/Basic/CharInfo.h
@@ -0,0 +1,162 @@
+//===--- clang/Basic/CharInfo.h - Classifying ASCII Characters ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CLANG_BASIC_CHARINFO_H
+#define CLANG_BASIC_CHARINFO_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace clang {
+namespace charinfo {
+  extern const uint16_t InfoTable[256];  // indexed by 8-bit character value
+  // Classification bits; predicates test InfoTable entries against these.
+  enum {
+    CHAR_HORZ_WS = 1 << 0,   // '\t', '\f', '\v' (but not '\0')
+    CHAR_VERT_WS = 1 << 1,   // '\r', '\n'
+    CHAR_SPACE   = 1 << 2,   // ' '
+    CHAR_DIGIT   = 1 << 3,   // 0-9
+    CHAR_XLETTER = 1 << 4,   // a-f, A-F
+    CHAR_UPPER   = 1 << 5,   // A-Z
+    CHAR_LOWER   = 1 << 6,   // a-z
+    CHAR_UNDER   = 1 << 7,   // _
+    CHAR_PERIOD  = 1 << 8,   // .
+    CHAR_RAWDEL  = 1 << 9,   // {}[]#<>%:;?*+-/^&|~!=,"'
+    CHAR_PUNCT   = 1 << 10   // `$@()
+  };
+  // Convenience combinations of the bits above.
+  enum {
+    CHAR_XUPPER = CHAR_UPPER | CHAR_XLETTER,
+    CHAR_XLOWER = CHAR_LOWER | CHAR_XLETTER
+  };
+} // end namespace charinfo
+
+/// Tests whether \p c lies in the ASCII range, i.e. 0 through 127 inclusive.
+LLVM_READNONE static inline bool isASCII(char c) {
+  return static_cast<unsigned char>(c) < 128;
+}
+
+/// Tests whether \p c may start a C identifier, i.e. matches [a-zA-Z_].
+/// When \p AllowDollar is set, '$' is accepted as well.
+LLVM_READONLY static inline bool isIdentifierHead(unsigned char c,
+                                                  bool AllowDollar = false) {
+  using namespace charinfo;
+  const unsigned HeadMask = CHAR_UPPER | CHAR_LOWER | CHAR_UNDER;
+  const bool IsHeadChar = (InfoTable[c] & HeadMask) != 0;
+  return IsHeadChar || (AllowDollar && c == '$');
+}
+
+/// Tests whether \p c may continue a C identifier, i.e. matches [a-zA-Z0-9_].
+/// When \p AllowDollar is set, '$' is accepted as well.
+LLVM_READONLY static inline bool isIdentifierBody(unsigned char c,
+                                                  bool AllowDollar = false) {
+  using namespace charinfo;
+  const unsigned BodyMask = CHAR_UPPER | CHAR_LOWER | CHAR_DIGIT | CHAR_UNDER;
+  const bool IsBodyChar = (InfoTable[c] & BodyMask) != 0;
+  return IsBodyChar || (AllowDollar && c == '$');
+}
+
+/// Tests whether \p c is ASCII whitespace that stays on the current line:
+/// ' ', '\\t', '\\f', or '\\v'.
+///
+/// The NUL character ('\\0') is not treated as whitespace.
+LLVM_READONLY static inline bool isHorizontalWhitespace(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & (charinfo::CHAR_HORZ_WS | charinfo::CHAR_SPACE)) != 0;
+}
+
+/// Tests whether \p c is an ASCII line-break character: '\\n' or '\\r'.
+///
+/// The NUL character ('\\0') is not treated as whitespace.
+LLVM_READONLY static inline bool isVerticalWhitespace(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & charinfo::CHAR_VERT_WS) != 0;
+}
+
+/// Tests whether \p c is any ASCII whitespace, horizontal or vertical:
+/// ' ', '\\t', '\\f', '\\v', '\\n', or '\\r'. The NUL character ('\\0') is not
+/// treated as whitespace.
+LLVM_READONLY static inline bool isWhitespace(unsigned char c) {
+  using namespace charinfo;
+  const unsigned WhitespaceMask = CHAR_HORZ_WS | CHAR_VERT_WS | CHAR_SPACE;
+  return (InfoTable[c] & WhitespaceMask) != 0;
+}
+
+/// Tests whether \p c is one of the ASCII decimal digits, [0-9].
+LLVM_READONLY static inline bool isDigit(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & charinfo::CHAR_DIGIT) != 0;
+}
+
+/// Tests whether \p c is a lowercase ASCII letter, [a-z].
+LLVM_READONLY static inline bool isLowercase(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & charinfo::CHAR_LOWER) != 0;
+}
+
+/// Tests whether \p c is an uppercase ASCII letter, [A-Z].
+LLVM_READONLY static inline bool isUppercase(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & charinfo::CHAR_UPPER) != 0;
+}
+
+/// Tests whether \p c is an ASCII letter of either case, [a-zA-Z].
+LLVM_READONLY static inline bool isLetter(unsigned char c) {
+  const unsigned Flags = charinfo::InfoTable[c];
+  return (Flags & (charinfo::CHAR_LOWER | charinfo::CHAR_UPPER)) != 0;
+}
+
+/// Tests whether \p c is an ASCII letter or decimal digit, [a-zA-Z0-9].
+LLVM_READONLY static inline bool isAlphanumeric(unsigned char c) {
+  using namespace charinfo;
+  return 0 != (InfoTable[c] & (CHAR_LOWER | CHAR_UPPER | CHAR_DIGIT));
+}
+
+/// Tests whether \p c is an ASCII hexadecimal digit, i.e. one of [0-9a-fA-F].
+/// \returns true for hex digits and false for every other character.
+LLVM_READONLY static inline bool isHexDigit(unsigned char c) {
+  const unsigned HexMask = charinfo::CHAR_DIGIT | charinfo::CHAR_XLETTER;
+  return (charinfo::InfoTable[c] & HexMask) != 0;
+}
+
+/// Tests whether \p c is an ASCII punctuation character. Note that '_'
+/// counts as punctuation here even though it is also an identifier character.
+LLVM_READONLY static inline bool isPunctuation(unsigned char c) {
+  using namespace charinfo;
+  const unsigned Mask = CHAR_UNDER | CHAR_PERIOD | CHAR_RAWDEL | CHAR_PUNCT;
+  return (InfoTable[c] & Mask) != 0;
+}
+
+/// Tests whether \p c is a printable ASCII character, i.e. one that should
+/// occupy exactly one column when printed in a fixed-width terminal.
+LLVM_READONLY static inline bool isPrintable(unsigned char c) {
+  using namespace charinfo;
+  const unsigned PrintableMask = CHAR_UPPER | CHAR_LOWER | CHAR_DIGIT |
+      CHAR_UNDER | CHAR_PERIOD | CHAR_RAWDEL | CHAR_PUNCT | CHAR_SPACE;
+  return (InfoTable[c] & PrintableMask) != 0;
+}
+
+/// Tests whether \p c may appear in a C preprocessing number: [a-zA-Z0-9_.]
+LLVM_READONLY static inline bool isPreprocessingNumberBody(unsigned char c) {
+  using namespace charinfo;
+  const unsigned PPNumberMask =
+      CHAR_UPPER | CHAR_LOWER | CHAR_DIGIT | CHAR_UNDER | CHAR_PERIOD;
+  return (InfoTable[c] & PPNumberMask) != 0;
+}
+
+/// Tests whether \p c is permitted inside the delimiter of a C++ raw string.
+LLVM_READONLY static inline bool isRawStringDelimBody(unsigned char c) {
+  using namespace charinfo;
+  return 0 != (InfoTable[c] & (CHAR_RAWDEL | CHAR_UNDER | CHAR_PERIOD |
+                               CHAR_DIGIT | CHAR_LOWER | CHAR_UPPER));
+}
+
+} // end namespace clang
+
+#endif