summary | refs | log | tree | commit | diff | stats
path: root/lib/Format/Encoding.h
diff options
context:
space:
mode:
author: Alexander Kornienko <alexfh@google.com> 2013-09-05 14:08:34 +0000
committer: Alexander Kornienko <alexfh@google.com> 2013-09-05 14:08:34 +0000
commit: 0b62cc30c9aa462184de0435dc083d944a41d67f (patch)
tree: eda31e19d3265b07413dd9e4a1e870d0dd2bd916 /lib/Format/Encoding.h
parent: deb77991a2bef594285864d72cb2bf90dec706ff (diff)
Handle zero-width and double-width characters in string literals and comments.
Summary: Count column width instead of the number of code points. This also includes correct handling of tabs inside string literals and comments (with the exception of multiline string literals/comments, where tabs are present before the first escaped newline). Reviewers: djasper, klimek Reviewed By: klimek CC: cfe-commits, klimek Differential Revision: http://llvm-reviews.chandlerc.com/D1601 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@190052 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Format/Encoding.h')
-rw-r--r-- lib/Format/Encoding.h 32
1 files changed, 32 insertions, 0 deletions
diff --git a/lib/Format/Encoding.h b/lib/Format/Encoding.h
index e9e9ae71c7..356334d537 100644
--- a/lib/Format/Encoding.h
+++ b/lib/Format/Encoding.h
@@ -18,6 +18,7 @@
#include "clang/Basic/LLVM.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Unicode.h"
namespace clang {
namespace format {
@@ -57,6 +58,37 @@ inline unsigned getCodePointCount(StringRef Text, Encoding Encoding) {
}
}
+/// \brief Computes how many display columns \p Text occupies.
+///
+/// When \p Encoding is Encoding_UTF8 the measurement is delegated to
+/// llvm::sys::unicode::columnWidthUTF8(). If that call yields a negative
+/// value, or if any other encoding is in use, the result is Text.size().
+inline unsigned columnWidth(StringRef Text, Encoding Encoding) {
+  if (Encoding != Encoding_UTF8)
+    return Text.size();
+
+  const int MeasuredWidth = llvm::sys::unicode::columnWidthUTF8(Text);
+  // A negative result is not usable as a width; fall back to the size.
+  if (MeasuredWidth < 0)
+    return Text.size();
+  return MeasuredWidth;
+}
+
+/// \brief Returns the number of columns required to display the \p Text,
+/// starting from the \p StartColumn on a terminal with the \p TabWidth. The
+/// text is assumed to use the specified \p Encoding.
+///
+/// Each tab character advances the running width to the next multiple of
+/// \p TabWidth, counted from \p StartColumn. A \p TabWidth of 0 makes tabs
+/// contribute no columns (this avoids a modulo by zero).
+inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn,
+                                    unsigned TabWidth, Encoding Encoding) {
+  unsigned TotalWidth = 0;
+  StringRef Tail = Text;
+  for (;;) {
+    StringRef::size_type TabPos = Tail.find('\t');
+    // No more tabs: the remainder is measured as plain text.
+    if (TabPos == StringRef::npos)
+      return TotalWidth + columnWidth(Tail, Encoding);
+    // Width of the tab-free run that precedes this tab.
+    TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding);
+    // Pad up to the next tab stop. Skipped for TabWidth == 0, where the
+    // modulo below would be undefined.
+    if (TabWidth)
+      TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth;
+    Tail = Tail.substr(TabPos + 1);
+  }
+}
+
/// \brief Gets the number of bytes in a sequence representing a single
/// codepoint and starting with FirstChar in the specified Encoding.
inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) {