summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCameron Desrochers <cameron@moodycamel.com>2017-09-20 19:03:37 +0000
committerCameron Desrochers <cameron@moodycamel.com>2017-09-20 19:03:37 +0000
commitaacb7039877a344e6063dc8e1214f82ef3b69fca (patch)
tree5f6f9ef10d75e2a15ca523d154686857f804788e
parent93bd7cd32dec27ea8f4a2ba67c70a54b0905dbfb (diff)
[PCH] Fixed preamble breaking with BOM presence (and particularly, fluctuating BOM presence)
This patch fixes broken preamble-skipping when the preamble region includes a byte order mark (BOM). Previously, parsing would fail if preamble PCH generation was enabled and a BOM was present. This also fixes preamble invalidation when a BOM appears or disappears. This may seem to be an obscure edge case, but it happens regularly with IDEs that pass buffer overrides that never (or always) have a BOM, yet the underlying file from the initial parse that generated a PCH might (or might not) have a BOM. I've included a test case for these scenarios. Differential Revision: https://reviews.llvm.org/D37491 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@313796 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/clang/Frontend/PrecompiledPreamble.h15
-rw-r--r--include/clang/Lex/Lexer.h27
-rw-r--r--include/clang/Lex/PreprocessorOptions.h4
-rw-r--r--lib/Frontend/FrontendActions.cpp2
-rw-r--r--lib/Frontend/PrecompiledPreamble.cpp3
-rw-r--r--lib/Lex/Lexer.cpp14
-rw-r--r--lib/Lex/Preprocessor.cpp6
-rw-r--r--unittests/Frontend/PCHPreambleTest.cpp44
8 files changed, 80 insertions, 35 deletions
diff --git a/include/clang/Frontend/PrecompiledPreamble.h b/include/clang/Frontend/PrecompiledPreamble.h
index 61cb31bd31..6b0b6261e4 100644
--- a/include/clang/Frontend/PrecompiledPreamble.h
+++ b/include/clang/Frontend/PrecompiledPreamble.h
@@ -36,21 +36,6 @@ class CompilerInvocation;
class DeclGroupRef;
class PCHContainerOperations;
-/// A size of the preamble and a flag required by
-/// PreprocessorOptions::PrecompiledPreambleBytes.
-struct PreambleBounds {
- PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
- : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
-
- /// \brief Size of the preamble in bytes.
- unsigned Size;
- /// \brief Whether the preamble ends at the start of a new line.
- ///
- /// Used to inform the lexer as to whether it's starting at the beginning of
- /// a line after skipping the preamble.
- bool PreambleEndsAtStartOfLine;
-};
-
/// \brief Runs lexer to compute suggested preamble bounds.
PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts,
llvm::MemoryBuffer *Buffer,
diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h
index aa8bf3891e..603ce10f64 100644
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@@ -39,6 +39,23 @@ enum ConflictMarkerKind {
CMK_Perforce
};
+/// Describes the bounds (start, size) of the preamble and a flag required by
+/// PreprocessorOptions::PrecompiledPreambleBytes.
+/// The preamble includes the BOM, if any.
+struct PreambleBounds {
+ PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
+ : Size(Size),
+ PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
+
+ /// \brief Size of the preamble in bytes.
+ unsigned Size;
+ /// \brief Whether the preamble ends at the start of a new line.
+ ///
+ /// Used to inform the lexer as to whether it's starting at the beginning of
+ /// a line after skipping the preamble.
+ bool PreambleEndsAtStartOfLine;
+};
+
/// Lexer - This provides a simple interface that turns a text buffer into a
/// stream of tokens. This provides no support for file reading or buffering,
/// or buffering/seeking of tokens, only forward lexing is supported. It relies
@@ -443,11 +460,11 @@ public:
/// to fewer than this number of lines.
///
/// \returns The offset into the file where the preamble ends and the rest
- /// of the file begins along with a boolean value indicating whether
+ /// of the file begins along with a boolean value indicating whether
/// the preamble ends at the beginning of a new line.
- static std::pair<unsigned, bool> ComputePreamble(StringRef Buffer,
- const LangOptions &LangOpts,
- unsigned MaxLines = 0);
+ static PreambleBounds ComputePreamble(StringRef Buffer,
+ const LangOptions &LangOpts,
+ unsigned MaxLines = 0);
/// \brief Checks that the given token is the first token that occurs after
/// the given location (this excludes comments and whitespace). Returns the
@@ -618,7 +635,7 @@ private:
//===--------------------------------------------------------------------===//
// Other lexer functions.
- void SkipBytes(unsigned Bytes, bool StartOfLine);
+ void SetByteOffset(unsigned Offset, bool StartOfLine);
void PropagateLineStartLeadingSpaceInfo(Token &Result);
diff --git a/include/clang/Lex/PreprocessorOptions.h b/include/clang/Lex/PreprocessorOptions.h
index d91c665cf1..760b308f92 100644
--- a/include/clang/Lex/PreprocessorOptions.h
+++ b/include/clang/Lex/PreprocessorOptions.h
@@ -160,7 +160,7 @@ public:
DisablePCHValidation(false),
AllowPCHWithCompilerErrors(false),
DumpDeserializedPCHDecls(false),
- PrecompiledPreambleBytes(0, true),
+ PrecompiledPreambleBytes(0, false),
GeneratePreamble(false),
RemappedFilesKeepOriginalName(true),
RetainRemappedFileBuffers(false),
@@ -195,7 +195,7 @@ public:
LexEditorPlaceholders = true;
RetainRemappedFileBuffers = true;
PrecompiledPreambleBytes.first = 0;
- PrecompiledPreambleBytes.second = 0;
+ PrecompiledPreambleBytes.second = false;
}
};
diff --git a/lib/Frontend/FrontendActions.cpp b/lib/Frontend/FrontendActions.cpp
index 3e3483d2c6..86460f17d0 100644
--- a/lib/Frontend/FrontendActions.cpp
+++ b/lib/Frontend/FrontendActions.cpp
@@ -591,7 +591,7 @@ void PrintPreambleAction::ExecuteAction() {
auto Buffer = CI.getFileManager().getBufferForFile(getCurrentFile());
if (Buffer) {
unsigned Preamble =
- Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).first;
+ Lexer::ComputePreamble((*Buffer)->getBuffer(), CI.getLangOpts()).Size;
llvm::outs().write((*Buffer)->getBufferStart(), Preamble);
}
}
diff --git a/lib/Frontend/PrecompiledPreamble.cpp b/lib/Frontend/PrecompiledPreamble.cpp
index cd7446189c..bd6770acdf 100644
--- a/lib/Frontend/PrecompiledPreamble.cpp
+++ b/lib/Frontend/PrecompiledPreamble.cpp
@@ -195,8 +195,7 @@ template <class T> bool moveOnNoError(llvm::ErrorOr<T> Val, T &Output) {
PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts,
llvm::MemoryBuffer *Buffer,
unsigned MaxLines) {
- auto Pre = Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
- return PreambleBounds(Pre.first, Pre.second);
+ return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines);
}
llvm::ErrorOr<PrecompiledPreamble> PrecompiledPreamble::Build(
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 928c24d94c..b7f97a583d 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -552,9 +552,9 @@ namespace {
} // end anonymous namespace
-std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
- const LangOptions &LangOpts,
- unsigned MaxLines) {
+PreambleBounds Lexer::ComputePreamble(StringRef Buffer,
+ const LangOptions &LangOpts,
+ unsigned MaxLines) {
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
@@ -688,7 +688,7 @@ std::pair<unsigned, bool> Lexer::ComputePreamble(StringRef Buffer,
else
End = TheTok.getLocation();
- return std::make_pair(End.getRawEncoding() - StartLoc.getRawEncoding(),
+ return PreambleBounds(End.getRawEncoding() - FileLoc.getRawEncoding(),
TheTok.isAtStartOfLine());
}
@@ -1394,9 +1394,9 @@ Slash:
// Helper methods for lexing.
//===----------------------------------------------------------------------===//
-/// \brief Routine that indiscriminately skips bytes in the source file.
-void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
- BufferPtr += Bytes;
+/// \brief Routine that indiscriminately sets the offset into the source file.
+void Lexer::SetByteOffset(unsigned Offset, bool StartOfLine) {
+ BufferPtr = BufferStart + Offset;
if (BufferPtr > BufferEnd)
BufferPtr = BufferEnd;
// FIXME: What exactly does the StartOfLine bit mean? There are two
diff --git a/lib/Lex/Preprocessor.cpp b/lib/Lex/Preprocessor.cpp
index e1294994df..1f9a469bc5 100644
--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@@ -516,9 +516,9 @@ void Preprocessor::EnterMainSourceFile() {
// If we've been asked to skip bytes in the main file (e.g., as part of a
// precompiled preamble), do so now.
if (SkipMainFilePreamble.first > 0)
- CurLexer->SkipBytes(SkipMainFilePreamble.first,
- SkipMainFilePreamble.second);
-
+ CurLexer->SetByteOffset(SkipMainFilePreamble.first,
+ SkipMainFilePreamble.second);
+
// Tell the header info that the main file was entered. If the file is later
// #imported, it won't be re-entered.
if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
diff --git a/unittests/Frontend/PCHPreambleTest.cpp b/unittests/Frontend/PCHPreambleTest.cpp
index a771167cc7..162a281b04 100644
--- a/unittests/Frontend/PCHPreambleTest.cpp
+++ b/unittests/Frontend/PCHPreambleTest.cpp
@@ -153,4 +153,48 @@ TEST_F(PCHPreambleTest, ReparseWithOverriddenFileDoesNotInvalidatePreamble) {
ASSERT_EQ(initialCounts[2], GetFileReadCount(Header2));
}
+TEST_F(PCHPreambleTest, ParseWithBom) {
+ std::string Header = "//./header.h";
+ std::string Main = "//./main.cpp";
+ AddFile(Header, "int random() { return 4; }");
+ AddFile(Main,
+ "\xef\xbb\xbf"
+ "#include \"//./header.h\"\n"
+ "int main() { return random() -2; }");
+
+ std::unique_ptr<ASTUnit> AST(ParseAST(Main));
+ ASSERT_TRUE(AST.get());
+ ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+ unsigned HeaderReadCount = GetFileReadCount(Header);
+
+ ASSERT_TRUE(ReparseAST(AST));
+ ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+ // Check preamble PCH was really reused
+ ASSERT_EQ(HeaderReadCount, GetFileReadCount(Header));
+
+ // Remove BOM
+ RemapFile(Main,
+ "#include \"//./header.h\"\n"
+ "int main() { return random() -2; }");
+
+ ASSERT_TRUE(ReparseAST(AST));
+ ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+ ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+ HeaderReadCount = GetFileReadCount(Header);
+
+ // Add BOM back
+ RemapFile(Main,
+ "\xef\xbb\xbf"
+ "#include \"//./header.h\"\n"
+ "int main() { return random() -2; }");
+
+ ASSERT_TRUE(ReparseAST(AST));
+ ASSERT_FALSE(AST->getDiagnostics().hasErrorOccurred());
+
+ ASSERT_LE(HeaderReadCount, GetFileReadCount(Header));
+}
+
} // anonymous namespace