diff options
author | Edward Welbourne <edward.welbourne@theqtcompany.com> | 2015-11-24 14:45:52 +0100 |
---|---|---|
committer | Edward Welbourne <edward.welbourne@theqtcompany.com> | 2015-12-15 09:21:27 +0000 |
commit | 214e271b93b9c9f4f52d2e56956cf5c8da9b4251 (patch) | |
tree | f40d73c9871998b3863e91bddc50b01d62ef1adf /qmake | |
parent | d47baa7236fb0c44e85c1247eedec444aefe428c (diff) |
Rewrote qmake's #include-detection to be more faithful to CPP.
The C preprocessor allows backslash-newline anywhere and allows
comments anywhere it allows space. Testing wilfully perverse
applications of that revealed qmake's parsing of #include directives
wasn't very robust. So rework to actually follow the rules and add
those tests.
Change-Id: If5cc7bfb65f9994e9ab9ed216dd1ee7285c63934
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
Diffstat (limited to 'qmake')
-rw-r--r-- | qmake/generators/makefiledeps.cpp | 338 |
1 files changed, 236 insertions, 102 deletions
diff --git a/qmake/generators/makefiledeps.cpp b/qmake/generators/makefiledeps.cpp index 43f368f0be..656acdc6ba 100644 --- a/qmake/generators/makefiledeps.cpp +++ b/qmake/generators/makefiledeps.cpp @@ -388,6 +388,40 @@ QFileInfo QMakeSourceFileInfo::findFileInfo(const QMakeLocalFileName &dep) return QFileInfo(dep.real()); } +static int skipEscapedLineEnds(const char *buffer, int buffer_len, int offset, int *lines) +{ + // Join physical lines to make logical lines, as in the C preprocessor + while (offset + 1 < buffer_len + && buffer[offset] == '\\' + && qmake_endOfLine(buffer[offset + 1])) { + offset += 2; + ++*lines; + if (offset < buffer_len + && buffer[offset - 1] == '\r' + && buffer[offset] == '\n') // CRLF + offset++; + } + return offset; +} + +static bool matchWhileUnsplitting(const char *buffer, int buffer_len, int start, + const char *needle, int needle_len, + int *matchlen, int *lines) +{ + int x = start; + for (int n = 0; n < needle_len && x < buffer_len; + n++, x = skipEscapedLineEnds(buffer, buffer_len, x + 1, lines)) { + if (buffer[x] != needle[n]) + return false; + } + // That also skipped any remaining BSNLs immediately after the match. + + // Tell caller how long the match was: + *matchlen = x - start; + + return true; +} + bool QMakeSourceFileInfo::findDeps(SourceFile *file) { if(file->dep_checked || file->type == TYPE_UNKNOWN) @@ -426,6 +460,18 @@ bool QMakeSourceFileInfo::findDeps(SourceFile *file) file->deps = new SourceDependChildren; int line_count = 1; + enum { + /* + States of C preprocessing (for TYPE_C only), after backslash-newline + elimination and skipping comments and spaces (i.e. in ANSI X3.159-1989 + section 2.1.1.2's phase 4). We're about to study buffer[x] to decide + on which transition to do. + */ + AtStart, // start of logical line; a # may start a preprocessor directive + HadHash, // saw a # at start, looking for preprocessor keyword + WantName, // saw #include or #import, waiting for name + InCode // after directive, parsing non-#include directive or in actual code + } cpp_state = AtStart; for(int x = 0; x < buffer_len; ++x) { bool try_local = true; @@ -505,144 +551,232 @@ bool QMakeSourceFileInfo::findDeps(SourceFile *file) ++line_count; } else if(file->type == QMakeSourceFileInfo::TYPE_QRC) { } else if(file->type == QMakeSourceFileInfo::TYPE_C) { - for(int beginning=1; x < buffer_len; ++x) { + // We've studied all buffer[i] for i < x + for (; x < buffer_len; ++x) { + // How to handle backslash-newline (BSNL) pairs: +#define SKIP_BSNL(pos) skipEscapedLineEnds(buffer, buffer_len, (pos), &line_count) + // Seek code or directive, skipping comments and space: for(; x < buffer_len; ++x) { + x = SKIP_BSNL(x); if (buffer[x] == ' ' || buffer[x] == '\t') { // keep going - } else if (buffer[x] == '/' && x + 1 < buffer_len && - (buffer[x + 1] == '/' || buffer[x + 1] == '*')) { - ++x; - if (buffer[x] == '/') { // C++-style comment - for (; x < buffer_len && !qmake_endOfLine(buffer[x]); ++x) {} // skip - beginning = 1; - } else { // C-style comment + } else if (buffer[x] == '/') { + int extralines = 0; + int y = skipEscapedLineEnds(buffer, buffer_len, x + 1, &extralines); + if (buffer[y] == '/') { // C++-style comment + line_count += extralines; + x = SKIP_BSNL(y + 1); + while (x < buffer_len && !qmake_endOfLine(buffer[x])) + x = SKIP_BSNL(x + 1); // skip + + cpp_state = AtStart; + ++line_count; + } else if (buffer[y] == '*') { // C-style comment + line_count += extralines; + x = y; while (++x < buffer_len) { + x = SKIP_BSNL(x); if (buffer[x] == '*') { - if (x + 1 < buffer_len && buffer[x + 1] == '/') { - ++x; // skip '*'; for loop skips '/'. + extralines = 0; + y = skipEscapedLineEnds(buffer, buffer_len, + x + 1, &extralines); + if (y < buffer_len && buffer[y] == '/') { + line_count += extralines; + x = y; // for loop shall step past this break; } } else if (qmake_endOfLine(buffer[x])) { ++line_count; } } + } else { + // buffer[x] is the division operator + break; } } else if (qmake_endOfLine(buffer[x])) { ++line_count; - beginning = 1; + cpp_state = AtStart; } else { + /* Drop out of phases 1, 2, 3, into phase 4 */ break; } } + // Phase 4 study of buffer[x]: if(x >= buffer_len) break; - // preprocessor directive - if (beginning && buffer[x] == '#') { - // Advance to start of preprocessing directive - while (++x < buffer_len - && (buffer[x] == ' ' || buffer[x] == '\t')) {} // skip - - if (qmake_endOfLine(buffer[x])) { - ++line_count; - beginning = 1; - continue; + switch (cpp_state) { + case HadHash: + { + // Read keyword; buffer[x] starts first preprocessing token after # + const char *const keyword = buffer + x; + int clean = x; + while (x < buffer_len && buffer[x] >= 'a' && buffer[x] <= 'z') { + // skip over keyword, consolidating it if it contains BSNLs + // (see WantName's similar code consolidating inc, below) + if (clean < x) + buffer[clean++] = buffer[x]; + else + clean++; + + x = SKIP_BSNL(x + 1); } + const int keyword_len = buffer + clean - keyword; + x--; // Still need to study buffer[x] next time round for loop. + + cpp_state = + ((keyword_len == 7 && !strncmp(keyword, "include", 7)) // C & Obj-C + || (keyword_len == 6 && !strncmp(keyword, "import", 6))) // Obj-C + ? WantName : InCode; break; } - // quoted strings - if (buffer[x] == '\'' || buffer[x] == '"') { - // It might be a C++11 raw string. - bool israw = false; - if (buffer[x] == '"' && x > 0) { - int y = x; - while (--y > 0 && (buffer[y] == '8' || buffer[y] == 'u' || buffer[y] == 'U')) {} // skip - israw = (buffer[y] == 'R'); - } - if (israw) { - x++; - const char *const delim = buffer + x; - while (x < buffer_len && buffer[x] != '(') - x++; + case WantName: + { + char term = buffer[x]; + if (term == '<') { + try_local = false; + term = '>'; + } else if (term != '"') { /* - Not checking correctness (trust real compiler to do that): - - no controls, spaces, '(', ')', '\\' or (presumably) '"' in delim; - - at most 16 bytes in delim - */ - - const int delimlen = buffer + x - delim; - while (++x < buffer_len - && (buffer[x] != ')' - || (delimlen > 0 && - strncmp(buffer + x + 1, delim, delimlen)) - || buffer[x + 1 + delimlen] != '"')) {} // skip - // buffer[x] is ')' - x += 1 + delimlen; // 1 for ')', then delim - // buffer[x] is '"' - } else { - const char term = buffer[x]; - while (++x < buffer_len && buffer[x] != term) { - if (buffer[x] == '\\') - ++x; - else if (qmake_endOfLine(buffer[x])) - ++line_count; + Possibly malformed, but this may be something like: + #include IDENTIFIER + which does work, if #define IDENTIFIER "filename" is + in effect. This is beyond this noddy preprocessor's + powers of tracking. So give up and resume searching + for a directive. We haven't made sense of buffer[x], + so back up to ensure we do study it (now as code) next + time round the loop. + */ + x--; + cpp_state = InCode; + continue; + } + + x = SKIP_BSNL(x + 1); + inc = buffer + x; + int clean = x; // offset if we need to clear \-newlines + for (; x < buffer_len && buffer[x] != term; x = SKIP_BSNL(x + 1)) { + if (qmake_endOfLine(buffer[x])) { // malformed + cpp_state = AtStart; + ++line_count; + break; } + + /* + If we do skip any BSNLs, we need to consolidate the + surviving text by copying to lower indices. For that + to be possible, we also have to keep 'clean' advanced + in step with x even when we've yet to see any BSNLs. + */ + if (clean < x) + buffer[clean++] = buffer[x]; + else + clean++; } - // for loop's ++x shall step over the closing quote. - } - beginning = 0; - } - if(x >= buffer_len) - break; + if (cpp_state == WantName) + buffer[clean] = '\0'; + else // i.e. malformed + inc = 0; - // Got a preprocessor directive - const char *const keyword = buffer + x; - for (; - x < buffer_len && buffer[x] >= 'a' && buffer[x] <= 'z'; - x++) {} // skip over identifier - int keyword_len = buffer + x - keyword; - for (; - x < buffer_len && (buffer[x] == ' ' || buffer[x] == '\t'); - x++) {} // skip spaces after keyword - - /* Keyword with nothing after it, e.g. #endif: not interesting. */ - if (qmake_endOfLine(buffer[x])) - keyword_len = 0; - - if((keyword_len == 7 && !strncmp(keyword, "include", 7)) // C & Obj-C - || (keyword_len == 6 && !strncmp(keyword, "import", 6))) { // Obj-C - char term = buffer[x]; - if(term == '<') { - try_local = false; - term = '>'; - } else if(term != '"') { //wtf? - continue; + cpp_state = InCode; // hereafter + break; } - x++; - inc = buffer + x; - for (; - buffer[x] != term && !qmake_endOfLine(buffer[x]); - ++x) {} // skip until end of include name - buffer[x] = '\0'; - } else if (buffer[x] == '\'' || buffer[x] == '"') { - const char term = buffer[x++]; - while(x < buffer_len) { - if (buffer[x] == term) + + case AtStart: + // Preprocessor directive? + if (buffer[x] == '#') { + cpp_state = HadHash; break; - if (buffer[x] == '\\') { - x+=2; - } else { - if (qmake_endOfLine(buffer[x])) - ++line_count; - ++x; } + cpp_state = InCode; + // ... and fall through to handle buffer[x] as such. + case InCode: + // matching quotes (string literals and character literals) + if (buffer[x] == '\'' || buffer[x] == '"') { + // It might be a C++11 raw string. + bool israw = false; + if (buffer[x] == '"' && x > 0) { + int y = x - 1; + while (y > 0 && buffer[y] != 'R') { + if (buffer[y] == '8' || buffer[y] == 'u' || buffer[y] == 'U') + y--; + else if (y > 1 && qmake_endOfLine(buffer[y]) + && buffer[y - 1] == '\\') + y -= 2; + else if (y > 2 && buffer[y] == '\n' + && buffer[y - 1] == '\r' + && buffer[y - 2] == '\\') + y -= 3; + else + break; + } + israw = (buffer[y] == 'R'); + } + if (israw) { + x = SKIP_BSNL(x + 1); + const char *const delim = buffer + x; + int clean = x; + while (x < buffer_len && buffer[x] != '(') { + if (clean < x) + buffer[clean++] = buffer[x]; + else + clean++; + + x = SKIP_BSNL(x + 1); + } + /* + Not checking correctness (trust real compiler to do that): + - no controls, spaces, '(', ')', '\\' or (presumably) '"' in delim; + - at most 16 bytes in delim + + Raw strings are surely defined after phase 2, when + BSNLs are resolved; so the delimiter's exclusion + of '\\' and space (including newlines) applies too + late to save us the need to cope with BSNLs in it. + */ + + const int delimlen = buffer + clean - delim; + int matchlen = delimlen, extralines = 0; + while ((x = SKIP_BSNL(x + 1)) < buffer_len + && (buffer[x] != ')' + || (delimlen > 0 && + !matchWhileUnsplitting(buffer, buffer_len, + x + 1, delim, delimlen, + &matchlen, &extralines)) + || buffer[x + 1 + matchlen] != '"')) { + // skip, but keep track of lines + if (qmake_endOfLine(buffer[x])) + ++line_count; + extralines = 0; + } + line_count += extralines; // from the match + // buffer[x] is ')' + x += 1 + matchlen; // 1 for ')', then delim + // buffer[x] is '"' + } else { + const char term = buffer[x]; + while (++x < buffer_len && buffer[x] != term) { + if (buffer[x] == '\\') + ++x; + else if (qmake_endOfLine(buffer[x])) + ++line_count; + } + } + // for loop's ++x shall step over the closing quote. + } + // else: buffer[x] is just some code; move on. + break; } - } else { - --x; + + if (inc) // We were in WantName and found a name. + break; +#undef SKIP_BSNL } + if(x >= buffer_len) + break; } if(inc) { |