//===--- CommentLexer.h - Lexer for structured comments ---------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines lexer for structured comments and supporting token class. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_COMMENTLEXER_H #define LLVM_CLANG_AST_COMMENTLEXER_H #include "clang/Basic/Diagnostic.h" #include "clang/Basic/SourceManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/raw_ostream.h" namespace clang { namespace comments { class Lexer; class TextTokenRetokenizer; struct CommandInfo; class CommandTraits; namespace tok { enum TokenKind { eof, newline, text, unknown_command, // Command that does not have an ID. backslash_command, // Command with an ID, that used backslash marker. at_command, // Command with an ID, that used 'at' marker. verbatim_block_begin, verbatim_block_line, verbatim_block_end, verbatim_line_name, verbatim_line_text, html_start_tag, // html_slash_greater, // /> html_end_tag // '. LS_HTMLEndTag }; /// Current lexing mode. LexerState State; /// If State is LS_VerbatimBlock, contains the name of verbatim end /// command, including command marker. SmallString<16> VerbatimBlockEndCommandName; /// If true, the commands, html tags, etc will be parsed and reported as /// separate tokens inside the comment body. If false, the comment text will /// be parsed into text and newline tokens. bool ParseCommands; /// Given a character reference name (e.g., "lt"), return the character that /// it stands for (e.g., "<"). StringRef resolveHTMLNamedCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-10 integer, return the character. StringRef resolveHTMLDecimalCharacterReference(StringRef Name) const; /// Given a Unicode codepoint as base-16 integer, return the character. StringRef resolveHTMLHexCharacterReference(StringRef Name) const; void formTokenWithChars(Token &Result, const char *TokEnd, tok::TokenKind Kind); void formTextToken(Token &Result, const char *TokEnd) { StringRef Text(BufferPtr, TokEnd - BufferPtr); formTokenWithChars(Result, TokEnd, tok::text); Result.setText(Text); } SourceLocation getSourceLocation(const char *Loc) const { assert(Loc >= BufferStart && Loc <= BufferEnd && "Location out of range for this buffer!"); const unsigned CharNo = Loc - BufferStart; return FileLoc.getLocWithOffset(CharNo); } DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) { return Diags.Report(Loc, DiagID); } /// Eat string matching regexp \code \s*\* \endcode. void skipLineStartingDecorations(); /// Lex comment text, including commands if ParseCommands is set to true. void lexCommentText(Token &T); void setupAndLexVerbatimBlock(Token &T, const char *TextBegin, char Marker, const CommandInfo *Info); void lexVerbatimBlockFirstLine(Token &T); void lexVerbatimBlockBody(Token &T); void setupAndLexVerbatimLine(Token &T, const char *TextBegin, const CommandInfo *Info); void lexVerbatimLineText(Token &T); void lexHTMLCharacterReference(Token &T); void setupAndLexHTMLStartTag(Token &T); void lexHTMLStartTag(Token &T); void setupAndLexHTMLEndTag(Token &T); void lexHTMLEndTag(Token &T); public: Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, const CommandTraits &Traits, SourceLocation FileLoc, const char *BufferStart, const char *BufferEnd, bool ParseCommands = true); void lex(Token &T); StringRef getSpelling(const Token &Tok, const SourceManager &SourceMgr, bool *Invalid = nullptr) const; }; } // end namespace comments } // end namespace clang #endif