#include #include #include "exp.h" #include "scanner.h" #include "token.h" #include "yaml-cpp/exceptions.h" // IWYU pragma: keep namespace YAML { Scanner::Scanner(std::istream& in) : INPUT(in), m_tokens{}, m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false), m_simpleKeys{}, m_indents{}, m_indentRefs{}, m_flows{} {} Scanner::~Scanner() = default; bool Scanner::empty() { EnsureTokensInQueue(); return m_tokens.empty(); } void Scanner::pop() { EnsureTokensInQueue(); if (!m_tokens.empty()) m_tokens.pop(); } Token& Scanner::peek() { EnsureTokensInQueue(); assert(!m_tokens.empty()); // should we be asserting here? I mean, we really // just be checking // if it's empty before peeking. #if 0 static Token *pLast = 0; if(pLast != &m_tokens.front()) std::cerr << "peek: " << m_tokens.front() << "\n"; pLast = &m_tokens.front(); #endif return m_tokens.front(); } Mark Scanner::mark() const { return INPUT.mark(); } void Scanner::EnsureTokensInQueue() { while (true) { if (!m_tokens.empty()) { Token& token = m_tokens.front(); // if this guy's valid, then we're done if (token.status == Token::VALID) { return; } // here's where we clean up the impossible tokens if (token.status == Token::INVALID) { m_tokens.pop(); continue; } // note: what's left are the unverified tokens } // no token? maybe we've actually finished if (m_endedStream) { return; } // no? then scan... ScanNextToken(); } } void Scanner::ScanNextToken() { if (m_endedStream) { return; } if (!m_startedStream) { return StartStream(); } // get rid of whitespace, etc. (in between tokens it should be irrelevant) ScanToNextToken(); // maybe need to end some blocks PopIndentToHere(); // ***** // And now branch based on the next few characters! // ***** // end of stream if (!INPUT) { return EndStream(); } if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) { return ScanDirective(); } // document token if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) { return ScanDocStart(); } if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) { return ScanDocEnd(); } // flow start/end/entry if (INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart) { return ScanFlowStart(); } if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) { return ScanFlowEnd(); } if (INPUT.peek() == Keys::FlowEntry) { return ScanFlowEntry(); } // block/map stuff if (Exp::BlockEntry().Matches(INPUT)) { return ScanBlockEntry(); } if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) { return ScanKey(); } if (GetValueRegex().Matches(INPUT)) { return ScanValue(); } // alias/anchor if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) { return ScanAnchorOrAlias(); } // tag if (INPUT.peek() == Keys::Tag) { return ScanTag(); } // special scalars if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar)) { return ScanBlockScalar(); } if (INPUT.peek() == '\'' || INPUT.peek() == '\"') { return ScanQuotedScalar(); } // plain scalars if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()) .Matches(INPUT)) { return ScanPlainScalar(); } // don't know what it is! throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN); } void Scanner::ScanToNextToken() { while (true) { // first eat whitespace while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) { if (InBlockContext() && Exp::Tab().Matches(INPUT)) { m_simpleKeyAllowed = false; } INPUT.eat(1); } // then eat a comment if (Exp::Comment().Matches(INPUT)) { // eat until line break while (INPUT && !Exp::Break().Matches(INPUT)) { INPUT.eat(1); } } // if it's NOT a line break, then we're done! if (!Exp::Break().Matches(INPUT)) { break; } // otherwise, let's eat the line break and keep going int n = Exp::Break().Match(INPUT); INPUT.eat(n); // oh yeah, and let's get rid of that simple key InvalidateSimpleKey(); // new line - we may be able to accept a simple key now if (InBlockContext()) { m_simpleKeyAllowed = true; } } } /////////////////////////////////////////////////////////////////////// // Misc. helpers // IsWhitespaceToBeEaten // . We can eat whitespace if it's a space or tab // . Note: originally tabs in block context couldn't be eaten // "where a simple key could be allowed // (i.e., not at the beginning of a line, or following '-', '?', or // ':')" // I think this is wrong, since tabs can be non-content whitespace; it's just // that they can't contribute to indentation, so once you've seen a tab in a // line, you can't start a simple key bool Scanner::IsWhitespaceToBeEaten(char ch) { if (ch == ' ') { return true; } if (ch == '\t') { return true; } return false; } const RegEx& Scanner::GetValueRegex() const { if (InBlockContext()) { return Exp::Value(); } return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow(); } void Scanner::StartStream() { m_startedStream = true; m_simpleKeyAllowed = true; std::unique_ptr pIndent( new IndentMarker(-1, IndentMarker::NONE)); m_indentRefs.push_back(std::move(pIndent)); m_indents.push(&m_indentRefs.back()); } void Scanner::EndStream() { // force newline if (INPUT.column() > 0) { INPUT.ResetColumn(); } PopAllIndents(); PopAllSimpleKeys(); m_simpleKeyAllowed = false; m_endedStream = true; } Token* Scanner::PushToken(Token::TYPE type) { m_tokens.push(Token(type, INPUT.mark())); return &m_tokens.back(); } Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const { switch (type) { case IndentMarker::SEQ: return Token::BLOCK_SEQ_START; case IndentMarker::MAP: return Token::BLOCK_MAP_START; case IndentMarker::NONE: assert(false); break; } assert(false); throw std::runtime_error("yaml-cpp: internal error, invalid indent type"); } Scanner::IndentMarker* Scanner::PushIndentTo(int column, IndentMarker::INDENT_TYPE type) { // are we in flow? if (InFlowContext()) { return nullptr; } std::unique_ptr pIndent(new IndentMarker(column, type)); IndentMarker& indent = *pIndent; const IndentMarker& lastIndent = *m_indents.top(); // is this actually an indentation? if (indent.column < lastIndent.column) { return nullptr; } if (indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP)) { return nullptr; } // push a start token indent.pStartToken = PushToken(GetStartTokenFor(type)); // and then the indent m_indents.push(&indent); m_indentRefs.push_back(std::move(pIndent)); return &m_indentRefs.back(); } void Scanner::PopIndentToHere() { // are we in flow? if (InFlowContext()) { return; } // now pop away while (!m_indents.empty()) { const IndentMarker& indent = *m_indents.top(); if (indent.column < INPUT.column()) { break; } if (indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT))) { break; } PopIndent(); } while (!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID) { PopIndent(); } } void Scanner::PopAllIndents() { // are we in flow? if (InFlowContext()) { return; } // now pop away while (!m_indents.empty()) { const IndentMarker& indent = *m_indents.top(); if (indent.type == IndentMarker::NONE) { break; } PopIndent(); } } void Scanner::PopIndent() { const IndentMarker& indent = *m_indents.top(); m_indents.pop(); if (indent.status != IndentMarker::VALID) { InvalidateSimpleKey(); return; } if (indent.type == IndentMarker::SEQ) { m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark())); } else if (indent.type == IndentMarker::MAP) { m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark())); } } int Scanner::GetTopIndent() const { if (m_indents.empty()) { return 0; } return m_indents.top()->column; } void Scanner::ThrowParserException(const std::string& msg) const { Mark mark = Mark::null_mark(); if (!m_tokens.empty()) { const Token& token = m_tokens.front(); mark = token.mark; } throw ParserException(mark, msg); } } // namespace YAML